contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

   1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This transformation analyzes and transforms the induction variables (and
  10 // computations derived from them) into simpler forms suitable for subsequent
  11 // analysis and transformation.
  12 //
  13 // If the trip count of a loop is computable, this pass also makes the following
  14 // changes:
  15 //   1. The exit condition for the loop is canonicalized to compare the
  16 //      induction value against the exit value.  This turns loops like:
  17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
  18 //   2. Any use outside of the loop of an expression derived from the indvar
  19 //      is changed to compute the derived value outside of the loop, eliminating
  20 //      the dependence on the exit value of the induction variable.  If the only
  21 //      purpose of the loop is to compute the exit value of some derived
  22 //      expression, this transformation will make the loop dead.
  23 //
  24 //===----------------------------------------------------------------------===//
  25
  26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
  27 #include "llvm/ADT/APFloat.h"
  28 #include "llvm/ADT/APInt.h"
  29 #include "llvm/ADT/ArrayRef.h"
  30 #include "llvm/ADT/DenseMap.h"
  31 #include "llvm/ADT/None.h"
  32 #include "llvm/ADT/Optional.h"
  33 #include "llvm/ADT/STLExtras.h"
  34 #include "llvm/ADT/SmallSet.h"
  35 #include "llvm/ADT/SmallPtrSet.h"
  36 #include "llvm/ADT/SmallVector.h"
  37 #include "llvm/ADT/Statistic.h"
  38 #include "llvm/ADT/iterator_range.h"
  39 #include "llvm/Analysis/LoopInfo.h"
  40 #include "llvm/Analysis/LoopPass.h"
  41 #include "llvm/Analysis/ScalarEvolution.h"
  42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
  43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  44 #include "llvm/Analysis/TargetLibraryInfo.h"
  45 #include "llvm/Analysis/TargetTransformInfo.h"
  46 #include "llvm/Analysis/ValueTracking.h"
  47 #include "llvm/Transforms/Utils/Local.h"
  48 #include "llvm/IR/BasicBlock.h"
  49 #include "llvm/IR/Constant.h"
  50 #include "llvm/IR/ConstantRange.h"
  51 #include "llvm/IR/Constants.h"
  52 #include "llvm/IR/DataLayout.h"
  53 #include "llvm/IR/DerivedTypes.h"
  54 #include "llvm/IR/Dominators.h"
  55 #include "llvm/IR/Function.h"
  56 #include "llvm/IR/IRBuilder.h"
  57 #include "llvm/IR/InstrTypes.h"
  58 #include "llvm/IR/Instruction.h"
  59 #include "llvm/IR/Instructions.h"
  60 #include "llvm/IR/IntrinsicInst.h"
  61 #include "llvm/IR/Intrinsics.h"
  62 #include "llvm/IR/Module.h"
  63 #include "llvm/IR/Operator.h"
  64 #include "llvm/IR/PassManager.h"
  65 #include "llvm/IR/PatternMatch.h"
  66 #include "llvm/IR/Type.h"
  67 #include "llvm/IR/Use.h"
  68 #include "llvm/IR/User.h"
  69 #include "llvm/IR/Value.h"
  70 #include "llvm/IR/ValueHandle.h"
  71 #include "llvm/Pass.h"
  72 #include "llvm/Support/Casting.h"
  73 #include "llvm/Support/CommandLine.h"
  74 #include "llvm/Support/Compiler.h"
  75 #include "llvm/Support/Debug.h"
  76 #include "llvm/Support/ErrorHandling.h"
  77 #include "llvm/Support/MathExtras.h"
  78 #include "llvm/Support/raw_ostream.h"
  79 #include "llvm/Transforms/Scalar.h"
  80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
  81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  82 #include "llvm/Transforms/Utils/LoopUtils.h"
  83 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
  84 #include <cassert>
  85 #include <cstdint>
  86 #include <utility>
  87
  88 using namespace llvm;
  89
  90 #define DEBUG_TYPE "indvars"
  91
  92 STATISTIC(NumWidened     , "Number of indvars widened");
  93 STATISTIC(NumReplaced    , "Number of exit values replaced");
  94 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
  95 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
  96 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
  97
  98 // Trip count verification can be enabled by default under NDEBUG if we
  99 // implement a strong expression equivalence checker in SCEV. Until then, we
 100 // use the verify-indvars flag, which may assert in some cases.
 101 static cl::opt<bool> VerifyIndvars(
 102   "verify-indvars", cl::Hidden,
 103   cl::desc("Verify the ScalarEvolution result after running indvars"));
 104
 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
 106
 107 static cl::opt<ReplaceExitVal> ReplaceExitValue(
 108     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
 109     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
 110     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
 111                clEnumValN(OnlyCheapRepl, "cheap",
 112                           "only replace exit value when the cost is cheap"),
 113                clEnumValN(NoHardUse, "noharduse",
 114                           "only replace exit values when loop def likely dead"),
 115                clEnumValN(AlwaysRepl, "always",
 116                           "always replace exit value whenever possible")));
 117
 118 static cl::opt<bool> UsePostIncrementRanges(
 119   "indvars-post-increment-ranges", cl::Hidden,
 120   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
 121   cl::init(true));
 122
 123 static cl::opt<bool>
 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
 125             cl::desc("Disable Linear Function Test Replace optimization"));
 126
 127 namespace {
 128
 129 struct RewritePhi;
 130
 131 class IndVarSimplify {
 132   LoopInfo *LI;
 133   ScalarEvolution *SE;
 134   DominatorTree *DT;
 135   const DataLayout &DL;
 136   TargetLibraryInfo *TLI;
 137   const TargetTransformInfo *TTI;
 138
 139   SmallVector<WeakTrackingVH, 16> DeadInsts;
 140
 141   bool isValidRewrite(Value *FromVal, Value *ToVal);
 142
 143   bool handleFloatingPointIV(Loop *L, PHINode *PH);
 144   bool rewriteNonIntegerIVs(Loop *L);
 145
 146   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
 147   bool optimizeLoopExits(Loop *L);
 148
 149   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
 150   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
 151   bool rewriteFirstIterationLoopExitValues(Loop *L);
 152   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 153
 154   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
 155                                  const SCEV *ExitCount,
 156                                  PHINode *IndVar, SCEVExpander &Rewriter);
 157
 158   bool sinkUnusedInvariants(Loop *L);
 159
 160 public:
 161   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
 162                  const DataLayout &DL, TargetLibraryInfo *TLI,
 163                  TargetTransformInfo *TTI)
 164       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
 165
 166   bool run(Loop *L);
 167 };
 168
 169 } // end anonymous namespace
 170
 171 /// Return true if the SCEV expansion generated by the rewriter can replace the
 172 /// original value. SCEV guarantees that it produces the same value, but the way
 173 /// it is produced may be illegal IR.  Ideally, this function will only be
 174 /// called for verification.
 175 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
 176   // If an SCEV expression subsumed multiple pointers, its expansion could
 177   // reassociate the GEP changing the base pointer. This is illegal because the
 178   // final address produced by a GEP chain must be inbounds relative to its
 179   // underlying object. Otherwise basic alias analysis, among other things,
 180   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
 181   // producing an expression involving multiple pointers. Until then, we must
 182   // bail out here.
 183   //
 184   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
 185   // because it understands lcssa phis while SCEV does not.
 186   Value *FromPtr = FromVal;
 187   Value *ToPtr = ToVal;
 188   if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
 189     FromPtr = GEP->getPointerOperand();
 190   }
 191   if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
 192     ToPtr = GEP->getPointerOperand();
 193   }
 194   if (FromPtr != FromVal || ToPtr != ToVal) {
 195     // Quickly check the common case
 196     if (FromPtr == ToPtr)
 197       return true;
 198
 199     // SCEV may have rewritten an expression that produces the GEP's pointer
 200     // operand. That's ok as long as the pointer operand has the same base
 201     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
 202     // base of a recurrence. This handles the case in which SCEV expansion
 203     // converts a pointer type recurrence into a nonrecurrent pointer base
 204     // indexed by an integer recurrence.
 205
 206     // If the GEP base pointer is a vector of pointers, abort.
 207     if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
 208       return false;
 209
 210     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
 211     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
 212     if (FromBase == ToBase)
 213       return true;
 214
 215     LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
 216                       << " != " << *ToBase << "\n");
 217
 218     return false;
 219   }
 220   return true;
 221 }
 222
 223 /// Determine the insertion point for this user. By default, insert immediately
 224 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
 225 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
 226 /// common dominator for the incoming blocks. A nullptr can be returned if no
 227 /// viable location is found: it may happen if User is a PHI and Def only comes
 228 /// to this PHI from unreachable blocks.
 229 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
 230                                           DominatorTree *DT, LoopInfo *LI) {
 231   PHINode *PHI = dyn_cast<PHINode>(User);
 232   if (!PHI)
 233     return User;
 234
 235   Instruction *InsertPt = nullptr;
 236   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
 237     if (PHI->getIncomingValue(i) != Def)
 238       continue;
 239
 240     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
 241
 242     if (!DT->isReachableFromEntry(InsertBB))
 243       continue;
 244
 245     if (!InsertPt) {
 246       InsertPt = InsertBB->getTerminator();
 247       continue;
 248     }
 249     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
 250     InsertPt = InsertBB->getTerminator();
 251   }
 252
 253   // If we have skipped all inputs, it means that Def only comes to Phi from
 254   // unreachable blocks.
 255   if (!InsertPt)
 256     return nullptr;
 257
 258   auto *DefI = dyn_cast<Instruction>(Def);
 259   if (!DefI)
 260     return InsertPt;
 261
 262   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
 263
 264   auto *L = LI->getLoopFor(DefI->getParent());
 265   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
 266
 267   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
 268     if (LI->getLoopFor(DTN->getBlock()) == L)
 269       return DTN->getBlock()->getTerminator();
 270
 271   llvm_unreachable("DefI dominates InsertPt!");
 272 }
 273
 274 //===----------------------------------------------------------------------===//
 275 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
 276 //===----------------------------------------------------------------------===//
 277
 278 /// Convert APF to an integer, if possible.
 279 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
 280   bool isExact = false;
 281   // See if we can convert this to an int64_t
 282   uint64_t UIntVal;
 283   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
 284                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
 285       !isExact)
 286     return false;
 287   IntVal = UIntVal;
 288   return true;
 289 }
 290
 291 /// If the loop has floating induction variable then insert corresponding
 292 /// integer induction variable if possible.
 293 /// For example,
 294 /// for(double i = 0; i < 10000; ++i)
 295 ///   bar(i)
 296 /// is converted into
 297 /// for(int i = 0; i < 10000; ++i)
 298 ///   bar((double)i);
 299 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
 300   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
 301   unsigned BackEdge     = IncomingEdge^1;
 302
 303   // Check incoming value.
 304   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
 305
 306   int64_t InitValue;
 307   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
 308     return false;
 309
 310   // Check IV increment. Reject this PN if increment operation is not
 311   // an add or increment value can not be represented by an integer.
 312   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
 313   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
 314
 315   // If this is not an add of the PHI with a constantfp, or if the constant fp
 316   // is not an integer, bail out.
 317   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
 318   int64_t IncValue;
 319   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
 320       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
 321     return false;
 322
 323   // Check Incr uses. One user is PN and the other user is an exit condition
 324   // used by the conditional terminator.
 325   Value::user_iterator IncrUse = Incr->user_begin();
 326   Instruction *U1 = cast<Instruction>(*IncrUse++);
 327   if (IncrUse == Incr->user_end()) return false;
 328   Instruction *U2 = cast<Instruction>(*IncrUse++);
 329   if (IncrUse != Incr->user_end()) return false;
 330
 331   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
 332   // only used by a branch, we can't transform it.
 333   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
 334   if (!Compare)
 335     Compare = dyn_cast<FCmpInst>(U2);
 336   if (!Compare || !Compare->hasOneUse() ||
 337       !isa<BranchInst>(Compare->user_back()))
 338     return false;
 339
 340   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
 341
 342   // We need to verify that the branch actually controls the iteration count
 343   // of the loop.  If not, the new IV can overflow and no one will notice.
 344   // The branch block must be in the loop and one of the successors must be out
 345   // of the loop.
 346   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
 347   if (!L->contains(TheBr->getParent()) ||
 348       (L->contains(TheBr->getSuccessor(0)) &&
 349        L->contains(TheBr->getSuccessor(1))))
 350     return false;
 351
 352   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
 353   // transform it.
 354   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
 355   int64_t ExitValue;
 356   if (ExitValueVal == nullptr ||
 357       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
 358     return false;
 359
 360   // Find new predicate for integer comparison.
 361   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
 362   switch (Compare->getPredicate()) {
 363   default: return false;  // Unknown comparison.
 364   case CmpInst::FCMP_OEQ:
 365   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
 366   case CmpInst::FCMP_ONE:
 367   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
 368   case CmpInst::FCMP_OGT:
 369   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
 370   case CmpInst::FCMP_OGE:
 371   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
 372   case CmpInst::FCMP_OLT:
 373   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
 374   case CmpInst::FCMP_OLE:
 375   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
 376   }
 377
 378   // We convert the floating point induction variable to a signed i32 value if
 379   // we can.  This is only safe if the comparison will not overflow in a way
 380   // that won't be trapped by the integer equivalent operations.  Check for this
 381   // now.
 382   // TODO: We could use i64 if it is native and the range requires it.
 383
 384   // The start/stride/exit values must all fit in signed i32.
 385   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
 386     return false;
 387
 388   // If not actually striding (add x, 0.0), avoid touching the code.
 389   if (IncValue == 0)
 390     return false;
 391
 392   // Positive and negative strides have different safety conditions.
 393   if (IncValue > 0) {
 394     // If we have a positive stride, we require the init to be less than the
 395     // exit value.
 396     if (InitValue >= ExitValue)
 397       return false;
 398
 399     uint32_t Range = uint32_t(ExitValue-InitValue);
 400     // Check for infinite loop, either:
 401     // while (i <= Exit) or until (i > Exit)
 402     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
 403       if (++Range == 0) return false;  // Range overflows.
 404     }
 405
 406     unsigned Leftover = Range % uint32_t(IncValue);
 407
 408     // If this is an equality comparison, we require that the strided value
 409     // exactly land on the exit value, otherwise the IV condition will wrap
 410     // around and do things the fp IV wouldn't.
 411     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
 412         Leftover != 0)
 413       return false;
 414
 415     // If the stride would wrap around the i32 before exiting, we can't
 416     // transform the IV.
 417     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
 418       return false;
 419   } else {
 420     // If we have a negative stride, we require the init to be greater than the
 421     // exit value.
 422     if (InitValue <= ExitValue)
 423       return false;
 424
 425     uint32_t Range = uint32_t(InitValue-ExitValue);
 426     // Check for infinite loop, either:
 427     // while (i >= Exit) or until (i < Exit)
 428     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
 429       if (++Range == 0) return false;  // Range overflows.
 430     }
 431
 432     unsigned Leftover = Range % uint32_t(-IncValue);
 433
 434     // If this is an equality comparison, we require that the strided value
 435     // exactly land on the exit value, otherwise the IV condition will wrap
 436     // around and do things the fp IV wouldn't.
 437     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
 438         Leftover != 0)
 439       return false;
 440
 441     // If the stride would wrap around the i32 before exiting, we can't
 442     // transform the IV.
 443     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
 444       return false;
 445   }
 446
 447   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
 448
 449   // Insert new integer induction variable.
 450   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
 451   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
 452                       PN->getIncomingBlock(IncomingEdge));
 453
 454   Value *NewAdd =
 455     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
 456                               Incr->getName()+".int", Incr);
 457   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
 458
 459   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
 460                                       ConstantInt::get(Int32Ty, ExitValue),
 461                                       Compare->getName());
 462
 463   // In the following deletions, PN may become dead and may be deleted.
 464   // Use a WeakTrackingVH to observe whether this happens.
 465   WeakTrackingVH WeakPH = PN;
 466
 467   // Delete the old floating point exit comparison.  The branch starts using the
 468   // new comparison.
 469   NewCompare->takeName(Compare);
 470   Compare->replaceAllUsesWith(NewCompare);
 471   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
 472
 473   // Delete the old floating point increment.
 474   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
 475   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
 476
 477   // If the FP induction variable still has uses, this is because something else
 478   // in the loop uses its value.  In order to canonicalize the induction
 479   // variable, we chose to eliminate the IV and rewrite it in terms of an
 480   // int->fp cast.
 481   //
 482   // We give preference to sitofp over uitofp because it is faster on most
 483   // platforms.
 484   if (WeakPH) {
 485     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
 486                                  &*PN->getParent()->getFirstInsertionPt());
 487     PN->replaceAllUsesWith(Conv);
 488     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
 489   }
 490   return true;
 491 }
 492
 493 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
 494   // First step.  Check to see if there are any floating-point recurrences.
 495   // If there are, change them into integer recurrences, permitting analysis by
 496   // the SCEV routines.
 497   BasicBlock *Header = L->getHeader();
 498
 499   SmallVector<WeakTrackingVH, 8> PHIs;
 500   for (PHINode &PN : Header->phis())
 501     PHIs.push_back(&PN);
 502
 503   bool Changed = false;
 504   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
 505     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
 506       Changed |= handleFloatingPointIV(L, PN);
 507
 508   // If the loop previously had floating-point IV, ScalarEvolution
 509   // may not have been able to compute a trip count. Now that we've done some
 510   // re-writing, the trip count may be computable.
 511   if (Changed)
 512     SE->forgetLoop(L);
 513   return Changed;
 514 }
 515
 516 namespace {
 517
 518 // Collect information about PHI nodes which can be transformed in
 519 // rewriteLoopExitValues.
 520 struct RewritePhi {
 521   PHINode *PN;
 522
 523   // Ith incoming value.
 524   unsigned Ith;
 525
 526   // Exit value after expansion.
 527   Value *Val;
 528
 529   // High Cost when expansion.
 530   bool HighCost;
 531
 532   RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
 533       : PN(P), Ith(I), Val(V), HighCost(H) {}
 534 };
 535
 536 } // end anonymous namespace
 537
 538 //===----------------------------------------------------------------------===//
 539 // rewriteLoopExitValues - Optimize IV users outside the loop.
 540 // As a side effect, reduces the amount of IV processing within the loop.
 541 //===----------------------------------------------------------------------===//
 542
 543 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
 544   SmallPtrSet<const Instruction *, 8> Visited;
 545   SmallVector<const Instruction *, 8> WorkList;
 546   Visited.insert(I);
 547   WorkList.push_back(I);
 548   while (!WorkList.empty()) {
 549     const Instruction *Curr = WorkList.pop_back_val();
 550     // This use is outside the loop, nothing to do.
 551     if (!L->contains(Curr))
 552       continue;
 553     // Do we assume it is a "hard" use which will not be eliminated easily?
 554     if (Curr->mayHaveSideEffects())
 555       return true;
 556     // Otherwise, add all its users to worklist.
 557     for (auto U : Curr->users()) {
 558       auto *UI = cast<Instruction>(U);
 559       if (Visited.insert(UI).second)
 560         WorkList.push_back(UI);
 561     }
 562   }
 563   return false;
 564 }
 565
 566 /// Check to see if this loop has a computable loop-invariant execution count.
 567 /// If so, this means that we can compute the final value of any expressions
 568 /// that are recurrent in the loop, and substitute the exit values from the loop
 569 /// into any instructions outside of the loop that use the final values of the
 570 /// current expressions.
 571 ///
 572 /// This is mostly redundant with the regular IndVarSimplify activities that
 573 /// happen later, except that it's more powerful in some cases, because it's
 574 /// able to brute-force evaluate arbitrary instructions as long as they have
 575 /// constant operands at the beginning of the loop.
 576 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
 577   // Check a pre-condition.
 578   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
 579          "Indvars did not preserve LCSSA!");
 580
 581   SmallVector<BasicBlock*, 8> ExitBlocks;
 582   L->getUniqueExitBlocks(ExitBlocks);
 583
 584   SmallVector<RewritePhi, 8> RewritePhiSet;
 585   // Find all values that are computed inside the loop, but used outside of it.
 586   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
 587   // the exit blocks of the loop to find them.
 588   for (BasicBlock *ExitBB : ExitBlocks) {
 589     // If there are no PHI nodes in this exit block, then no values defined
 590     // inside the loop are used on this path, skip it.
 591     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
 592     if (!PN) continue;
 593
 594     unsigned NumPreds = PN->getNumIncomingValues();
 595
 596     // Iterate over all of the PHI nodes.
 597     BasicBlock::iterator BBI = ExitBB->begin();
 598     while ((PN = dyn_cast<PHINode>(BBI++))) {
 599       if (PN->use_empty())
 600         continue; // dead use, don't replace it
 601
 602       if (!SE->isSCEVable(PN->getType()))
 603         continue;
 604
 605       // It's necessary to tell ScalarEvolution about this explicitly so that
 606       // it can walk the def-use list and forget all SCEVs, as it may not be
 607       // watching the PHI itself. Once the new exit value is in place, there
 608       // may not be a def-use connection between the loop and every instruction
 609       // which got a SCEVAddRecExpr for that loop.
 610       SE->forgetValue(PN);
 611
 612       // Iterate over all of the values in all the PHI nodes.
 613       for (unsigned i = 0; i != NumPreds; ++i) {
 614         // If the value being merged in is not integer or is not defined
 615         // in the loop, skip it.
 616         Value *InVal = PN->getIncomingValue(i);
 617         if (!isa<Instruction>(InVal))
 618           continue;
 619
 620         // If this pred is for a subloop, not L itself, skip it.
 621         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
 622           continue; // The Block is in a subloop, skip it.
 623
 624         // Check that InVal is defined in the loop.
 625         Instruction *Inst = cast<Instruction>(InVal);
 626         if (!L->contains(Inst))
 627           continue;
 628
 629         // Okay, this instruction has a user outside of the current loop
 630         // and varies predictably *inside* the loop.  Evaluate the value it
 631         // contains when the loop exits, if possible.
 632         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
 633         if (!SE->isLoopInvariant(ExitValue, L) ||
 634             !isSafeToExpand(ExitValue, *SE))
 635           continue;
 636
 637         // Computing the value outside of the loop brings no benefit if it is
 638         // definitely used inside the loop in a way which can not be optimized
 639         // away.  Avoid doing so unless we know we have a value which computes
 640         // the ExitValue already.  TODO: This should be merged into SCEV
 641         // expander to leverage its knowledge of existing expressions.
 642         if (ReplaceExitValue != AlwaysRepl &&
 643             !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
 644             hasHardUserWithinLoop(L, Inst))
 645           continue;
 646
 647         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
 648         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 649
 650         LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
 651                           << '\n'
 652                           << "  LoopVal = " << *Inst << "\n");
 653
 654         if (!isValidRewrite(Inst, ExitVal)) {
 655           DeadInsts.push_back(ExitVal);
 656           continue;
 657         }
 658
 659 #ifndef NDEBUG
 660         // If we reuse an instruction from a loop which is neither L nor one of
 661         // its containing loops, we end up breaking LCSSA form for this loop by
 662         // creating a new use of its instruction.
 663         if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
 664           if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
 665             if (EVL != L)
 666               assert(EVL->contains(L) && "LCSSA breach detected!");
 667 #endif
 668
 669         // Collect all the candidate PHINodes to be rewritten.
 670         RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
 671       }
 672     }
 673   }
 674
 675   bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
 676
 677   bool Changed = false;
 678   // Transformation.
 679   for (const RewritePhi &Phi : RewritePhiSet) {
 680     PHINode *PN = Phi.PN;
 681     Value *ExitVal = Phi.Val;
 682
 683     // Only do the rewrite when the ExitValue can be expanded cheaply.
 684     // If LoopCanBeDel is true, rewrite exit value aggressively.
 685     if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
 686       DeadInsts.push_back(ExitVal);
 687       continue;
 688     }
 689
 690     Changed = true;
 691     ++NumReplaced;
 692     Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
 693     PN->setIncomingValue(Phi.Ith, ExitVal);
 694
 695     // If this instruction is dead now, delete it. Don't do it now to avoid
 696     // invalidating iterators.
 697     if (isInstructionTriviallyDead(Inst, TLI))
 698       DeadInsts.push_back(Inst);
 699
 700     // Replace PN with ExitVal if that is legal and does not break LCSSA.
 701     if (PN->getNumIncomingValues() == 1 &&
 702         LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
 703       PN->replaceAllUsesWith(ExitVal);
 704       PN->eraseFromParent();
 705     }
 706   }
 707
 708   // The insertion point instruction may have been deleted; clear it out
 709   // so that the rewriter doesn't trip over it later.
 710   Rewriter.clearInsertPoint();
 711   return Changed;
 712 }
 713
 714 //===---------------------------------------------------------------------===//
 715 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
 716 // they will exit at the first iteration.
 717 //===---------------------------------------------------------------------===//
 718
 719 /// Check to see if this loop has loop invariant conditions which lead to loop
 720 /// exits. If so, we know that if the exit path is taken, it is at the first
 721 /// loop iteration. This lets us predict exit values of PHI nodes that live in
 722 /// loop header.
 723 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
 724   // Verify the input to the pass is already in LCSSA form.
 725   assert(L->isLCSSAForm(*DT));
 726
 727   SmallVector<BasicBlock *, 8> ExitBlocks;
 728   L->getUniqueExitBlocks(ExitBlocks);
 729
 730   bool MadeAnyChanges = false;
 731   for (auto *ExitBB : ExitBlocks) {
 732     // If there are no more PHI nodes in this exit block, then no more
 733     // values defined inside the loop are used on this path.
 734     for (PHINode &PN : ExitBB->phis()) {
 735       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
 736            IncomingValIdx != E; ++IncomingValIdx) {
 737         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
 738
 739         // Can we prove that the exit must run on the first iteration if it
 740         // runs at all?  (i.e. early exits are fine for our purposes, but
 741         // traces which lead to this exit being taken on the 2nd iteration
 742         // aren't.)  Note that this is about whether the exit branch is
 743         // executed, not about whether it is taken.
 744         if (!L->getLoopLatch() ||
 745             !DT->dominates(IncomingBB, L->getLoopLatch()))
 746           continue;
 747
 748         // Get condition that leads to the exit path.
 749         auto *TermInst = IncomingBB->getTerminator();
 750
 751         Value *Cond = nullptr;
 752         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
 753           // Must be a conditional branch, otherwise the block
 754           // should not be in the loop.
 755           Cond = BI->getCondition();
 756         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
 757           Cond = SI->getCondition();
 758         else
 759           continue;
 760
 761         if (!L->isLoopInvariant(Cond))
 762           continue;
 763
 764         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
 765
 766         // Only deal with PHIs in the loop header.
 767         if (!ExitVal || ExitVal->getParent() != L->getHeader())
 768           continue;
 769
 770         // If ExitVal is a PHI on the loop header, then we know its
 771         // value along this exit because the exit can only be taken
 772         // on the first iteration.
 773         auto *LoopPreheader = L->getLoopPreheader();
 774         assert(LoopPreheader && "Invalid loop");
 775         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
 776         if (PreheaderIdx != -1) {
 777           assert(ExitVal->getParent() == L->getHeader() &&
 778                  "ExitVal must be in loop header");
 779           MadeAnyChanges = true;
 780           PN.setIncomingValue(IncomingValIdx,
 781                               ExitVal->getIncomingValue(PreheaderIdx));
 782         }
 783       }
 784     }
 785   }
 786   return MadeAnyChanges;
 787 }
 788
 789 /// Check whether it is possible to delete the loop after rewriting exit
 790 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
 791 /// aggressively.
 792 bool IndVarSimplify::canLoopBeDeleted(
 793     Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
 794   BasicBlock *Preheader = L->getLoopPreheader();
 795   // If there is no preheader, the loop will not be deleted.
 796   if (!Preheader)
 797     return false;
 798
 799   // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
 800   // We obviate multiple ExitingBlocks case for simplicity.
 801   // TODO: If we see testcase with multiple ExitingBlocks can be deleted
 802   // after exit value rewriting, we can enhance the logic here.
 803   SmallVector<BasicBlock *, 4> ExitingBlocks;
 804   L->getExitingBlocks(ExitingBlocks);
 805   SmallVector<BasicBlock *, 8> ExitBlocks;
 806   L->getUniqueExitBlocks(ExitBlocks);
 807   if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
 808     return false;
 809
 810   BasicBlock *ExitBlock = ExitBlocks[0];
 811   BasicBlock::iterator BI = ExitBlock->begin();
 812   while (PHINode *P = dyn_cast<PHINode>(BI)) {
 813     Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
 814
 815     // If the Incoming value of P is found in RewritePhiSet, we know it
 816     // could be rewritten to use a loop invariant value in transformation
 817     // phase later. Skip it in the loop invariant check below.
 818     bool found = false;
 819     for (const RewritePhi &Phi : RewritePhiSet) {
 820       unsigned i = Phi.Ith;
 821       if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
 822         found = true;
 823         break;
 824       }
 825     }
 826
 827     Instruction *I;
 828     if (!found && (I = dyn_cast<Instruction>(Incoming)))
 829       if (!L->hasLoopInvariantOperands(I))
 830         return false;
 831
 832     ++BI;
 833   }
 834
 835   for (auto *BB : L->blocks())
 836     if (llvm::any_of(*BB, [](Instruction &I) {
 837           return I.mayHaveSideEffects();
 838         }))
 839       return false;
 840
 841   return true;
 842 }
 843
 844 //===----------------------------------------------------------------------===//
 845 //  IV Widening - Extend the width of an IV to cover its widest uses.
 846 //===----------------------------------------------------------------------===//
 847
 848 namespace {
 849
 850 // Collect information about induction variables that are used by sign/zero
 851 // extend operations. This information is recorded by CollectExtend and provides
 852 // the input to WidenIV.
 853 struct WideIVInfo {
 854   PHINode *NarrowIV = nullptr;
 855
 856   // Widest integer type created [sz]ext
 857   Type *WidestNativeType = nullptr;
 858
 859   // Was a sext user seen before a zext?
 860   bool IsSigned = false;
 861 };
 862
 863 } // end anonymous namespace
 864
 865 /// Update information about the induction variable that is extended by this
 866 /// sign or zero extend operation. This is used to determine the final width of
 867 /// the IV before actually widening it.
 868 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
 869                         const TargetTransformInfo *TTI) {
 870   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
 871   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
 872     return;
 873
 874   Type *Ty = Cast->getType();
 875   uint64_t Width = SE->getTypeSizeInBits(Ty);
 876   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
 877     return;
 878
 879   // Check that `Cast` actually extends the induction variable (we rely on this
 880   // later).  This takes care of cases where `Cast` is extending a truncation of
 881   // the narrow induction variable, and thus can end up being narrower than the
 882   // "narrow" induction variable.
 883   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
 884   if (NarrowIVWidth >= Width)
 885     return;
 886
 887   // Cast is either an sext or zext up to this point.
 888   // We should not widen an indvar if arithmetics on the wider indvar are more
 889   // expensive than those on the narrower indvar. We check only the cost of ADD
 890   // because at least an ADD is required to increment the induction variable. We
 891   // could compute more comprehensively the cost of all instructions on the
 892   // induction variable when necessary.
 893   if (TTI &&
 894       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
 895           TTI->getArithmeticInstrCost(Instruction::Add,
 896                                       Cast->getOperand(0)->getType())) {
 897     return;
 898   }
 899
 900   if (!WI.WidestNativeType) {
 901     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 902     WI.IsSigned = IsSigned;
 903     return;
 904   }
 905
 906   // We extend the IV to satisfy the sign of its first user, arbitrarily.
 907   if (WI.IsSigned != IsSigned)
 908     return;
 909
 910   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
 911     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 912 }
 913
 914 namespace {
 915
 916 /// Record a link in the Narrow IV def-use chain along with the WideIV that
 917 /// computes the same value as the Narrow IV def.  This avoids caching Use*
 918 /// pointers.
 919 struct NarrowIVDefUse {
 920   Instruction *NarrowDef = nullptr;
 921   Instruction *NarrowUse = nullptr;
 922   Instruction *WideDef = nullptr;
 923
 924   // True if the narrow def is never negative.  Tracking this information lets
 925   // us use a sign extension instead of a zero extension or vice versa, when
 926   // profitable and legal.
 927   bool NeverNegative = false;
 928
 929   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
 930                  bool NeverNegative)
 931       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
 932         NeverNegative(NeverNegative) {}
 933 };
 934
 935 /// The goal of this transform is to remove sign and zero extends without
 936 /// creating any new induction variables. To do this, it creates a new phi of
 937 /// the wider type and redirects all users, either removing extends or inserting
 938 /// truncs whenever we stop propagating the type.
 939 class WidenIV {
 940   // Parameters
 941   PHINode *OrigPhi;
 942   Type *WideType;
 943
 944   // Context
 945   LoopInfo        *LI;
 946   Loop            *L;
 947   ScalarEvolution *SE;
 948   DominatorTree   *DT;
 949
 950   // Does the module have any calls to the llvm.experimental.guard intrinsic
 951   // at all? If not we can avoid scanning instructions looking for guards.
 952   bool HasGuards;
 953
 954   // Result
 955   PHINode *WidePhi = nullptr;
 956   Instruction *WideInc = nullptr;
 957   const SCEV *WideIncExpr = nullptr;
 958   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
 959
 960   SmallPtrSet<Instruction *,16> Widened;
 961   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
 962
 963   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
 964
 965   // A map tracking the kind of extension used to widen each narrow IV
 966   // and narrow IV user.
 967   // Key: pointer to a narrow IV or IV user.
 968   // Value: the kind of extension used to widen this Instruction.
 969   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
 970
 971   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
 972
 973   // A map with control-dependent ranges for post increment IV uses. The key is
 974   // a pair of IV def and a use of this def denoting the context. The value is
 975   // a ConstantRange representing possible values of the def at the given
 976   // context.
 977   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
 978
 979   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
 980                                               Instruction *UseI) {
 981     DefUserPair Key(Def, UseI);
 982     auto It = PostIncRangeInfos.find(Key);
 983     return It == PostIncRangeInfos.end()
 984                ? Optional<ConstantRange>(None)
 985                : Optional<ConstantRange>(It->second);
 986   }
 987
 988   void calculatePostIncRanges(PHINode *OrigPhi);
 989   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
 990
 991   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
 992     DefUserPair Key(Def, UseI);
 993     auto It = PostIncRangeInfos.find(Key);
 994     if (It == PostIncRangeInfos.end())
 995       PostIncRangeInfos.insert({Key, R});
 996     else
 997       It->second = R.intersectWith(It->second);
 998   }
 999
1000 public:
1001   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1002           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1003           bool HasGuards)
1004       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1005         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1006         HasGuards(HasGuards), DeadInsts(DI) {
1007     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1008     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1009   }
1010
1011   PHINode *createWideIV(SCEVExpander &Rewriter);
1012
1013 protected:
1014   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1015                           Instruction *Use);
1016
1017   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1018   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1019                                      const SCEVAddRecExpr *WideAR);
1020   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1021
1022   ExtendKind getExtendKind(Instruction *I);
1023
1024   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1025
1026   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1027
1028   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1029
1030   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1031                               unsigned OpCode) const;
1032
1033   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1034
1035   bool widenLoopCompare(NarrowIVDefUse DU);
1036   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1037   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1038
1039   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1040 };
1041
1042 } // end anonymous namespace
1043
1044 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1045                                  bool IsSigned, Instruction *Use) {
1046   // Set the debug location and conservative insertion point.
1047   IRBuilder<> Builder(Use);
1048   // Hoist the insertion point into loop preheaders as far as possible.
1049   for (const Loop *L = LI->getLoopFor(Use->getParent());
1050        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1051        L = L->getParentLoop())
1052     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1053
1054   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1055                     Builder.CreateZExt(NarrowOper, WideType);
1056 }
1057
1058 /// Instantiate a wide operation to replace a narrow operation. This only needs
1059 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1060 /// 0 for any operation we decide not to clone.
1061 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1062                                   const SCEVAddRecExpr *WideAR) {
1063   unsigned Opcode = DU.NarrowUse->getOpcode();
1064   switch (Opcode) {
1065   default:
1066     return nullptr;
1067   case Instruction::Add:
1068   case Instruction::Mul:
1069   case Instruction::UDiv:
1070   case Instruction::Sub:
1071     return cloneArithmeticIVUser(DU, WideAR);
1072
1073   case Instruction::And:
1074   case Instruction::Or:
1075   case Instruction::Xor:
1076   case Instruction::Shl:
1077   case Instruction::LShr:
1078   case Instruction::AShr:
1079     return cloneBitwiseIVUser(DU);
1080   }
1081 }
1082
1083 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1084   Instruction *NarrowUse = DU.NarrowUse;
1085   Instruction *NarrowDef = DU.NarrowDef;
1086   Instruction *WideDef = DU.WideDef;
1087
1088   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1089
1090   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1091   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1092   // invariant and will be folded or hoisted. If it actually comes from a
1093   // widened IV, it should be removed during a future call to widenIVUse.
1094   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1095   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1096                    ? WideDef
1097                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1098                                       IsSigned, NarrowUse);
1099   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1100                    ? WideDef
1101                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1102                                       IsSigned, NarrowUse);
1103
1104   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1105   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1106                                         NarrowBO->getName());
1107   IRBuilder<> Builder(NarrowUse);
1108   Builder.Insert(WideBO);
1109   WideBO->copyIRFlags(NarrowBO);
1110   return WideBO;
1111 }
1112
1113 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1114                                             const SCEVAddRecExpr *WideAR) {
1115   Instruction *NarrowUse = DU.NarrowUse;
1116   Instruction *NarrowDef = DU.NarrowDef;
1117   Instruction *WideDef = DU.WideDef;
1118
1119   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1120
1121   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1122
1123   // We're trying to find X such that
1124   //
1125   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1126   //
1127   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1128   // and check using SCEV if any of them are correct.
1129
1130   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1131   // correct solution to X.
1132   auto GuessNonIVOperand = [&](bool SignExt) {
1133     const SCEV *WideLHS;
1134     const SCEV *WideRHS;
1135
1136     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1137       if (SignExt)
1138         return SE->getSignExtendExpr(S, Ty);
1139       return SE->getZeroExtendExpr(S, Ty);
1140     };
1141
1142     if (IVOpIdx == 0) {
1143       WideLHS = SE->getSCEV(WideDef);
1144       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1145       WideRHS = GetExtend(NarrowRHS, WideType);
1146     } else {
1147       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1148       WideLHS = GetExtend(NarrowLHS, WideType);
1149       WideRHS = SE->getSCEV(WideDef);
1150     }
1151
1152     // WideUse is "WideDef `op.wide` X" as described in the comment.
1153     const SCEV *WideUse = nullptr;
1154
1155     switch (NarrowUse->getOpcode()) {
1156     default:
1157       llvm_unreachable("No other possibility!");
1158
1159     case Instruction::Add:
1160       WideUse = SE->getAddExpr(WideLHS, WideRHS);
1161       break;
1162
1163     case Instruction::Mul:
1164       WideUse = SE->getMulExpr(WideLHS, WideRHS);
1165       break;
1166
1167     case Instruction::UDiv:
1168       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1169       break;
1170
1171     case Instruction::Sub:
1172       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1173       break;
1174     }
1175
1176     return WideUse == WideAR;
1177   };
1178
1179   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1180   if (!GuessNonIVOperand(SignExtend)) {
1181     SignExtend = !SignExtend;
1182     if (!GuessNonIVOperand(SignExtend))
1183       return nullptr;
1184   }
1185
1186   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1187                    ? WideDef
1188                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1189                                       SignExtend, NarrowUse);
1190   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1191                    ? WideDef
1192                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1193                                       SignExtend, NarrowUse);
1194
1195   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1196   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1197                                         NarrowBO->getName());
1198
1199   IRBuilder<> Builder(NarrowUse);
1200   Builder.Insert(WideBO);
1201   WideBO->copyIRFlags(NarrowBO);
1202   return WideBO;
1203 }
1204
1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1206   auto It = ExtendKindMap.find(I);
1207   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1208   return It->second;
1209 }
1210
1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1212                                      unsigned OpCode) const {
1213   if (OpCode == Instruction::Add)
1214     return SE->getAddExpr(LHS, RHS);
1215   if (OpCode == Instruction::Sub)
1216     return SE->getMinusSCEV(LHS, RHS);
1217   if (OpCode == Instruction::Mul)
1218     return SE->getMulExpr(LHS, RHS);
1219
1220   llvm_unreachable("Unsupported opcode.");
1221 }
1222
1223 /// No-wrap operations can transfer sign extension of their result to their
1224 /// operands. Generate the SCEV value for the widened operation without
1225 /// actually modifying the IR yet. If the expression after extending the
1226 /// operands is an AddRec for this loop, return the AddRec and the kind of
1227 /// extension used.
1228 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1229   // Handle the common case of add<nsw/nuw>
1230   const unsigned OpCode = DU.NarrowUse->getOpcode();
1231   // Only Add/Sub/Mul instructions supported yet.
1232   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1233       OpCode != Instruction::Mul)
1234     return {nullptr, Unknown};
1235
1236   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1237   // if extending the other will lead to a recurrence.
1238   const unsigned ExtendOperIdx =
1239       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1240   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1241
1242   const SCEV *ExtendOperExpr = nullptr;
1243   const OverflowingBinaryOperator *OBO =
1244     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1245   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1246   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1247     ExtendOperExpr = SE->getSignExtendExpr(
1248       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1249   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1250     ExtendOperExpr = SE->getZeroExtendExpr(
1251       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252   else
1253     return {nullptr, Unknown};
1254
1255   // When creating this SCEV expr, don't apply the current operations NSW or NUW
1256   // flags. This instruction may be guarded by control flow that the no-wrap
1257   // behavior depends on. Non-control-equivalent instructions can be mapped to
1258   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1259   // semantics to those operations.
1260   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1261   const SCEV *rhs = ExtendOperExpr;
1262
1263   // Let's swap operands to the initial order for the case of non-commutative
1264   // operations, like SUB. See PR21014.
1265   if (ExtendOperIdx == 0)
1266     std::swap(lhs, rhs);
1267   const SCEVAddRecExpr *AddRec =
1268       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1269
1270   if (!AddRec || AddRec->getLoop() != L)
1271     return {nullptr, Unknown};
1272
1273   return {AddRec, ExtKind};
1274 }
1275
1276 /// Is this instruction potentially interesting for further simplification after
1277 /// widening it's type? In other words, can the extend be safely hoisted out of
1278 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1279 /// so, return the extended recurrence and the kind of extension used. Otherwise
1280 /// return {nullptr, Unknown}.
1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1282   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1283     return {nullptr, Unknown};
1284
1285   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1286   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1287       SE->getTypeSizeInBits(WideType)) {
1288     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1289     // index. So don't follow this use.
1290     return {nullptr, Unknown};
1291   }
1292
1293   const SCEV *WideExpr;
1294   ExtendKind ExtKind;
1295   if (DU.NeverNegative) {
1296     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1297     if (isa<SCEVAddRecExpr>(WideExpr))
1298       ExtKind = SignExtended;
1299     else {
1300       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1301       ExtKind = ZeroExtended;
1302     }
1303   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1304     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1305     ExtKind = SignExtended;
1306   } else {
1307     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1308     ExtKind = ZeroExtended;
1309   }
1310   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1311   if (!AddRec || AddRec->getLoop() != L)
1312     return {nullptr, Unknown};
1313   return {AddRec, ExtKind};
1314 }
1315
1316 /// This IV user cannot be widened. Replace this use of the original narrow IV
1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1319   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1320   if (!InsertPt)
1321     return;
1322   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1323                     << *DU.NarrowUse << "\n");
1324   IRBuilder<> Builder(InsertPt);
1325   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1326   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1327 }
1328
1329 /// If the narrow use is a compare instruction, then widen the compare
1330 //  (and possibly the other operand).  The extend operation is hoisted into the
1331 // loop preheader as far as possible.
1332 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1333   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1334   if (!Cmp)
1335     return false;
1336
1337   // We can legally widen the comparison in the following two cases:
1338   //
1339   //  - The signedness of the IV extension and comparison match
1340   //
1341   //  - The narrow IV is always positive (and thus its sign extension is equal
1342   //    to its zero extension).  For instance, let's say we're zero extending
1343   //    %narrow for the following use
1344   //
1345   //      icmp slt i32 %narrow, %val   ... (A)
1346   //
1347   //    and %narrow is always positive.  Then
1348   //
1349   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1350   //          == icmp slt i32 zext(%narrow), sext(%val)
1351   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1352   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1353     return false;
1354
1355   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1356   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1357   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1358   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1359
1360   // Widen the compare instruction.
1361   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1362   if (!InsertPt)
1363     return false;
1364   IRBuilder<> Builder(InsertPt);
1365   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1366
1367   // Widen the other operand of the compare, if necessary.
1368   if (CastWidth < IVWidth) {
1369     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1370     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1371   }
1372   return true;
1373 }
1374
1375 /// If the narrow use is an instruction whose two operands are the defining
1376 /// instruction of DU and a load instruction, then we have the following:
1377 /// if the load is hoisted outside the loop, then we do not reach this function
1378 /// as scalar evolution analysis works fine in widenIVUse with variables
1379 /// hoisted outside the loop and efficient code is subsequently generated by
1380 /// not emitting truncate instructions. But when the load is not hoisted
1381 /// (whether due to limitation in alias analysis or due to a true legality),
1382 /// then scalar evolution can not proceed with loop variant values and
1383 /// inefficient code is generated. This function handles the non-hoisted load
1384 /// special case by making the optimization generate the same type of code for
1385 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1386 /// instruction). This special case is important especially when the induction
1387 /// variables are affecting addressing mode in code generation.
1388 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1389   Instruction *NarrowUse = DU.NarrowUse;
1390   Instruction *NarrowDef = DU.NarrowDef;
1391   Instruction *WideDef = DU.WideDef;
1392
1393   // Handle the common case of add<nsw/nuw>
1394   const unsigned OpCode = NarrowUse->getOpcode();
1395   // Only Add/Sub/Mul instructions are supported.
1396   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1397       OpCode != Instruction::Mul)
1398     return false;
1399
1400   // The operand that is not defined by NarrowDef of DU. Let's call it the
1401   // other operand.
1402   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1403   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1404          "bad DU");
1405
1406   const SCEV *ExtendOperExpr = nullptr;
1407   const OverflowingBinaryOperator *OBO =
1408     cast<OverflowingBinaryOperator>(NarrowUse);
1409   ExtendKind ExtKind = getExtendKind(NarrowDef);
1410   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1411     ExtendOperExpr = SE->getSignExtendExpr(
1412       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1413   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1414     ExtendOperExpr = SE->getZeroExtendExpr(
1415       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1416   else
1417     return false;
1418
1419   // We are interested in the other operand being a load instruction.
1420   // But, we should look into relaxing this restriction later on.
1421   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1422   if (I && I->getOpcode() != Instruction::Load)
1423     return false;
1424
1425   // Verifying that Defining operand is an AddRec
1426   const SCEV *Op1 = SE->getSCEV(WideDef);
1427   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1428   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1429     return false;
1430   // Verifying that other operand is an Extend.
1431   if (ExtKind == SignExtended) {
1432     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1433       return false;
1434   } else {
1435     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1436       return false;
1437   }
1438
1439   if (ExtKind == SignExtended) {
1440     for (Use &U : NarrowUse->uses()) {
1441       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1442       if (!User || User->getType() != WideType)
1443         return false;
1444     }
1445   } else { // ExtKind == ZeroExtended
1446     for (Use &U : NarrowUse->uses()) {
1447       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1448       if (!User || User->getType() != WideType)
1449         return false;
1450     }
1451   }
1452
1453   return true;
1454 }
1455
1456 /// Special Case for widening with variant Loads (see
1457 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1458 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1459   Instruction *NarrowUse = DU.NarrowUse;
1460   Instruction *NarrowDef = DU.NarrowDef;
1461   Instruction *WideDef = DU.WideDef;
1462
1463   ExtendKind ExtKind = getExtendKind(NarrowDef);
1464
1465   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1466
1467   // Generating a widening use instruction.
1468   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1469                    ? WideDef
1470                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1471                                       ExtKind, NarrowUse);
1472   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1473                    ? WideDef
1474                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1475                                       ExtKind, NarrowUse);
1476
1477   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1478   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1479                                         NarrowBO->getName());
1480   IRBuilder<> Builder(NarrowUse);
1481   Builder.Insert(WideBO);
1482   WideBO->copyIRFlags(NarrowBO);
1483
1484   if (ExtKind == SignExtended)
1485     ExtendKindMap[NarrowUse] = SignExtended;
1486   else
1487     ExtendKindMap[NarrowUse] = ZeroExtended;
1488
1489   // Update the Use.
1490   if (ExtKind == SignExtended) {
1491     for (Use &U : NarrowUse->uses()) {
1492       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1493       if (User && User->getType() == WideType) {
1494         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1495                           << *WideBO << "\n");
1496         ++NumElimExt;
1497         User->replaceAllUsesWith(WideBO);
1498         DeadInsts.emplace_back(User);
1499       }
1500     }
1501   } else { // ExtKind == ZeroExtended
1502     for (Use &U : NarrowUse->uses()) {
1503       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1504       if (User && User->getType() == WideType) {
1505         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1506                           << *WideBO << "\n");
1507         ++NumElimExt;
1508         User->replaceAllUsesWith(WideBO);
1509         DeadInsts.emplace_back(User);
1510       }
1511     }
1512   }
1513 }
1514
1515 /// Determine whether an individual user of the narrow IV can be widened. If so,
1516 /// return the wide clone of the user.
1517 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1518   assert(ExtendKindMap.count(DU.NarrowDef) &&
1519          "Should already know the kind of extension used to widen NarrowDef");
1520
1521   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1522   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1523     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1524       // For LCSSA phis, sink the truncate outside the loop.
1525       // After SimplifyCFG most loop exit targets have a single predecessor.
1526       // Otherwise fall back to a truncate within the loop.
1527       if (UsePhi->getNumOperands() != 1)
1528         truncateIVUse(DU, DT, LI);
1529       else {
1530         // Widening the PHI requires us to insert a trunc.  The logical place
1531         // for this trunc is in the same BB as the PHI.  This is not possible if
1532         // the BB is terminated by a catchswitch.
1533         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1534           return nullptr;
1535
1536         PHINode *WidePhi =
1537           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1538                           UsePhi);
1539         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1540         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1541         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1542         UsePhi->replaceAllUsesWith(Trunc);
1543         DeadInsts.emplace_back(UsePhi);
1544         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1545                           << *WidePhi << "\n");
1546       }
1547       return nullptr;
1548     }
1549   }
1550
1551   // This narrow use can be widened by a sext if it's non-negative or its narrow
1552   // def was widended by a sext. Same for zext.
1553   auto canWidenBySExt = [&]() {
1554     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1555   };
1556   auto canWidenByZExt = [&]() {
1557     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1558   };
1559
1560   // Our raison d'etre! Eliminate sign and zero extension.
1561   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1562       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1563     Value *NewDef = DU.WideDef;
1564     if (DU.NarrowUse->getType() != WideType) {
1565       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1566       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1567       if (CastWidth < IVWidth) {
1568         // The cast isn't as wide as the IV, so insert a Trunc.
1569         IRBuilder<> Builder(DU.NarrowUse);
1570         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1571       }
1572       else {
1573         // A wider extend was hidden behind a narrower one. This may induce
1574         // another round of IV widening in which the intermediate IV becomes
1575         // dead. It should be very rare.
1576         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1577                           << " not wide enough to subsume " << *DU.NarrowUse
1578                           << "\n");
1579         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1580         NewDef = DU.NarrowUse;
1581       }
1582     }
1583     if (NewDef != DU.NarrowUse) {
1584       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1585                         << " replaced by " << *DU.WideDef << "\n");
1586       ++NumElimExt;
1587       DU.NarrowUse->replaceAllUsesWith(NewDef);
1588       DeadInsts.emplace_back(DU.NarrowUse);
1589     }
1590     // Now that the extend is gone, we want to expose it's uses for potential
1591     // further simplification. We don't need to directly inform SimplifyIVUsers
1592     // of the new users, because their parent IV will be processed later as a
1593     // new loop phi. If we preserved IVUsers analysis, we would also want to
1594     // push the uses of WideDef here.
1595
1596     // No further widening is needed. The deceased [sz]ext had done it for us.
1597     return nullptr;
1598   }
1599
1600   // Does this user itself evaluate to a recurrence after widening?
1601   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1602   if (!WideAddRec.first)
1603     WideAddRec = getWideRecurrence(DU);
1604
1605   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1606   if (!WideAddRec.first) {
1607     // If use is a loop condition, try to promote the condition instead of
1608     // truncating the IV first.
1609     if (widenLoopCompare(DU))
1610       return nullptr;
1611
1612     // We are here about to generate a truncate instruction that may hurt
1613     // performance because the scalar evolution expression computed earlier
1614     // in WideAddRec.first does not indicate a polynomial induction expression.
1615     // In that case, look at the operands of the use instruction to determine
1616     // if we can still widen the use instead of truncating its operand.
1617     if (widenWithVariantLoadUse(DU)) {
1618       widenWithVariantLoadUseCodegen(DU);
1619       return nullptr;
1620     }
1621
1622     // This user does not evaluate to a recurrence after widening, so don't
1623     // follow it. Instead insert a Trunc to kill off the original use,
1624     // eventually isolating the original narrow IV so it can be removed.
1625     truncateIVUse(DU, DT, LI);
1626     return nullptr;
1627   }
1628   // Assume block terminators cannot evaluate to a recurrence. We can't to
1629   // insert a Trunc after a terminator if there happens to be a critical edge.
1630   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1631          "SCEV is not expected to evaluate a block terminator");
1632
1633   // Reuse the IV increment that SCEVExpander created as long as it dominates
1634   // NarrowUse.
1635   Instruction *WideUse = nullptr;
1636   if (WideAddRec.first == WideIncExpr &&
1637       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1638     WideUse = WideInc;
1639   else {
1640     WideUse = cloneIVUser(DU, WideAddRec.first);
1641     if (!WideUse)
1642       return nullptr;
1643   }
1644   // Evaluation of WideAddRec ensured that the narrow expression could be
1645   // extended outside the loop without overflow. This suggests that the wide use
1646   // evaluates to the same expression as the extended narrow use, but doesn't
1647   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1648   // where it fails, we simply throw away the newly created wide use.
1649   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1650     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1651                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1652                       << "\n");
1653     DeadInsts.emplace_back(WideUse);
1654     return nullptr;
1655   }
1656
1657   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1658   // Returning WideUse pushes it on the worklist.
1659   return WideUse;
1660 }
1661
1662 /// Add eligible users of NarrowDef to NarrowIVUsers.
1663 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1664   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1665   bool NonNegativeDef =
1666       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1667                            SE->getConstant(NarrowSCEV->getType(), 0));
1668   for (User *U : NarrowDef->users()) {
1669     Instruction *NarrowUser = cast<Instruction>(U);
1670
1671     // Handle data flow merges and bizarre phi cycles.
1672     if (!Widened.insert(NarrowUser).second)
1673       continue;
1674
1675     bool NonNegativeUse = false;
1676     if (!NonNegativeDef) {
1677       // We might have a control-dependent range information for this context.
1678       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1679         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1680     }
1681
1682     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1683                                NonNegativeDef || NonNegativeUse);
1684   }
1685 }
1686
1687 /// Process a single induction variable. First use the SCEVExpander to create a
1688 /// wide induction variable that evaluates to the same recurrence as the
1689 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1690 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1691 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1692 ///
1693 /// It would be simpler to delete uses as they are processed, but we must avoid
1694 /// invalidating SCEV expressions.
1695 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1696   // Is this phi an induction variable?
1697   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1698   if (!AddRec)
1699     return nullptr;
1700
1701   // Widen the induction variable expression.
1702   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1703                                ? SE->getSignExtendExpr(AddRec, WideType)
1704                                : SE->getZeroExtendExpr(AddRec, WideType);
1705
1706   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1707          "Expect the new IV expression to preserve its type");
1708
1709   // Can the IV be extended outside the loop without overflow?
1710   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1711   if (!AddRec || AddRec->getLoop() != L)
1712     return nullptr;
1713
1714   // An AddRec must have loop-invariant operands. Since this AddRec is
1715   // materialized by a loop header phi, the expression cannot have any post-loop
1716   // operands, so they must dominate the loop header.
1717   assert(
1718       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1719       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1720       "Loop header phi recurrence inputs do not dominate the loop");
1721
1722   // Iterate over IV uses (including transitive ones) looking for IV increments
1723   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1724   // the increment calculate control-dependent range information basing on
1725   // dominating conditions inside of the loop (e.g. a range check inside of the
1726   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1727   //
1728   // Control-dependent range information is later used to prove that a narrow
1729   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1730   // this on demand because when pushNarrowIVUsers needs this information some
1731   // of the dominating conditions might be already widened.
1732   if (UsePostIncrementRanges)
1733     calculatePostIncRanges(OrigPhi);
1734
1735   // The rewriter provides a value for the desired IV expression. This may
1736   // either find an existing phi or materialize a new one. Either way, we
1737   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1738   // of the phi-SCC dominates the loop entry.
1739   Instruction *InsertPt = &L->getHeader()->front();
1740   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1741
1742   // Remembering the WideIV increment generated by SCEVExpander allows
1743   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1744   // employ a general reuse mechanism because the call above is the only call to
1745   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1746   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1747     WideInc =
1748       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1749     WideIncExpr = SE->getSCEV(WideInc);
1750     // Propagate the debug location associated with the original loop increment
1751     // to the new (widened) increment.
1752     auto *OrigInc =
1753       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1754     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1755   }
1756
1757   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1758   ++NumWidened;
1759
1760   // Traverse the def-use chain using a worklist starting at the original IV.
1761   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1762
1763   Widened.insert(OrigPhi);
1764   pushNarrowIVUsers(OrigPhi, WidePhi);
1765
1766   while (!NarrowIVUsers.empty()) {
1767     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1768
1769     // Process a def-use edge. This may replace the use, so don't hold a
1770     // use_iterator across it.
1771     Instruction *WideUse = widenIVUse(DU, Rewriter);
1772
1773     // Follow all def-use edges from the previous narrow use.
1774     if (WideUse)
1775       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1776
1777     // widenIVUse may have removed the def-use edge.
1778     if (DU.NarrowDef->use_empty())
1779       DeadInsts.emplace_back(DU.NarrowDef);
1780   }
1781
1782   // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1783   // evaluate the same recurrence, we can just copy the debug info over.
1784   SmallVector<DbgValueInst *, 1> DbgValues;
1785   llvm::findDbgValues(DbgValues, OrigPhi);
1786   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1787                                      ValueAsMetadata::get(WidePhi));
1788   for (auto &DbgValue : DbgValues)
1789     DbgValue->setOperand(0, MDPhi);
1790   return WidePhi;
1791 }
1792
1793 /// Calculates control-dependent range for the given def at the given context
1794 /// by looking at dominating conditions inside of the loop
1795 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1796                                     Instruction *NarrowUser) {
1797   using namespace llvm::PatternMatch;
1798
1799   Value *NarrowDefLHS;
1800   const APInt *NarrowDefRHS;
1801   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1802                                  m_APInt(NarrowDefRHS))) ||
1803       !NarrowDefRHS->isNonNegative())
1804     return;
1805
1806   auto UpdateRangeFromCondition = [&] (Value *Condition,
1807                                        bool TrueDest) {
1808     CmpInst::Predicate Pred;
1809     Value *CmpRHS;
1810     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1811                                  m_Value(CmpRHS))))
1812       return;
1813
1814     CmpInst::Predicate P =
1815             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1816
1817     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1818     auto CmpConstrainedLHSRange =
1819             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1820     auto NarrowDefRange =
1821             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1822
1823     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1824   };
1825
1826   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1827     if (!HasGuards)
1828       return;
1829
1830     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1831                                      Ctx->getParent()->rend())) {
1832       Value *C = nullptr;
1833       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1834         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1835     }
1836   };
1837
1838   UpdateRangeFromGuards(NarrowUser);
1839
1840   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1841   // If NarrowUserBB is statically unreachable asking dominator queries may
1842   // yield surprising results. (e.g. the block may not have a dom tree node)
1843   if (!DT->isReachableFromEntry(NarrowUserBB))
1844     return;
1845
1846   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1847        L->contains(DTB->getBlock());
1848        DTB = DTB->getIDom()) {
1849     auto *BB = DTB->getBlock();
1850     auto *TI = BB->getTerminator();
1851     UpdateRangeFromGuards(TI);
1852
1853     auto *BI = dyn_cast<BranchInst>(TI);
1854     if (!BI || !BI->isConditional())
1855       continue;
1856
1857     auto *TrueSuccessor = BI->getSuccessor(0);
1858     auto *FalseSuccessor = BI->getSuccessor(1);
1859
1860     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1861       return BBE.isSingleEdge() &&
1862              DT->dominates(BBE, NarrowUser->getParent());
1863     };
1864
1865     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1866       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1867
1868     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1869       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1870   }
1871 }
1872
1873 /// Calculates PostIncRangeInfos map for the given IV
1874 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1875   SmallPtrSet<Instruction *, 16> Visited;
1876   SmallVector<Instruction *, 6> Worklist;
1877   Worklist.push_back(OrigPhi);
1878   Visited.insert(OrigPhi);
1879
1880   while (!Worklist.empty()) {
1881     Instruction *NarrowDef = Worklist.pop_back_val();
1882
1883     for (Use &U : NarrowDef->uses()) {
1884       auto *NarrowUser = cast<Instruction>(U.getUser());
1885
1886       // Don't go looking outside the current loop.
1887       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1888       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1889         continue;
1890
1891       if (!Visited.insert(NarrowUser).second)
1892         continue;
1893
1894       Worklist.push_back(NarrowUser);
1895
1896       calculatePostIncRange(NarrowDef, NarrowUser);
1897     }
1898   }
1899 }
1900
1901 //===----------------------------------------------------------------------===//
1902 //  Live IV Reduction - Minimize IVs live across the loop.
1903 //===----------------------------------------------------------------------===//
1904
1905 //===----------------------------------------------------------------------===//
1906 //  Simplification of IV users based on SCEV evaluation.
1907 //===----------------------------------------------------------------------===//
1908
1909 namespace {
1910
1911 class IndVarSimplifyVisitor : public IVVisitor {
1912   ScalarEvolution *SE;
1913   const TargetTransformInfo *TTI;
1914   PHINode *IVPhi;
1915
1916 public:
1917   WideIVInfo WI;
1918
1919   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1920                         const TargetTransformInfo *TTI,
1921                         const DominatorTree *DTree)
1922     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1923     DT = DTree;
1924     WI.NarrowIV = IVPhi;
1925   }
1926
1927   // Implement the interface used by simplifyUsersOfIV.
1928   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1929 };
1930
1931 } // end anonymous namespace
1932
1933 /// Iteratively perform simplification on a worklist of IV users. Each
1934 /// successive simplification may push more users which may themselves be
1935 /// candidates for simplification.
1936 ///
1937 /// Sign/Zero extend elimination is interleaved with IV simplification.
1938 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1939                                        SCEVExpander &Rewriter,
1940                                        LoopInfo *LI) {
1941   SmallVector<WideIVInfo, 8> WideIVs;
1942
1943   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1944           Intrinsic::getName(Intrinsic::experimental_guard));
1945   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1946
1947   SmallVector<PHINode*, 8> LoopPhis;
1948   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1949     LoopPhis.push_back(cast<PHINode>(I));
1950   }
1951   // Each round of simplification iterates through the SimplifyIVUsers worklist
1952   // for all current phis, then determines whether any IVs can be
1953   // widened. Widening adds new phis to LoopPhis, inducing another round of
1954   // simplification on the wide IVs.
1955   bool Changed = false;
1956   while (!LoopPhis.empty()) {
1957     // Evaluate as many IV expressions as possible before widening any IVs. This
1958     // forces SCEV to set no-wrap flags before evaluating sign/zero
1959     // extension. The first time SCEV attempts to normalize sign/zero extension,
1960     // the result becomes final. So for the most predictable results, we delay
1961     // evaluation of sign/zero extend evaluation until needed, and avoid running
1962     // other SCEV based analysis prior to simplifyAndExtend.
1963     do {
1964       PHINode *CurrIV = LoopPhis.pop_back_val();
1965
1966       // Information about sign/zero extensions of CurrIV.
1967       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1968
1969       Changed |=
1970           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1971
1972       if (Visitor.WI.WidestNativeType) {
1973         WideIVs.push_back(Visitor.WI);
1974       }
1975     } while(!LoopPhis.empty());
1976
1977     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1978       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1979       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1980         Changed = true;
1981         LoopPhis.push_back(WidePhi);
1982       }
1983     }
1984   }
1985   return Changed;
1986 }
1987
1988 //===----------------------------------------------------------------------===//
1989 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1990 //===----------------------------------------------------------------------===//
1991
1992 /// Given an Value which is hoped to be part of an add recurance in the given
1993 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1994 /// that this is less general than SCEVs AddRec checking.
1995 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1996   Instruction *IncI = dyn_cast<Instruction>(IncV);
1997   if (!IncI)
1998     return nullptr;
1999
2000   switch (IncI->getOpcode()) {
2001   case Instruction::Add:
2002   case Instruction::Sub:
2003     break;
2004   case Instruction::GetElementPtr:
2005     // An IV counter must preserve its type.
2006     if (IncI->getNumOperands() == 2)
2007       break;
2008     LLVM_FALLTHROUGH;
2009   default:
2010     return nullptr;
2011   }
2012
2013   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2014   if (Phi && Phi->getParent() == L->getHeader()) {
2015     if (L->isLoopInvariant(IncI->getOperand(1)))
2016       return Phi;
2017     return nullptr;
2018   }
2019   if (IncI->getOpcode() == Instruction::GetElementPtr)
2020     return nullptr;
2021
2022   // Allow add/sub to be commuted.
2023   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2024   if (Phi && Phi->getParent() == L->getHeader()) {
2025     if (L->isLoopInvariant(IncI->getOperand(0)))
2026       return Phi;
2027   }
2028   return nullptr;
2029 }
2030
2031 /// Whether the current loop exit test is based on this value.  Currently this
2032 /// is limited to a direct use in the loop condition.
2033 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2034   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2035   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2036   // TODO: Allow non-icmp loop test.
2037   if (!ICmp)
2038     return false;
2039
2040   // TODO: Allow indirect use.
2041   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2042 }
2043
2044 /// linearFunctionTestReplace policy. Return true unless we can show that the
2045 /// current exit test is already sufficiently canonical.
2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2047   assert(L->getLoopLatch() && "Must be in simplified form");
2048
2049   // Avoid converting a constant or loop invariant test back to a runtime
2050   // test.  This is critical for when SCEV's cached ExitCount is less precise
2051   // than the current IR (such as after we've proven a particular exit is
2052   // actually dead and thus the BE count never reaches our ExitCount.)
2053   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2054   if (L->isLoopInvariant(BI->getCondition()))
2055     return false;
2056
2057   // Do LFTR to simplify the exit condition to an ICMP.
2058   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2059   if (!Cond)
2060     return true;
2061
2062   // Do LFTR to simplify the exit ICMP to EQ/NE
2063   ICmpInst::Predicate Pred = Cond->getPredicate();
2064   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2065     return true;
2066
2067   // Look for a loop invariant RHS
2068   Value *LHS = Cond->getOperand(0);
2069   Value *RHS = Cond->getOperand(1);
2070   if (!L->isLoopInvariant(RHS)) {
2071     if (!L->isLoopInvariant(LHS))
2072       return true;
2073     std::swap(LHS, RHS);
2074   }
2075   // Look for a simple IV counter LHS
2076   PHINode *Phi = dyn_cast<PHINode>(LHS);
2077   if (!Phi)
2078     Phi = getLoopPhiForCounter(LHS, L);
2079
2080   if (!Phi)
2081     return true;
2082
2083   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2084   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2085   if (Idx < 0)
2086     return true;
2087
2088   // Do LFTR if the exit condition's IV is *not* a simple counter.
2089   Value *IncV = Phi->getIncomingValue(Idx);
2090   return Phi != getLoopPhiForCounter(IncV, L);
2091 }
2092
2093 /// Return true if undefined behavior would provable be executed on the path to
2094 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
2095 /// anything about whether OnPathTo is actually executed or whether Root is
2096 /// actually poison.  This can be used to assess whether a new use of Root can
2097 /// be added at a location which is control equivalent with OnPathTo (such as
2098 /// immediately before it) without introducing UB which didn't previously
2099 /// exist.  Note that a false result conveys no information.
2100 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2101                                           Instruction *OnPathTo,
2102                                           DominatorTree *DT) {
2103   // Basic approach is to assume Root is poison, propagate poison forward
2104   // through all users we can easily track, and then check whether any of those
2105   // users are provable UB and must execute before out exiting block might
2106   // exit.
2107
2108   // The set of all recursive users we've visited (which are assumed to all be
2109   // poison because of said visit)
2110   SmallSet<const Value *, 16> KnownPoison;
2111   SmallVector<const Instruction*, 16> Worklist;
2112   Worklist.push_back(Root);
2113   while (!Worklist.empty()) {
2114     const Instruction *I = Worklist.pop_back_val();
2115
2116     // If we know this must trigger UB on a path leading our target.
2117     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2118       return true;
2119
2120     // If we can't analyze propagation through this instruction, just skip it
2121     // and transitive users.  Safe as false is a conservative result.
2122     if (!propagatesFullPoison(I) && I != Root)
2123       continue;
2124
2125     if (KnownPoison.insert(I).second)
2126       for (const User *User : I->users())
2127         Worklist.push_back(cast<Instruction>(User));
2128   }
2129
2130   // Might be non-UB, or might have a path we couldn't prove must execute on
2131   // way to exiting bb.
2132   return false;
2133 }
2134
2135 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2136 /// down to checking that all operands are constant and listing instructions
2137 /// that may hide undef.
2138 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2139                                unsigned Depth) {
2140   if (isa<Constant>(V))
2141     return !isa<UndefValue>(V);
2142
2143   if (Depth >= 6)
2144     return false;
2145
2146   // Conservatively handle non-constant non-instructions. For example, Arguments
2147   // may be undef.
2148   Instruction *I = dyn_cast<Instruction>(V);
2149   if (!I)
2150     return false;
2151
2152   // Load and return values may be undef.
2153   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2154     return false;
2155
2156   // Optimistically handle other instructions.
2157   for (Value *Op : I->operands()) {
2158     if (!Visited.insert(Op).second)
2159       continue;
2160     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2161       return false;
2162   }
2163   return true;
2164 }
2165
2166 /// Return true if the given value is concrete. We must prove that undef can
2167 /// never reach it.
2168 ///
2169 /// TODO: If we decide that this is a good approach to checking for undef, we
2170 /// may factor it into a common location.
2171 static bool hasConcreteDef(Value *V) {
2172   SmallPtrSet<Value*, 8> Visited;
2173   Visited.insert(V);
2174   return hasConcreteDefImpl(V, Visited, 0);
2175 }
2176
2177 /// Return true if this IV has any uses other than the (soon to be rewritten)
2178 /// loop exit test.
2179 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2180   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2181   Value *IncV = Phi->getIncomingValue(LatchIdx);
2182
2183   for (User *U : Phi->users())
2184     if (U != Cond && U != IncV) return false;
2185
2186   for (User *U : IncV->users())
2187     if (U != Cond && U != Phi) return false;
2188   return true;
2189 }
2190
2191 /// Return true if the given phi is a "counter" in L.  A counter is an
2192 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2193 /// step of 1.  Note that L must have exactly one latch.
2194 static bool isLoopCounter(PHINode* Phi, Loop *L,
2195                           ScalarEvolution *SE) {
2196   assert(Phi->getParent() == L->getHeader());
2197   assert(L->getLoopLatch());
2198
2199   if (!SE->isSCEVable(Phi->getType()))
2200     return false;
2201
2202   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2203   if (!AR || AR->getLoop() != L || !AR->isAffine())
2204     return false;
2205
2206   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2207   if (!Step || !Step->isOne())
2208     return false;
2209
2210   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2211   Value *IncV = Phi->getIncomingValue(LatchIdx);
2212   return (getLoopPhiForCounter(IncV, L) == Phi);
2213 }
2214
2215 /// Search the loop header for a loop counter (anadd rec w/step of one)
2216 /// suitable for use by LFTR.  If multiple counters are available, select the
2217 /// "best" one based profitable heuristics.
2218 ///
2219 /// BECount may be an i8* pointer type. The pointer difference is already
2220 /// valid count without scaling the address stride, so it remains a pointer
2221 /// expression as far as SCEV is concerned.
2222 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2223                                 const SCEV *BECount,
2224                                 ScalarEvolution *SE, DominatorTree *DT) {
2225   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2226
2227   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2228
2229   // Loop over all of the PHI nodes, looking for a simple counter.
2230   PHINode *BestPhi = nullptr;
2231   const SCEV *BestInit = nullptr;
2232   BasicBlock *LatchBlock = L->getLoopLatch();
2233   assert(LatchBlock && "Must be in simplified form");
2234   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2235
2236   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2237     PHINode *Phi = cast<PHINode>(I);
2238     if (!isLoopCounter(Phi, L, SE))
2239       continue;
2240
2241     // Avoid comparing an integer IV against a pointer Limit.
2242     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2243       continue;
2244
2245     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2246
2247     // AR may be a pointer type, while BECount is an integer type.
2248     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2249     // AR may not be a narrower type, or we may never exit.
2250     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2251     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2252       continue;
2253
2254     // Avoid reusing a potentially undef value to compute other values that may
2255     // have originally had a concrete definition.
2256     if (!hasConcreteDef(Phi)) {
2257       // We explicitly allow unknown phis as long as they are already used by
2258       // the loop exit test.  This is legal since performing LFTR could not
2259       // increase the number of undef users.
2260       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2261       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2262           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2263         continue;
2264     }
2265
2266     // Avoid introducing undefined behavior due to poison which didn't exist in
2267     // the original program.  (Annoyingly, the rules for poison and undef
2268     // propagation are distinct, so this does NOT cover the undef case above.)
2269     // We have to ensure that we don't introduce UB by introducing a use on an
2270     // iteration where said IV produces poison.  Our strategy here differs for
2271     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2272     // see code in linearFunctionTestReplace.  For pointers, we restrict
2273     // transforms as there is no good way to reinfer inbounds once lost.
2274     if (!Phi->getType()->isIntegerTy() &&
2275         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2276       continue;
2277
2278     const SCEV *Init = AR->getStart();
2279
2280     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2281       // Don't force a live loop counter if another IV can be used.
2282       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2283         continue;
2284
2285       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2286       // also prefers integer to pointer IVs.
2287       if (BestInit->isZero() != Init->isZero()) {
2288         if (BestInit->isZero())
2289           continue;
2290       }
2291       // If two IVs both count from zero or both count from nonzero then the
2292       // narrower is likely a dead phi that has been widened. Use the wider phi
2293       // to allow the other to be eliminated.
2294       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2295         continue;
2296     }
2297     BestPhi = Phi;
2298     BestInit = Init;
2299   }
2300   return BestPhi;
2301 }
2302
2303 /// Insert an IR expression which computes the value held by the IV IndVar
2304 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2305 /// is taken ExitCount times.
2306 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2307                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2308                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2309   assert(isLoopCounter(IndVar, L, SE));
2310   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2311   const SCEV *IVInit = AR->getStart();
2312
2313   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2314   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2315   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2316   // the existing GEPs whenever possible.
2317   if (IndVar->getType()->isPointerTy() &&
2318       !ExitCount->getType()->isPointerTy()) {
2319     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2320     // signed value. ExitCount on the other hand represents the loop trip count,
2321     // which is an unsigned value. FindLoopCounter only allows induction
2322     // variables that have a positive unit stride of one. This means we don't
2323     // have to handle the case of negative offsets (yet) and just need to zero
2324     // extend ExitCount.
2325     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2326     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2327     if (UsePostInc)
2328       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2329
2330     // Expand the code for the iteration count.
2331     assert(SE->isLoopInvariant(IVOffset, L) &&
2332            "Computed iteration count is not loop invariant!");
2333
2334     // We could handle pointer IVs other than i8*, but we need to compensate for
2335     // gep index scaling.
2336     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2337                              cast<PointerType>(IndVar->getType())
2338                                  ->getElementType())->isOne() &&
2339            "unit stride pointer IV must be i8*");
2340
2341     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2342     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2343     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2344   } else {
2345     // In any other case, convert both IVInit and ExitCount to integers before
2346     // comparing. This may result in SCEV expansion of pointers, but in practice
2347     // SCEV will fold the pointer arithmetic away as such:
2348     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2349     //
2350     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2351     // for simple memset-style loops.
2352     //
2353     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2354     // were generated on top of case #2, which is not expected.
2355
2356     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2357     // For unit stride, IVCount = Start + ExitCount with 2's complement
2358     // overflow.
2359
2360     // For integer IVs, truncate the IV before computing IVInit + BECount,
2361     // unless we know apriori that the limit must be a constant when evaluated
2362     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2363     // of the IV in the loop over a (potentially) expensive expansion of the
2364     // widened exit count add(zext(add)) expression.
2365     if (SE->getTypeSizeInBits(IVInit->getType())
2366         > SE->getTypeSizeInBits(ExitCount->getType())) {
2367       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2368         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2369       else
2370         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2371     }
2372
2373     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2374
2375     if (UsePostInc)
2376       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2377
2378     // Expand the code for the iteration count.
2379     assert(SE->isLoopInvariant(IVLimit, L) &&
2380            "Computed iteration count is not loop invariant!");
2381     // Ensure that we generate the same type as IndVar, or a smaller integer
2382     // type. In the presence of null pointer values, we have an integer type
2383     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2384     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2385       IndVar->getType() : ExitCount->getType();
2386     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2387     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2388   }
2389 }
2390
2391 /// This method rewrites the exit condition of the loop to be a canonical !=
2392 /// comparison against the incremented loop induction variable.  This pass is
2393 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2394 /// determine a loop-invariant trip count of the loop, which is actually a much
2395 /// broader range than just linear tests.
2396 bool IndVarSimplify::
2397 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2398                           const SCEV *ExitCount,
2399                           PHINode *IndVar, SCEVExpander &Rewriter) {
2400   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2401   assert(isLoopCounter(IndVar, L, SE));
2402   Instruction * const IncVar =
2403     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2404
2405   // Initialize CmpIndVar to the preincremented IV.
2406   Value *CmpIndVar = IndVar;
2407   bool UsePostInc = false;
2408
2409   // If the exiting block is the same as the backedge block, we prefer to
2410   // compare against the post-incremented value, otherwise we must compare
2411   // against the preincremented value.
2412   if (ExitingBB == L->getLoopLatch()) {
2413     // For pointer IVs, we chose to not strip inbounds which requires us not
2414     // to add a potentially UB introducing use.  We need to either a) show
2415     // the loop test we're modifying is already in post-inc form, or b) show
2416     // that adding a use must not introduce UB.
2417     bool SafeToPostInc =
2418         IndVar->getType()->isIntegerTy() ||
2419         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2420         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2421     if (SafeToPostInc) {
2422       UsePostInc = true;
2423       CmpIndVar = IncVar;
2424     }
2425   }
2426
2427   // It may be necessary to drop nowrap flags on the incrementing instruction
2428   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2429   // case the increment might have previously been poison on the last iteration
2430   // only) or if LFTR switches to a different IV that was previously dynamically
2431   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2432   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2433   // check), because the pre-inc addrec flags may be adopted from the original
2434   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2435   // TODO: This handling is inaccurate for one case: If we switch to a
2436   // dynamically dead IV that wraps on the first loop iteration only, which is
2437   // not covered by the post-inc addrec. (If the new IV was not dynamically
2438   // dead, it could not be poison on the first iteration in the first place.)
2439   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2440     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2441     if (BO->hasNoUnsignedWrap())
2442       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2443     if (BO->hasNoSignedWrap())
2444       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2445   }
2446
2447   Value *ExitCnt = genLoopLimit(
2448       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2449   assert(ExitCnt->getType()->isPointerTy() ==
2450              IndVar->getType()->isPointerTy() &&
2451          "genLoopLimit missed a cast");
2452
2453   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2454   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2455   ICmpInst::Predicate P;
2456   if (L->contains(BI->getSuccessor(0)))
2457     P = ICmpInst::ICMP_NE;
2458   else
2459     P = ICmpInst::ICMP_EQ;
2460
2461   IRBuilder<> Builder(BI);
2462
2463   // The new loop exit condition should reuse the debug location of the
2464   // original loop exit condition.
2465   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2466     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2467
2468   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2469   // avoid the expensive expansion of the limit expression in the wider type,
2470   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2471   // since we know (from the exit count bitwidth), that we can't self-wrap in
2472   // the narrower type.
2473   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2474   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2475   if (CmpIndVarSize > ExitCntSize) {
2476     assert(!CmpIndVar->getType()->isPointerTy() &&
2477            !ExitCnt->getType()->isPointerTy());
2478
2479     // Before resorting to actually inserting the truncate, use the same
2480     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2481     // the other side of the comparison instead.  We still evaluate the limit
2482     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2483     // a truncate within in.
2484     bool Extended = false;
2485     const SCEV *IV = SE->getSCEV(CmpIndVar);
2486     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2487                                                   ExitCnt->getType());
2488     const SCEV *ZExtTrunc =
2489       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2490
2491     if (ZExtTrunc == IV) {
2492       Extended = true;
2493       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2494                                    "wide.trip.count");
2495     } else {
2496       const SCEV *SExtTrunc =
2497         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2498       if (SExtTrunc == IV) {
2499         Extended = true;
2500         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2501                                      "wide.trip.count");
2502       }
2503     }
2504
2505     if (Extended) {
2506       bool Discard;
2507       L->makeLoopInvariant(ExitCnt, Discard);
2508     } else
2509       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2510                                       "lftr.wideiv");
2511   }
2512   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2513                     << "      LHS:" << *CmpIndVar << '\n'
2514                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2515                     << "\n"
2516                     << "      RHS:\t" << *ExitCnt << "\n"
2517                     << "ExitCount:\t" << *ExitCount << "\n"
2518                     << "  was: " << *BI->getCondition() << "\n");
2519
2520   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2521   Value *OrigCond = BI->getCondition();
2522   // It's tempting to use replaceAllUsesWith here to fully replace the old
2523   // comparison, but that's not immediately safe, since users of the old
2524   // comparison may not be dominated by the new comparison. Instead, just
2525   // update the branch to use the new comparison; in the common case this
2526   // will make old comparison dead.
2527   BI->setCondition(Cond);
2528   DeadInsts.push_back(OrigCond);
2529
2530   ++NumLFTR;
2531   return true;
2532 }
2533
2534 //===----------------------------------------------------------------------===//
2535 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2536 //===----------------------------------------------------------------------===//
2537
2538 /// If there's a single exit block, sink any loop-invariant values that
2539 /// were defined in the preheader but not used inside the loop into the
2540 /// exit block to reduce register pressure in the loop.
2541 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2542   BasicBlock *ExitBlock = L->getExitBlock();
2543   if (!ExitBlock) return false;
2544
2545   BasicBlock *Preheader = L->getLoopPreheader();
2546   if (!Preheader) return false;
2547
2548   bool MadeAnyChanges = false;
2549   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2550   BasicBlock::iterator I(Preheader->getTerminator());
2551   while (I != Preheader->begin()) {
2552     --I;
2553     // New instructions were inserted at the end of the preheader.
2554     if (isa<PHINode>(I))
2555       break;
2556
2557     // Don't move instructions which might have side effects, since the side
2558     // effects need to complete before instructions inside the loop.  Also don't
2559     // move instructions which might read memory, since the loop may modify
2560     // memory. Note that it's okay if the instruction might have undefined
2561     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2562     // block.
2563     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2564       continue;
2565
2566     // Skip debug info intrinsics.
2567     if (isa<DbgInfoIntrinsic>(I))
2568       continue;
2569
2570     // Skip eh pad instructions.
2571     if (I->isEHPad())
2572       continue;
2573
2574     // Don't sink alloca: we never want to sink static alloca's out of the
2575     // entry block, and correctly sinking dynamic alloca's requires
2576     // checks for stacksave/stackrestore intrinsics.
2577     // FIXME: Refactor this check somehow?
2578     if (isa<AllocaInst>(I))
2579       continue;
2580
2581     // Determine if there is a use in or before the loop (direct or
2582     // otherwise).
2583     bool UsedInLoop = false;
2584     for (Use &U : I->uses()) {
2585       Instruction *User = cast<Instruction>(U.getUser());
2586       BasicBlock *UseBB = User->getParent();
2587       if (PHINode *P = dyn_cast<PHINode>(User)) {
2588         unsigned i =
2589           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2590         UseBB = P->getIncomingBlock(i);
2591       }
2592       if (UseBB == Preheader || L->contains(UseBB)) {
2593         UsedInLoop = true;
2594         break;
2595       }
2596     }
2597
2598     // If there is, the def must remain in the preheader.
2599     if (UsedInLoop)
2600       continue;
2601
2602     // Otherwise, sink it to the exit block.
2603     Instruction *ToMove = &*I;
2604     bool Done = false;
2605
2606     if (I != Preheader->begin()) {
2607       // Skip debug info intrinsics.
2608       do {
2609         --I;
2610       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2611
2612       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2613         Done = true;
2614     } else {
2615       Done = true;
2616     }
2617
2618     MadeAnyChanges = true;
2619     ToMove->moveBefore(*ExitBlock, InsertPt);
2620     if (Done) break;
2621     InsertPt = ToMove->getIterator();
2622   }
2623
2624   return MadeAnyChanges;
2625 }
2626
2627 bool IndVarSimplify::optimizeLoopExits(Loop *L) {
2628   SmallVector<BasicBlock*, 16> ExitingBlocks;
2629   L->getExitingBlocks(ExitingBlocks);
2630
2631   // Form an expression for the maximum exit count possible for this loop. We
2632   // merge the max and exact information to approximate a version of
2633   // getMaxBackedgeTakenInfo which isn't restricted to just constants.
2634   // TODO: factor this out as a version of getMaxBackedgeTakenCount which
2635   // isn't guaranteed to return a constant.
2636   SmallVector<const SCEV*, 4> ExitCounts;
2637   const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
2638   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2639     ExitCounts.push_back(MaxConstEC);
2640   for (BasicBlock *ExitingBB : ExitingBlocks) {
2641     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2642     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2643       assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2644              "We should only have known counts for exiting blocks that "
2645              "dominate latch!");
2646       ExitCounts.push_back(ExitCount);
2647     }
2648   }
2649   if (ExitCounts.empty())
2650     return false;
2651   const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2652
2653   bool Changed = false;
2654   for (BasicBlock *ExitingBB : ExitingBlocks) {
2655     // If our exitting block exits multiple loops, we can only rewrite the
2656     // innermost one.  Otherwise, we're changing how many times the innermost
2657     // loop runs before it exits.
2658     if (LI->getLoopFor(ExitingBB) != L)
2659       continue;
2660
2661     // Can't rewrite non-branch yet.
2662     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2663     if (!BI)
2664       continue;
2665
2666     // If already constant, nothing to do.
2667     if (isa<Constant>(BI->getCondition()))
2668       continue;
2669
2670     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2671     if (isa<SCEVCouldNotCompute>(ExitCount))
2672       continue;
2673
2674     // If we know we'd exit on the first iteration, rewrite the exit to
2675     // reflect this.  This does not imply the loop must exit through this
2676     // exit; there may be an earlier one taken on the first iteration.
2677     // TODO: Given we know the backedge can't be taken, we should go ahead
2678     // and break it.  Or at least, kill all the header phis and simplify.
2679     if (ExitCount->isZero()) {
2680       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2681       auto *OldCond = BI->getCondition();
2682       auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2683         ConstantInt::getFalse(OldCond->getType());
2684       BI->setCondition(NewCond);
2685       if (OldCond->use_empty())
2686         DeadInsts.push_back(OldCond);
2687       Changed = true;
2688       continue;
2689     }
2690
2691     // If we end up with a pointer exit count, bail.
2692     if (!ExitCount->getType()->isIntegerTy() ||
2693         !MaxExitCount->getType()->isIntegerTy())
2694       return false;
2695
2696     Type *WiderType =
2697       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2698     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2699     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2700     assert(MaxExitCount->getType() == ExitCount->getType());
2701
2702     // Can we prove that some other exit must be taken strictly before this
2703     // one?  TODO: handle cases where ule is known, and equality is covered
2704     // by a dominating exit
2705     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2706                                      MaxExitCount, ExitCount)) {
2707       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2708       auto *OldCond = BI->getCondition();
2709       auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2710         ConstantInt::getTrue(OldCond->getType());
2711       BI->setCondition(NewCond);
2712       if (OldCond->use_empty())
2713         DeadInsts.push_back(OldCond);
2714       Changed = true;
2715       continue;
2716     }
2717
2718     // TODO: If we can prove that the exiting iteration is equal to the exit
2719     // count for this exit and that no previous exit oppurtunities exist within
2720     // the loop, then we can discharge all other exits.  (May fall out of
2721     // previous TODO.)
2722
2723     // TODO: If we can't prove any relation between our exit count and the
2724     // loops exit count, but taking this exit doesn't require actually running
2725     // the loop (i.e. no side effects, no computed values used in exit), then
2726     // we can replace the exit test with a loop invariant test which exits on
2727     // the first iteration.
2728   }
2729   return Changed;
2730 }
2731
2732 //===----------------------------------------------------------------------===//
2733 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2734 //===----------------------------------------------------------------------===//
2735
2736 bool IndVarSimplify::run(Loop *L) {
2737   // We need (and expect!) the incoming loop to be in LCSSA.
2738   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2739          "LCSSA required to run indvars!");
2740   bool Changed = false;
2741
2742   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2743   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2744   //    canonicalization can be a pessimization without LSR to "clean up"
2745   //    afterwards.
2746   //  - We depend on having a preheader; in particular,
2747   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2748   //    and we're in trouble if we can't find the induction variable even when
2749   //    we've manually inserted one.
2750   //  - LFTR relies on having a single backedge.
2751   if (!L->isLoopSimplifyForm())
2752     return false;
2753
2754   // If there are any floating-point recurrences, attempt to
2755   // transform them to use integer recurrences.
2756   Changed |= rewriteNonIntegerIVs(L);
2757
2758   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2759
2760   // Create a rewriter object which we'll use to transform the code with.
2761   SCEVExpander Rewriter(*SE, DL, "indvars");
2762 #ifndef NDEBUG
2763   Rewriter.setDebugType(DEBUG_TYPE);
2764 #endif
2765
2766   // Eliminate redundant IV users.
2767   //
2768   // Simplification works best when run before other consumers of SCEV. We
2769   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2770   // other expressions involving loop IVs have been evaluated. This helps SCEV
2771   // set no-wrap flags before normalizing sign/zero extension.
2772   Rewriter.disableCanonicalMode();
2773   Changed |= simplifyAndExtend(L, Rewriter, LI);
2774
2775   // Check to see if this loop has a computable loop-invariant execution count.
2776   // If so, this means that we can compute the final value of any expressions
2777   // that are recurrent in the loop, and substitute the exit values from the
2778   // loop into any instructions outside of the loop that use the final values of
2779   // the current expressions.
2780   //
2781   if (ReplaceExitValue != NeverRepl &&
2782       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2783     Changed |= rewriteLoopExitValues(L, Rewriter);
2784
2785   // Eliminate redundant IV cycles.
2786   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2787
2788   Changed |= optimizeLoopExits(L);
2789
2790   // If we have a trip count expression, rewrite the loop's exit condition
2791   // using it.
2792   if (!DisableLFTR) {
2793     SmallVector<BasicBlock*, 16> ExitingBlocks;
2794     L->getExitingBlocks(ExitingBlocks);
2795     for (BasicBlock *ExitingBB : ExitingBlocks) {
2796       // Can't rewrite non-branch yet.
2797       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2798         continue;
2799
2800       // If our exitting block exits multiple loops, we can only rewrite the
2801       // innermost one.  Otherwise, we're changing how many times the innermost
2802       // loop runs before it exits.
2803       if (LI->getLoopFor(ExitingBB) != L)
2804         continue;
2805
2806       if (!needsLFTR(L, ExitingBB))
2807         continue;
2808
2809       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2810       if (isa<SCEVCouldNotCompute>(ExitCount))
2811         continue;
2812
2813       // This was handled above, but as we form SCEVs, we can sometimes refine
2814       // existing ones; this allows exit counts to be folded to zero which
2815       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2816       // until stable to handle cases like this better.
2817       if (ExitCount->isZero())
2818         continue;
2819
2820       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2821       if (!IndVar)
2822         continue;
2823
2824       // Avoid high cost expansions.  Note: This heuristic is questionable in
2825       // that our definition of "high cost" is not exactly principled.
2826       if (Rewriter.isHighCostExpansion(ExitCount, L))
2827         continue;
2828
2829       // Check preconditions for proper SCEVExpander operation. SCEV does not
2830       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2831       // any pass that uses the SCEVExpander must do it. This does not work
2832       // well for loop passes because SCEVExpander makes assumptions about
2833       // all loops, while LoopPassManager only forces the current loop to be
2834       // simplified.
2835       //
2836       // FIXME: SCEV expansion has no way to bail out, so the caller must
2837       // explicitly check any assumptions made by SCEV. Brittle.
2838       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2839       if (!AR || AR->getLoop()->getLoopPreheader())
2840         Changed |= linearFunctionTestReplace(L, ExitingBB,
2841                                              ExitCount, IndVar,
2842                                              Rewriter);
2843     }
2844   }
2845   // Clear the rewriter cache, because values that are in the rewriter's cache
2846   // can be deleted in the loop below, causing the AssertingVH in the cache to
2847   // trigger.
2848   Rewriter.clear();
2849
2850   // Now that we're done iterating through lists, clean up any instructions
2851   // which are now dead.
2852   while (!DeadInsts.empty())
2853     if (Instruction *Inst =
2854             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2855       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2856
2857   // The Rewriter may not be used from this point on.
2858
2859   // Loop-invariant instructions in the preheader that aren't used in the
2860   // loop may be sunk below the loop to reduce register pressure.
2861   Changed |= sinkUnusedInvariants(L);
2862
2863   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2864   // trip count and therefore can further simplify exit values in addition to
2865   // rewriteLoopExitValues.
2866   Changed |= rewriteFirstIterationLoopExitValues(L);
2867
2868   // Clean up dead instructions.
2869   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2870
2871   // Check a post-condition.
2872   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2873          "Indvars did not preserve LCSSA!");
2874
2875   // Verify that LFTR, and any other change have not interfered with SCEV's
2876   // ability to compute trip count.
2877 #ifndef NDEBUG
2878   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2879     SE->forgetLoop(L);
2880     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2881     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2882         SE->getTypeSizeInBits(NewBECount->getType()))
2883       NewBECount = SE->getTruncateOrNoop(NewBECount,
2884                                          BackedgeTakenCount->getType());
2885     else
2886       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2887                                                  NewBECount->getType());
2888     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2889   }
2890 #endif
2891
2892   return Changed;
2893 }
2894
2895 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2896                                           LoopStandardAnalysisResults &AR,
2897                                           LPMUpdater &) {
2898   Function *F = L.getHeader()->getParent();
2899   const DataLayout &DL = F->getParent()->getDataLayout();
2900
2901   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2902   if (!IVS.run(&L))
2903     return PreservedAnalyses::all();
2904
2905   auto PA = getLoopPassPreservedAnalyses();
2906   PA.preserveSet<CFGAnalyses>();
2907   return PA;
2908 }
2909
2910 namespace {
2911
2912 struct IndVarSimplifyLegacyPass : public LoopPass {
2913   static char ID; // Pass identification, replacement for typeid
2914
2915   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2916     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2917   }
2918
2919   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2920     if (skipLoop(L))
2921       return false;
2922
2923     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2924     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2925     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2926     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2927     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2928     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2929     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2930     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2931
2932     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2933     return IVS.run(L);
2934   }
2935
2936   void getAnalysisUsage(AnalysisUsage &AU) const override {
2937     AU.setPreservesCFG();
2938     getLoopAnalysisUsage(AU);
2939   }
2940 };
2941
2942 } // end anonymous namespace
2943
2944 char IndVarSimplifyLegacyPass::ID = 0;
2945
2946 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2947                       "Induction Variable Simplification", false, false)
2948 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2949 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2950                     "Induction Variable Simplification", false, false)
2951
2952 Pass *llvm::createIndVarSimplifyPass() {
2953   return new IndVarSimplifyLegacyPass();
2954 }