//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
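///
/// For example (illustrative): if Def reaches User, a PHI, along two incoming
/// edges, the chosen insertion point is the terminator of the nearest common
/// dominator of the two incoming blocks rather than a point next to the PHI.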
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
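/// For example (illustrative): 42.0 converts exactly to 42, while 0.5 fails
/// the exactness check below and is rejected.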
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
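  // Note: the ordered and unordered variants of each predicate map to the
  // same signed integer predicate below; the compared values are
  // integer-valued here, so NaNs cannot occur and the distinction is
  // immaterial.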
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
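    // (Illustrative: with InitValue = 0, IncValue = 3, ExitValue = 10,
    // Leftover is 1; an 'i != 10' test would step 9 -> 12, miss the exit
    // value, and wrap.)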
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
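///
/// For example (illustrative): in
///   for (i = start; i != n; ++i) { if (inv_cond) break; ... }
/// where inv_cond is loop-invariant and tested on a path dominating the latch,
/// the exit PHI for i along the break edge can be rewritten to start.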
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
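///
/// For example (illustrative): visiting 'sext i32 %iv to i64' on a target
/// where i64 is legal records i64 as the widest native type and marks the
/// widening as signed.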
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
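///
/// For example (illustrative): a narrow i32 IV whose increment feeds
/// 'sext i32 %iv.next to i64' gets a new i64 phi computing the same
/// recurrence, and the sext is replaced by the wide value directly.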
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);
  void widenWithVariantUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
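///
/// For example (illustrative): for %u = add nsw i32 %iv, %n where %iv has
/// been sign-extended, nsw permits sext(%u) to be rewritten as
/// sext(%iv) + sext(%n), which is again an AddRec on this loop.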
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
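///
/// For example (illustrative): for a use %a = add i32 %iv, 5 of the narrow
/// IV %iv = {0,+,1}<i32>, zero-extending yields {5,+,1}<i64> when SCEV can
/// prove the narrow expression does not wrap.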
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

// widenIVUse avoids generating a trunc by evaluating the use as an AddRec;
// this will not work when:
//    1) SCEV traces back to an instruction inside the loop that SCEV can not
// expand, eg. add %indvar, (load %addr)
//    2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
// While SCEV fails to avoid trunc, we can still try to use instruction
// combining approach to prove trunc is not required. This can be further
// extended with other instruction combining checks, but for now we handle the
// following case (sub can be "add" and "mul", "nsw + sext" can be "nuw + zext")
//
// Src:
//   %c = sub nsw %b, %indvar
//   %d = sext %c to i64
// Dst:
//   %indvar.ext1 = sext %indvar to i64
//   %m = sext %b to i64
//   %d = sub nsw i64 %m, %indvar.ext1
// Therefore, as long as the result of add/sub/mul is extended to wide type, no
// trunc is required regardless of how %b is generated. This pattern is common
// when calculating addresses on 64-bit architectures.
bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // Verifying that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;

  // Verifying that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special case for widening with a loop variant (see
/// WidenIV::widenWithVariantUse). This is the code generation part.
void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  assert(ExtKind != Unknown && "Unknown ExtKind not handled");

  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = dyn_cast<SExtInst>(U.getUser());
    else
      User = dyn_cast<ZExtInst>(U.getUser());
    if (User && User->getType() == WideType) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                        << *WideBO << "\n");
      ++NumElimExt;
      User->replaceAllUsesWith(WideBO);
      DeadInsts.emplace_back(User);
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                            UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantUse(DU)) {
      widenWithVariantUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }

  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }

  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we reached this point then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
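///
/// For example (illustrative): for an i32 IV {0,+,1} with sext users, this
/// expands an i64 phi for {0,+,1}<i64> in the loop header and then walks the
/// narrow IV's def-use chain, widening or truncating each use.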
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information basing on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}

/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
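///
/// For example (illustrative): given %iv.next = add nsw i32 %iv, 1 and a
/// dominating branch on '%iv s>= 0', the recorded range of %iv.next at that
/// use is [1, INT_MAX], proving it non-negative there.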
void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
                                    Instruction *NarrowUser) {
  using namespace llvm::PatternMatch;

  Value *NarrowDefLHS;
  const APInt *NarrowDefRHS;
  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
                                 m_APInt(NarrowDefRHS))) ||
      !NarrowDefRHS->isNonNegative())
    return;

  auto UpdateRangeFromCondition = [&] (Value *Condition,
                                       bool TrueDest) {
    CmpInst::Predicate Pred;
    Value *CmpRHS;
    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
                                 m_Value(CmpRHS))))
      return;

    CmpInst::Predicate P =
            TrueDest ? Pred : CmpInst::getInversePredicate(Pred);

    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
    auto CmpConstrainedLHSRange =
            ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
    auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
        *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);

    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
  };

  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
    if (!HasGuards)
      return;

    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
                                     Ctx->getParent()->rend())) {
      Value *C = nullptr;
      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
        UpdateRangeFromCondition(C, /*TrueDest=*/true);
    }
  };

  UpdateRangeFromGuards(NarrowUser);

  BasicBlock *NarrowUserBB = NarrowUser->getParent();
  // If NarrowUserBB is statically unreachable, asking dominator queries may
  // yield surprising results. (e.g. the block may not have a dom tree node)
  if (!DT->isReachableFromEntry(NarrowUserBB))
    return;
1540 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1541 L->contains(DTB->getBlock());
1542 DTB = DTB->getIDom()) {
1543 auto *BB = DTB->getBlock();
1544 auto *TI = BB->getTerminator();
1545 UpdateRangeFromGuards(TI);
1547 auto *BI = dyn_cast<BranchInst>(TI);
1548 if (!BI || !BI->isConditional())
1551 auto *TrueSuccessor = BI->getSuccessor(0);
1552 auto *FalseSuccessor = BI->getSuccessor(1);
1554 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1555 return BBE.isSingleEdge() &&
1556 DT->dominates(BBE, NarrowUser->getParent());
1559 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1560 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1562 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1563 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1564 }
1565 }
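// A sketch of the reasoning above on hypothetical input: for a def
// "%x = add nsw i32 %a, 7" whose user sits below a dominating branch on
// "icmp slt i32 %a, 100", makeAllowedICmpRegion constrains %a to
// [INT_MIN, 100), and addWithNoWrap then records a control-dependent range
// of [INT_MIN + 7, 107) for %x at that user.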
1567 /// Calculates the PostIncRangeInfos map for the given IV.
1568 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1569 SmallPtrSet<Instruction *, 16> Visited;
1570 SmallVector<Instruction *, 6> Worklist;
1571 Worklist.push_back(OrigPhi);
1572 Visited.insert(OrigPhi);
1574 while (!Worklist.empty()) {
1575 Instruction *NarrowDef = Worklist.pop_back_val();
1577 for (Use &U : NarrowDef->uses()) {
1578 auto *NarrowUser = cast<Instruction>(U.getUser());
1580 // Don't go looking outside the current loop.
1581 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1582 if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1583 continue;
1585 if (!Visited.insert(NarrowUser).second)
1586 continue;
1588 Worklist.push_back(NarrowUser);
1590 calculatePostIncRange(NarrowDef, NarrowUser);
1591 }
1592 }
1593 }
1595 //===----------------------------------------------------------------------===//
1596 // Live IV Reduction - Minimize IVs live across the loop.
1597 //===----------------------------------------------------------------------===//
1599 //===----------------------------------------------------------------------===//
1600 // Simplification of IV users based on SCEV evaluation.
1601 //===----------------------------------------------------------------------===//
1603 namespace {
1605 class IndVarSimplifyVisitor : public IVVisitor {
1606 ScalarEvolution *SE;
1607 const TargetTransformInfo *TTI;
1608 PHINode *IVPhi;
1610 public:
1611 WideIVInfo WI;
1613 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1614 const TargetTransformInfo *TTI,
1615 const DominatorTree *DTree)
1616 : SE(SCEV), TTI(TTI), IVPhi(IV) {
1617 DT = DTree;
1618 WI.NarrowIV = IVPhi;
1619 }
1621 // Implement the interface used by simplifyUsersOfIV.
1622 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1623 };
1625 } // end anonymous namespace
1627 /// Iteratively perform simplification on a worklist of IV users. Each
1628 /// successive simplification may push more users which may themselves be
1629 /// candidates for simplification.
1631 /// Sign/Zero extend elimination is interleaved with IV simplification.
1632 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1633 SCEVExpander &Rewriter,
1634 LoopInfo *LI) {
1635 SmallVector<WideIVInfo, 8> WideIVs;
1637 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1638 Intrinsic::getName(Intrinsic::experimental_guard));
1639 bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1641 SmallVector<PHINode*, 8> LoopPhis;
1642 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1643 LoopPhis.push_back(cast<PHINode>(I));
1644 }
1645 // Each round of simplification iterates through the SimplifyIVUsers worklist
1646 // for all current phis, then determines whether any IVs can be
1647 // widened. Widening adds new phis to LoopPhis, inducing another round of
1648 // simplification on the wide IVs.
1649 bool Changed = false;
1650 while (!LoopPhis.empty()) {
1651 // Evaluate as many IV expressions as possible before widening any IVs. This
1652 // forces SCEV to set no-wrap flags before evaluating sign/zero
1653 // extension. The first time SCEV attempts to normalize sign/zero extension,
1654 // the result becomes final. So for the most predictable results, we delay
1655 // evaluation of sign/zero extension until needed, and avoid running
1656 // other SCEV-based analyses prior to simplifyAndExtend.
1657 do {
1658 PHINode *CurrIV = LoopPhis.pop_back_val();
1660 // Information about sign/zero extensions of CurrIV.
1661 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1663 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
1664 &Visitor);
1666 if (Visitor.WI.WidestNativeType) {
1667 WideIVs.push_back(Visitor.WI);
1668 }
1669 } while (!LoopPhis.empty());
1671 for (; !WideIVs.empty(); WideIVs.pop_back()) {
1672 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1673 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1674 Changed = true;
1675 LoopPhis.push_back(WidePhi);
1676 }
1677 }
1678 }
1680 return Changed;
1681 }
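// Worked example (hypothetical): given two header phis %i (i32) and %j (i16),
// the do-while above first simplifies the users of both; if visiting %j's
// users records WidestNativeType = i32 in its WideIVInfo, the for-loop builds
// a wide i32 IV for %j and pushes the new phi onto LoopPhis, triggering one
// more round of simplification over the widened IV.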
1682 //===----------------------------------------------------------------------===//
1683 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1684 //===----------------------------------------------------------------------===//
1686 /// Given a Value which is hoped to be part of an add recurrence in the given
1687 /// loop, return the associated PHI node if so; otherwise, return null. Note
1688 /// that this is less general than SCEV's AddRec checking.
1689 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1690 Instruction *IncI = dyn_cast<Instruction>(IncV);
1691 if (!IncI)
1692 return nullptr;
1694 switch (IncI->getOpcode()) {
1695 case Instruction::Add:
1696 case Instruction::Sub:
1697 break;
1698 case Instruction::GetElementPtr:
1699 // An IV counter must preserve its type.
1700 if (IncI->getNumOperands() == 2)
1701 break;
1702 LLVM_FALLTHROUGH;
1703 default:
1704 return nullptr;
1705 }
1707 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
1708 if (Phi && Phi->getParent() == L->getHeader()) {
1709 if (L->isLoopInvariant(IncI->getOperand(1)))
1710 return Phi;
1711 return nullptr;
1712 }
1713 if (IncI->getOpcode() == Instruction::GetElementPtr)
1714 return nullptr;
1716 // Allow add/sub to be commuted.
1717 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1718 if (Phi && Phi->getParent() == L->getHeader()) {
1719 if (L->isLoopInvariant(IncI->getOperand(0)))
1720 return Phi;
1721 }
1722 return nullptr;
1723 }
1725 /// Whether the current loop exit test is based on this value. Currently this
1726 /// is limited to a direct use in the loop condition.
1727 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1728 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1729 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1730 // TODO: Allow non-icmp loop test.
1731 if (!ICmp)
1732 return false;
1734 // TODO: Allow indirect use.
1735 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1736 }
1738 /// linearFunctionTestReplace policy. Return true unless we can show that the
1739 /// current exit test is already sufficiently canonical.
1740 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1741 assert(L->getLoopLatch() && "Must be in simplified form");
1743 // Avoid converting a constant or loop invariant test back to a runtime
1744 // test. This is critical for when SCEV's cached ExitCount is less precise
1745 // than the current IR (such as after we've proven a particular exit is
1746 // actually dead and thus the BE count never reaches our ExitCount.)
1747 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1748 if (L->isLoopInvariant(BI->getCondition()))
1749 return false;
1751 // Do LFTR to simplify the exit condition to an ICMP.
1752 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1753 if (!Cond)
1754 return true;
1756 // Do LFTR to simplify the exit ICMP to EQ/NE
1757 ICmpInst::Predicate Pred = Cond->getPredicate();
1758 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1759 return true;
1761 // Look for a loop invariant RHS
1762 Value *LHS = Cond->getOperand(0);
1763 Value *RHS = Cond->getOperand(1);
1764 if (!L->isLoopInvariant(RHS)) {
1765 if (!L->isLoopInvariant(LHS))
1766 return true;
1767 std::swap(LHS, RHS);
1768 }
1769 // Look for a simple IV counter LHS
1770 PHINode *Phi = dyn_cast<PHINode>(LHS);
1771 if (!Phi)
1772 Phi = getLoopPhiForCounter(LHS, L);
1773 if (!Phi)
1774 return true;
1777 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1778 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1779 if (Idx < 0)
1780 return true;
1782 // Do LFTR if the exit condition's IV is *not* a simple counter.
1783 Value *IncV = Phi->getIncomingValue(Idx);
1784 return Phi != getLoopPhiForCounter(IncV, L);
1785 }
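// For example (a sketch, not a real test case): an exit test such as
//   %cmp = icmp slt i32 %iv, %n
// fails the EQ/NE check above, so needsLFTR returns true. By contrast,
//   %cmp = icmp ne i32 %iv.next, %limit
// where %iv.next is the latch increment of a simple counter is already in
// the canonical form LFTR would produce and is left alone.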
1787 /// Return true if undefined behavior would provably be executed on the path
1788 /// to OnPathTo if Root produced a poison result. Note that this doesn't say
1789 /// anything about whether OnPathTo is actually executed or whether Root is
1790 /// actually poison. This can be used to assess whether a new use of Root can
1791 /// be added at a location which is control equivalent with OnPathTo (such as
1792 /// immediately before it) without introducing UB which didn't previously
1793 /// exist. Note that a false result conveys no information.
1794 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1795 Instruction *OnPathTo,
1796 DominatorTree *DT) {
1797 // Basic approach is to assume Root is poison, propagate poison forward
1798 // through all users we can easily track, and then check whether any of those
1799 // users are provably UB and must execute before our exiting block might
1800 // exit.
1802 // The set of all recursive users we've visited (which are assumed to all be
1803 // poison because of said visit)
1804 SmallSet<const Value *, 16> KnownPoison;
1805 SmallVector<const Instruction*, 16> Worklist;
1806 Worklist.push_back(Root);
1807 while (!Worklist.empty()) {
1808 const Instruction *I = Worklist.pop_back_val();
1810 // If we know this must trigger UB on a path leading to our target.
1811 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1812 return true;
1814 // If we can't analyze propagation through this instruction, just skip it
1815 // and transitive users. Safe as false is a conservative result.
1816 if (!propagatesPoison(I) && I != Root)
1817 continue;
1819 if (KnownPoison.insert(I).second)
1820 for (const User *User : I->users())
1821 Worklist.push_back(cast<Instruction>(User));
1822 }
1824 // Might be non-UB, or might have a path we couldn't prove must execute on
1825 // the way to the exiting bb.
1826 return false;
1827 }
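// A sketch of the forward walk above on hypothetical IR: if Root is
// "%a = add i32 %iv, 1" and every path to OnPathTo passes through
// "%p = getelementptr inbounds i8, i8* %base, i32 %a" followed by a load of
// %p that dominates OnPathTo, then poison at %a propagates to %p, the load
// makes it immediate UB, and we may return true. Any user the walk cannot
// reason about is skipped, erring toward the conservative "false".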
1829 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1830 /// down to checking that all operands are constant and listing instructions
1831 /// that may hide undef.
1832 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1833 unsigned Depth) {
1834 if (isa<Constant>(V))
1835 return !isa<UndefValue>(V);
1837 if (Depth >= 6)
1838 return false;
1840 // Conservatively handle non-constant non-instructions. For example, Arguments
1841 // may be undef.
1842 Instruction *I = dyn_cast<Instruction>(V);
1843 if (!I)
1844 return false;
1846 // Load and return values may be undef.
1847 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1848 return false;
1850 // Optimistically handle other instructions.
1851 for (Value *Op : I->operands()) {
1852 if (!Visited.insert(Op).second)
1853 continue;
1854 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1855 return false;
1856 }
1857 return true;
1858 }
1860 /// Return true if the given value is concrete. We must prove that undef can
1861 /// never reach it.
1862 ///
1863 /// TODO: If we decide that this is a good approach to checking for undef, we
1864 /// may factor it into a common location.
1865 static bool hasConcreteDef(Value *V) {
1866 SmallPtrSet<Value*, 8> Visited;
1867 Visited.insert(V);
1868 return hasConcreteDefImpl(V, Visited, 0);
1869 }
1871 /// Return true if this IV has any uses other than the (soon to be rewritten)
1872 /// loop exit test.
1873 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1874 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1875 Value *IncV = Phi->getIncomingValue(LatchIdx);
1877 for (User *U : Phi->users())
1878 if (U != Cond && U != IncV) return false;
1880 for (User *U : IncV->users())
1881 if (U != Cond && U != Phi) return false;
1883 return true;
1884 }
1885 /// Return true if the given phi is a "counter" in L. A counter is an
1886 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1887 /// step of 1. Note that L must have exactly one latch.
1888 static bool isLoopCounter(PHINode* Phi, Loop *L,
1889 ScalarEvolution *SE) {
1890 assert(Phi->getParent() == L->getHeader());
1891 assert(L->getLoopLatch());
1893 if (!SE->isSCEVable(Phi->getType()))
1894 return false;
1896 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1897 if (!AR || AR->getLoop() != L || !AR->isAffine())
1898 return false;
1900 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1901 if (!Step || !Step->isOne())
1902 return false;
1904 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1905 Value *IncV = Phi->getIncomingValue(LatchIdx);
1906 return (getLoopPhiForCounter(IncV, L) == Phi);
1907 }
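// For illustration (hypothetical IR), a phi this predicate accepts:
//
//   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add i64 %iv, 1
//
// SCEV models %iv as {%start,+,1}: affine with a step of one, and the latch
// value maps back to the same phi via getLoopPhiForCounter. A phi stepping
// by 2, or one incremented outside the loop, is rejected.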
1909 /// Search the loop header for a loop counter (an add rec w/ step of one)
1910 /// suitable for use by LFTR. If multiple counters are available, select the
1911 /// "best" one based on profitability heuristics.
1913 /// BECount may be an i8* pointer type. The pointer difference is already
1914 /// a valid count without scaling the address stride, so it remains a pointer
1915 /// expression as far as SCEV is concerned.
1916 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1917 const SCEV *BECount,
1918 ScalarEvolution *SE, DominatorTree *DT) {
1919 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1921 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1923 // Loop over all of the PHI nodes, looking for a simple counter.
1924 PHINode *BestPhi = nullptr;
1925 const SCEV *BestInit = nullptr;
1926 BasicBlock *LatchBlock = L->getLoopLatch();
1927 assert(LatchBlock && "Must be in simplified form");
1928 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1930 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1931 PHINode *Phi = cast<PHINode>(I);
1932 if (!isLoopCounter(Phi, L, SE))
1933 continue;
1935 // Avoid comparing an integer IV against a pointer Limit.
1936 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1937 continue;
1939 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1941 // AR may be a pointer type, while BECount is an integer type.
1942 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1943 // AR may not be a narrower type, or we may never exit.
1944 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1945 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1946 continue;
1948 // Avoid reusing a potentially undef value to compute other values that may
1949 // have originally had a concrete definition.
1950 if (!hasConcreteDef(Phi)) {
1951 // We explicitly allow unknown phis as long as they are already used by
1952 // the loop exit test. This is legal since performing LFTR could not
1953 // increase the number of undef users.
1954 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1955 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1956 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1957 continue;
1958 }
1960 // Avoid introducing undefined behavior due to poison which didn't exist in
1961 // the original program. (Annoyingly, the rules for poison and undef
1962 // propagation are distinct, so this does NOT cover the undef case above.)
1963 // We have to ensure that we don't introduce UB by introducing a use on an
1964 // iteration where said IV produces poison. Our strategy here differs for
1965 // pointers and integer IVs. For integers, we strip and reinfer as needed,
1966 // see code in linearFunctionTestReplace. For pointers, we restrict
1967 // transforms as there is no good way to reinfer inbounds once lost.
1968 if (!Phi->getType()->isIntegerTy() &&
1969 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
1970 continue;
1972 const SCEV *Init = AR->getStart();
1974 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
1975 // Don't force a live loop counter if another IV can be used.
1976 if (AlmostDeadIV(Phi, LatchBlock, Cond))
1977 continue;
1979 // Prefer to count-from-zero. This is a more "canonical" counter form. It
1980 // also prefers integer to pointer IVs.
1981 if (BestInit->isZero() != Init->isZero()) {
1982 if (BestInit->isZero())
1983 continue;
1984 }
1985 // If two IVs both count from zero or both count from nonzero then the
1986 // narrower is likely a dead phi that has been widened. Use the wider phi
1987 // to allow the other to be eliminated.
1988 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
1989 continue;
1990 }
1991 BestPhi = Phi;
1992 BestInit = Init;
1993 }
1994 return BestPhi;
1995 }
1997 /// Insert an IR expression which computes the value held by the IV IndVar
1998 /// (which must be a loop counter w/ unit stride) after the backedge of loop L
1999 /// is taken ExitCount times.
2000 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2001 const SCEV *ExitCount, bool UsePostInc, Loop *L,
2002 SCEVExpander &Rewriter, ScalarEvolution *SE) {
2003 assert(isLoopCounter(IndVar, L, SE));
2004 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2005 const SCEV *IVInit = AR->getStart();
2007 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2008 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2009 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2010 // the existing GEPs whenever possible.
2011 if (IndVar->getType()->isPointerTy() &&
2012 !ExitCount->getType()->isPointerTy()) {
2013 // IVOffset will be the new GEP offset that is interpreted by GEP as a
2014 // signed value. ExitCount on the other hand represents the loop trip count,
2015 // which is an unsigned value. FindLoopCounter only allows induction
2016 // variables that have a positive unit stride of one. This means we don't
2017 // have to handle the case of negative offsets (yet) and just need to zero
2018 // extend ExitCount.
2019 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2020 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2021 if (UsePostInc)
2022 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2024 // Expand the code for the iteration count.
2025 assert(SE->isLoopInvariant(IVOffset, L) &&
2026 "Computed iteration count is not loop invariant!");
2028 // We could handle pointer IVs other than i8*, but we need to compensate for
2029 // gep index scaling.
2030 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2031 cast<PointerType>(IndVar->getType())
2032 ->getElementType())->isOne() &&
2033 "unit stride pointer IV must be i8*");
2035 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2036 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2037 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2038 }
2039 // In any other case, convert both IVInit and ExitCount to integers before
2040 // comparing. This may result in SCEV expansion of pointers, but in practice
2041 // SCEV will fold the pointer arithmetic away as such:
2042 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2044 // Valid Cases: (1) both integers is most common; (2) both may be pointers
2045 // for simple memset-style loops.
2047 // IVInit integer and ExitCount pointer would only occur if a canonical IV
2048 // were generated on top of case #2, which is not expected.
2050 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2051 // For unit stride, IVCount = Start + ExitCount with 2's complement
2052 // overflow.
2054 // For integer IVs, truncate the IV before computing IVInit + BECount,
2055 // unless we know a priori that the limit must be a constant when evaluated
2056 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
2057 // of the IV in the loop over a (potentially) expensive expansion of the
2058 // widened exit count add(zext(add)) expression.
2059 if (SE->getTypeSizeInBits(IVInit->getType())
2060 > SE->getTypeSizeInBits(ExitCount->getType())) {
2061 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2062 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2063 else
2064 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2065 }
2067 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2069 if (UsePostInc)
2070 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2072 // Expand the code for the iteration count.
2073 assert(SE->isLoopInvariant(IVLimit, L) &&
2074 "Computed iteration count is not loop invariant!");
2075 // Ensure that we generate the same type as IndVar, or a smaller integer
2076 // type. In the presence of null pointer values, we have an integer type
2077 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2078 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2079 IndVar->getType() : ExitCount->getType();
2080 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2081 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2082 }
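// Worked example (invented values): for a counter starting at 16 with
// ExitCount %n, the code above forms IVLimit = 16 + %n for a test against
// the pre-incremented IV, or 16 + %n + 1 when UsePostInc is set;
// SCEVExpander then materializes that expression just before the branch.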
2085 /// This method rewrites the exit condition of the loop to be a canonical !=
2086 /// comparison against the incremented loop induction variable. This pass is
2087 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2088 /// determine a loop-invariant trip count of the loop, which is actually a much
2089 /// broader range than just linear tests.
2090 bool IndVarSimplify::
2091 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2092 const SCEV *ExitCount,
2093 PHINode *IndVar, SCEVExpander &Rewriter) {
2094 assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2095 assert(isLoopCounter(IndVar, L, SE));
2096 Instruction * const IncVar =
2097 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2099 // Initialize CmpIndVar to the preincremented IV.
2100 Value *CmpIndVar = IndVar;
2101 bool UsePostInc = false;
2103 // If the exiting block is the same as the backedge block, we prefer to
2104 // compare against the post-incremented value, otherwise we must compare
2105 // against the preincremented value.
2106 if (ExitingBB == L->getLoopLatch()) {
2107 // For pointer IVs, we chose to not strip inbounds which requires us not
2108 // to add a potentially UB introducing use. We need to either a) show
2109 // the loop test we're modifying is already in post-inc form, or b) show
2110 // that adding a use must not introduce UB.
2111 bool SafeToPostInc =
2112 IndVar->getType()->isIntegerTy() ||
2113 isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2114 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2115 if (SafeToPostInc) {
2116 UsePostInc = true;
2117 CmpIndVar = IncVar;
2118 }
2119 }
2121 // It may be necessary to drop nowrap flags on the incrementing instruction
2122 // if either LFTR moves from a pre-inc check to a post-inc check (in which
2123 // case the increment might have previously been poison on the last iteration
2124 // only) or if LFTR switches to a different IV that was previously dynamically
2125 // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2126 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2127 // check), because the pre-inc addrec flags may be adopted from the original
2128 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2129 // TODO: This handling is inaccurate for one case: If we switch to a
2130 // dynamically dead IV that wraps on the first loop iteration only, which is
2131 // not covered by the post-inc addrec. (If the new IV was not dynamically
2132 // dead, it could not be poison on the first iteration in the first place.)
2133 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2134 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2135 if (BO->hasNoUnsignedWrap())
2136 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2137 if (BO->hasNoSignedWrap())
2138 BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2139 }
2141 Value *ExitCnt = genLoopLimit(
2142 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2143 assert(ExitCnt->getType()->isPointerTy() ==
2144 IndVar->getType()->isPointerTy() &&
2145 "genLoopLimit missed a cast");
2147 // Insert a new icmp_ne or icmp_eq instruction before the branch.
2148 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2149 ICmpInst::Predicate P;
2150 if (L->contains(BI->getSuccessor(0)))
2151 P = ICmpInst::ICMP_NE;
2152 else
2153 P = ICmpInst::ICMP_EQ;
2155 IRBuilder<> Builder(BI);
2157 // The new loop exit condition should reuse the debug location of the
2158 // original loop exit condition.
2159 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2160 Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2162 // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2163 // avoid the expensive expansion of the limit expression in the wider type,
2164 // emit a truncate to narrow the IV to the ExitCount type. This is safe
2165 // since we know (from the exit count bitwidth), that we can't self-wrap in
2166 // the narrower type.
2167 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2168 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2169 if (CmpIndVarSize > ExitCntSize) {
2170 assert(!CmpIndVar->getType()->isPointerTy() &&
2171 !ExitCnt->getType()->isPointerTy());
2173 // Before resorting to actually inserting the truncate, use the same
2174 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2175 // the other side of the comparison instead. We still evaluate the limit
2176 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2177 // a truncate within it.
2178 bool Extended = false;
2179 const SCEV *IV = SE->getSCEV(CmpIndVar);
2180 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2181 ExitCnt->getType());
2182 const SCEV *ZExtTrunc =
2183 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2185 if (ZExtTrunc == IV) {
2186 Extended = true;
2187 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2188 "wide.trip.count");
2189 } else {
2190 const SCEV *SExtTrunc =
2191 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2192 if (SExtTrunc == IV) {
2193 Extended = true;
2194 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2195 "wide.trip.count");
2196 }
2197 }
2199 if (Extended) {
2200 bool Discard;
2201 L->makeLoopInvariant(ExitCnt, Discard);
2202 } else
2203 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2204 "lftr.wideiv");
2205 }
2206 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2207 << " LHS:" << *CmpIndVar << '\n'
2208 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2210 << " RHS:\t" << *ExitCnt << "\n"
2211 << "ExitCount:\t" << *ExitCount << "\n"
2212 << " was: " << *BI->getCondition() << "\n");
2214 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2215 Value *OrigCond = BI->getCondition();
2216 // It's tempting to use replaceAllUsesWith here to fully replace the old
2217 // comparison, but that's not immediately safe, since users of the old
2218 // comparison may not be dominated by the new comparison. Instead, just
2219 // update the branch to use the new comparison; in the common case this
2220 // will make old comparison dead.
2221 BI->setCondition(Cond);
2222 DeadInsts.emplace_back(OrigCond);
2224 ++NumLFTR;
2225 return true;
2226 }
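// Net effect, sketched on a hypothetical loop (this mirrors what the debug
// output above prints): a latch test such as
//   %cmp = icmp slt i32 %iv.next, %n
// is replaced by an equality test against the precomputed limit,
//   %exitcond = icmp ne i32 %iv.next, %wide.trip.count
// a form that downstream consumers of SCEV find easier to reason about.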
2228 //===----------------------------------------------------------------------===//
2229 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2230 //===----------------------------------------------------------------------===//
2232 /// If there's a single exit block, sink any loop-invariant values that
2233 /// were defined in the preheader but not used inside the loop into the
2234 /// exit block to reduce register pressure in the loop.
2235 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2236 BasicBlock *ExitBlock = L->getExitBlock();
2237 if (!ExitBlock) return false;
2239 BasicBlock *Preheader = L->getLoopPreheader();
2240 if (!Preheader) return false;
2242 bool MadeAnyChanges = false;
2243 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2244 BasicBlock::iterator I(Preheader->getTerminator());
2245 while (I != Preheader->begin()) {
2246 --I;
2247 // New instructions were inserted at the end of the preheader.
2248 if (isa<PHINode>(I))
2249 break;
2251 // Don't move instructions which might have side effects, since the side
2252 // effects need to complete before instructions inside the loop. Also don't
2253 // move instructions which might read memory, since the loop may modify
2254 // memory. Note that it's okay if the instruction might have undefined
2255 // behavior: LoopSimplify guarantees that the preheader dominates the exit
2256 // block.
2257 if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2258 continue;
2260 // Skip debug info intrinsics.
2261 if (isa<DbgInfoIntrinsic>(I))
2262 continue;
2264 // Skip eh pad instructions.
2265 if (I->isEHPad())
2266 continue;
2268 // Don't sink alloca: we never want to sink static alloca's out of the
2269 // entry block, and correctly sinking dynamic alloca's requires
2270 // checks for stacksave/stackrestore intrinsics.
2271 // FIXME: Refactor this check somehow?
2272 if (isa<AllocaInst>(I))
2273 continue;
2275 // Determine if there is a use in or before the loop (direct or
2276 // otherwise).
2277 bool UsedInLoop = false;
2278 for (Use &U : I->uses()) {
2279 Instruction *User = cast<Instruction>(U.getUser());
2280 BasicBlock *UseBB = User->getParent();
2281 if (PHINode *P = dyn_cast<PHINode>(User)) {
2282 unsigned i =
2283 PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2284 UseBB = P->getIncomingBlock(i);
2285 }
2286 if (UseBB == Preheader || L->contains(UseBB)) {
2287 UsedInLoop = true;
2288 break;
2289 }
2290 }
2292 // If there is, the def must remain in the preheader.
2293 if (UsedInLoop)
2294 continue;
2296 // Otherwise, sink it to the exit block.
2297 Instruction *ToMove = &*I;
2298 bool Done = false;
2300 if (I != Preheader->begin()) {
2301 // Skip debug info intrinsics.
2302 do {
2303 --I;
2304 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2306 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2307 Done = true;
2308 } else {
2309 Done = true;
2310 }
2312 MadeAnyChanges = true;
2313 ToMove->moveBefore(*ExitBlock, InsertPt);
2314 if (Done) break;
2315 InsertPt = ToMove->getIterator();
2316 }
2318 return MadeAnyChanges;
2319 }
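// For example (hypothetical): a loop-invariant computation such as
//   %inv = sdiv i32 %a, %b
// defined in the preheader but used only by phis in the unique exit block
// is moved down into that exit block, so its value is no longer live across
// every iteration of the loop.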
2321 /// Return a symbolic upper bound for the backedge taken count of the loop.
2322 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2323 /// an arbitrary expression as opposed to only constants.
2324 /// TODO: Move into the ScalarEvolution class.
2325 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2326 DominatorTree &DT, Loop *L) {
2327 SmallVector<BasicBlock*, 16> ExitingBlocks;
2328 L->getExitingBlocks(ExitingBlocks);
2330 // Form an expression for the maximum exit count possible for this loop. We
2331 // merge the max and exact information to approximate a version of
2332 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2333 SmallVector<const SCEV*, 4> ExitCounts;
2334 for (BasicBlock *ExitingBB : ExitingBlocks) {
2335 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2336 if (isa<SCEVCouldNotCompute>(ExitCount))
2337 ExitCount = SE.getExitCount(L, ExitingBB,
2338 ScalarEvolution::ConstantMaximum);
2339 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2340 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2341 "We should only have known counts for exiting blocks that "
2343 ExitCounts.push_back(ExitCount);
2346 if (ExitCounts.empty())
2347 return SE.getCouldNotCompute();
2348 return SE.getUMinFromMismatchedTypes(ExitCounts);
2349 }
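// A sketch of the merge (invented counts): with two analyzeable exits whose
// counts are %n and 100, this returns umin(%n, 100) via
// getUMinFromMismatchedTypes; if no exit is analyzeable at all, the result
// is SCEVCouldNotCompute.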
2351 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2352 SmallVector<BasicBlock*, 16> ExitingBlocks;
2353 L->getExitingBlocks(ExitingBlocks);
2355 // Remove all exits which aren't both rewriteable and analyzeable.
2356 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2357 // If our exiting block exits multiple loops, we can only rewrite the
2358 // innermost one. Otherwise, we're changing how many times the innermost
2359 // loop runs before it exits.
2360 if (LI->getLoopFor(ExitingBB) != L)
2361 return true;
2363 // Can't rewrite non-branch yet.
2364 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2365 if (!BI)
2366 return true;
2368 // If already constant, nothing to do.
2369 if (isa<Constant>(BI->getCondition()))
2370 return true;
2372 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2373 if (isa<SCEVCouldNotCompute>(ExitCount))
2374 return true;
2375 return false;
2376 });
2377 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2379 if (ExitingBlocks.empty())
2380 return false;
2382 // Get a symbolic upper bound on the loop backedge taken count.
2383 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2384 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2385 return false;
2387 // Visit our exit blocks in order of dominance. We know from the fact that
2388 // all exits (left) are analyzeable that there must be a total dominance order
2389 // between them as each must dominate the latch. The visit order only
2390 // matters for the provably equal case.
2391 llvm::sort(ExitingBlocks,
2392 [&](BasicBlock *A, BasicBlock *B) {
2393 // std::sort sorts in ascending order, so we want the inverse of
2394 // the normal dominance relation.
2395 if (A == B) return false;
2396 if (DT->properlyDominates(A, B)) return true;
2397 if (DT->properlyDominates(B, A)) return false;
2398 llvm_unreachable("expected total dominance order!");
2399 });
2400 #ifndef NDEBUG
2401 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2402 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2403 }
2404 #endif
2406 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2407 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2408 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2409 auto *OldCond = BI->getCondition();
2410 auto *NewCond = ConstantInt::get(OldCond->getType(),
2411 IsTaken ? ExitIfTrue : !ExitIfTrue);
2412 BI->setCondition(NewCond);
2413 if (OldCond->use_empty())
2414 DeadInsts.emplace_back(OldCond);
2415 };
2417 bool Changed = false;
2418 SmallSet<const SCEV*, 8> DominatingExitCounts;
2419 for (BasicBlock *ExitingBB : ExitingBlocks) {
2420 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2421 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2423 // If we know we'd exit on the first iteration, rewrite the exit to
2424 // reflect this. This does not imply the loop must exit through this
2425 // exit; there may be an earlier one taken on the first iteration.
2426 // TODO: Given we know the backedge can't be taken, we should go ahead
2427 // and break it. Or at least, kill all the header phis and simplify.
2428 if (ExitCount->isZero()) {
2429 FoldExit(ExitingBB, true);
2430 Changed = true;
2431 continue;
2432 }
2434 // If we end up with a pointer exit count, bail. Note that we can end up
2435 // with a pointer exit count for one exiting block, and not for another in
2436 // the same loop.
2437 if (!ExitCount->getType()->isIntegerTy() ||
2438 !MaxExitCount->getType()->isIntegerTy())
2439 continue;
2441 Type *WiderType =
2442 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2443 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2444 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2445 assert(MaxExitCount->getType() == ExitCount->getType());
2447 // Can we prove that some other exit must be taken strictly before this
2448 // one?
2449 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2450 MaxExitCount, ExitCount)) {
2451 FoldExit(ExitingBB, false);
2452 Changed = true;
2453 continue;
2454 }
2456 // As we run, keep track of which exit counts we've encountered. If we
2457 // find a duplicate, we've found an exit which would have exited on the
2458 // exiting iteration, but (from the visit order) strictly follows another
2459 // which does the same and is thus dead.
2460 if (!DominatingExitCounts.insert(ExitCount).second) {
2461 FoldExit(ExitingBB, false);
2462 Changed = true;
2463 continue;
2464 }
2466 // TODO: There might be another opportunity to leverage SCEV's reasoning
2467 // here. If we kept track of the min of dominating exits so far, we could
2468 // discharge exits with EC >= MDEC. This is less powerful than the existing
2469 // transform (since later exits aren't considered), but potentially more
2470 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2471 // or a >u b. Such a case is not currently known.
2472 }
2473 return Changed;
2474 }
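// Worked example (invented counts): given one exit with ExitCount 10 and a
// later exit with ExitCount %n, if SCEV proves 10 <u %n on entry then the %n
// exit can never be taken on its exiting iteration, and FoldExit rewrites
// its branch to the "stay in loop" constant. A second exit with count 10
// strictly dominated by the first is likewise folded as a duplicate.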
2476 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2477 SmallVector<BasicBlock*, 16> ExitingBlocks;
2478 L->getExitingBlocks(ExitingBlocks);
2480 // Finally, see if we can rewrite our exit conditions into a loop invariant
2481 // form. If we have a read-only loop, and we can tell that we must exit down
2482 // a path which does not need any of the values computed within the loop, we
2483 // can rewrite the loop to exit on the first iteration. Note that this
2484 // doesn't either a) tell us the loop exits on the first iteration (unless
2485 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2486 // This transformation looks a lot like a restricted form of dead loop
2487 // elimination, but restricted to read-only loops and without necessarily
2488 // needing to kill the loop entirely.
2489 if (!LoopPredication)
2490 return false;
2492 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2493 return false;
2495 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2496 // through *explicit* control flow. We have to eliminate the possibility of
2497 // implicit exits (see below) before we know it's truly exact.
2498 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2499 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2500 !SE->isLoopInvariant(ExactBTC, L) ||
2501 !isSafeToExpand(ExactBTC, *SE))
2502 return false;
2504 // If we end up with a pointer exit count, bail. It may be unsized.
2505 if (!ExactBTC->getType()->isIntegerTy())
2506 return false;
2508 auto BadExit = [&](BasicBlock *ExitingBB) {
2509 // If our exiting block exits multiple loops, we can only rewrite the
2510 // innermost one. Otherwise, we're changing how many times the innermost
2511 // loop runs before it exits.
2512 if (LI->getLoopFor(ExitingBB) != L)
2513 return true;
2515 // Can't rewrite non-branch yet.
2516 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2517 if (!BI)
2518 return true;
2520 // If already constant, nothing to do.
2521 if (isa<Constant>(BI->getCondition()))
2522 return true;
2524 // If the exit block has phis, we need to be able to compute the values
2525 // within the loop which contains them. This assumes trivially lcssa phis
2526 // have already been removed; TODO: generalize
2527 BasicBlock *ExitBlock =
2528 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2529 if (!ExitBlock->phis().empty())
2530 return true;
2532 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2533 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2534 if (!SE->isLoopInvariant(ExitCount, L) ||
2535 !isSafeToExpand(ExitCount, *SE))
2536 return true;
2538 // If we end up with a pointer exit count, bail. It may be unsized.
2539 if (!ExitCount->getType()->isIntegerTy())
2540 return true;
2542 return false;
2543 };
2545 // If we have any exits which can't be predicated themselves, then we can't
2546 // predicate any exit which isn't guaranteed to execute before it. Consider
2547 // two exits (a) and (b) which would both exit on the same iteration. If we
2548 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2549 // we could convert a loop from exiting through (a) to one exiting through
2550 // (b). Note that this problem exists only for exits with the same exit
2551 // count, and we could be more aggressive when exit counts are known inequal.
2552 llvm::sort(ExitingBlocks,
2553 [&](BasicBlock *A, BasicBlock *B) {
2554 // std::sort sorts in ascending order, so we want the inverse of
2555 // the normal dominance relation, plus a tie breaker for blocks
2556 // unordered by dominance.
2557 if (DT->properlyDominates(A, B)) return true;
2558 if (DT->properlyDominates(B, A)) return false;
2559 return A->getName() < B->getName();
2560 });
2561 // Check to see if our exit blocks are a total order (i.e. a linear chain of
2562 // exits before the backedge). If they aren't, reasoning about reachability
2563 // is complicated and we choose not to for now.
2564 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2565 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2566 return false;
2568 // Given our sorted total order, we know that exit[j] must be evaluated
2569 // after all exit[i] such that j > i.
2570 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2571 if (BadExit(ExitingBlocks[i])) {
2572 ExitingBlocks.resize(i);
2573 break;
2574 }
2576 if (ExitingBlocks.empty())
2577 return false;
2579 // We rely on not being able to reach an exiting block on a later iteration
2580 // than its statically computed exit count. The implementation of
2581 // getExitCount currently has this invariant, but assert it here so that
2582 // breakage is obvious if this ever changes.
2583 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2584 return DT->dominates(ExitingBB, L->getLoopLatch());
2585 }));
2587 // At this point, ExitingBlocks consists of only those blocks which are
2588 // predicatable. Given that, we know we have at least one exit we can
2589 // predicate if the loop doesn't have side effects and doesn't have any
2590 // implicit exits (because then our exact BTC isn't actually exact).
2591 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2592 // suggestions on how to improve this? I can obviously bail out for outer
2593 // loops, but that seems less than ideal. MemorySSA can find memory writes,
2594 // is that enough for *all* side effects?
2595 for (BasicBlock *BB : L->blocks())
2596 for (auto &I : *BB)
2597 // TODO:isGuaranteedToTransfer
2598 if (I.mayHaveSideEffects() || I.mayThrow())
2599 return false;
2601 bool Changed = false;
2602 // Finally, do the actual predication for all predicatable blocks. A couple
2603 // of notes here:
2604 // 1) We don't bother to constant fold dominated exits with identical exit
2605 // counts; that's simply a form of CSE/equality propagation and we leave
2606 // it for dedicated passes.
2607 // 2) We insert the comparison at the branch. Hoisting introduces additional
2608 // legality constraints and we leave that to dedicated logic. We want to
2609 // predicate even if we can't insert a loop invariant expression as
2610 // peeling or unrolling will likely reduce the cost of the otherwise loop
2611 // invariant check.
2612 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2613 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2614 Value *ExactBTCV = nullptr; // Lazily generated if needed.
2615 for (BasicBlock *ExitingBB : ExitingBlocks) {
2616 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2618 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2619 Value *NewCond;
2620 if (ExitCount == ExactBTC) {
2621 NewCond = L->contains(BI->getSuccessor(0)) ?
2622 B.getFalse() : B.getTrue();
2623 } else {
2624 Value *ECV = Rewriter.expandCodeFor(ExitCount);
2625 if (!ExactBTCV)
2626 ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2627 Value *RHS = ExactBTCV;
2628 if (ECV->getType() != RHS->getType()) {
2629 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2630 ECV = B.CreateZExt(ECV, WiderTy);
2631 RHS = B.CreateZExt(RHS, WiderTy);
2632 }
2633 auto Pred = L->contains(BI->getSuccessor(0)) ?
2634 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2635 NewCond = B.CreateICmp(Pred, ECV, RHS);
2636 }
2637 Value *OldCond = BI->getCondition();
2638 BI->setCondition(NewCond);
2639 if (OldCond->use_empty())
2640 DeadInsts.emplace_back(OldCond);
2641 Changed = true;
2642 }
2644 return Changed;
2645 }
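// Sketched result (hypothetical IR): in a read-only loop where a predicated
// exit has count %ec and the exact backedge-taken count is %btc, the exit
// branch ends up testing a loop-invariant compare built at the preheader:
//   %newcond = icmp ne i64 %ec, %btc
// (NE when the in-loop edge is successor 0, EQ otherwise), so the exit test
// no longer depends on values computed inside the loop.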
2647 //===----------------------------------------------------------------------===//
2648 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2649 //===----------------------------------------------------------------------===//
2651 bool IndVarSimplify::run(Loop *L) {
2652 // We need (and expect!) the incoming loop to be in LCSSA.
2653 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2654 "LCSSA required to run indvars!");
2656 // If LoopSimplify form is not available, stay out of trouble. Some notes:
2657 // - LSR currently only supports LoopSimplify-form loops. Indvars'
2658 // canonicalization can be a pessimization without LSR to "clean up"
2659 // afterwards.
2660 // - We depend on having a preheader; in particular,
2661 // Loop::getCanonicalInductionVariable only supports loops with preheaders,
2662 // and we're in trouble if we can't find the induction variable even when
2663 // we've manually inserted one.
2664 // - LFTR relies on having a single backedge.
2665 if (!L->isLoopSimplifyForm())
2666 return false;
2668 #ifndef NDEBUG
2669 // Used below for a consistency check only
2670 // Note: Since the result returned by ScalarEvolution may depend on the order
2671 // in which previous results are added to its cache, the call to
2672 // getBackedgeTakenCount() may change following SCEV queries.
2673 const SCEV *BackedgeTakenCount;
2674 if (VerifyIndvars)
2675 BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2676 #endif
2678 bool Changed = false;
2679 // If there are any floating-point recurrences, attempt to
2680 // transform them to use integer recurrences.
2681 Changed |= rewriteNonIntegerIVs(L);
2683 // Create a rewriter object which we'll use to transform the code with.
2684 SCEVExpander Rewriter(*SE, DL, "indvars");
2685 #ifndef NDEBUG
2686 Rewriter.setDebugType(DEBUG_TYPE);
2687 #endif
2689 // Eliminate redundant IV users.
2691 // Simplification works best when run before other consumers of SCEV. We
2692 // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2693 // other expressions involving loop IVs have been evaluated. This helps SCEV
2694 // set no-wrap flags before normalizing sign/zero extension.
2695 Rewriter.disableCanonicalMode();
2696 Changed |= simplifyAndExtend(L, Rewriter, LI);
2698 // Check to see if we can compute the final value of any expressions
2699 // that are recurrent in the loop, and substitute the exit values from the
2700 // loop into any instructions outside of the loop that use the final values
2701 // of the current expressions.
2702 if (ReplaceExitValue != NeverRepl) {
2703 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2704 ReplaceExitValue, DeadInsts)) {
2705 NumReplaced += Rewrites;
2706 Changed = true;
2707 }
2708 }
2710 // Eliminate redundant IV cycles.
2711 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2713 // Try to eliminate loop exits based on analyzeable exit counts
2714 if (optimizeLoopExits(L, Rewriter)) {
2715 Changed = true;
2716 // Given we've changed exit counts, notify SCEV
2717 SE->forgetLoop(L);
2718 }
2720 // Try to form loop invariant tests for loop exits by changing how many
2721 // iterations of the loop run when that is unobservable.
2722 if (predicateLoopExits(L, Rewriter)) {
2723 Changed = true;
2724 // Given we've changed exit counts, notify SCEV
2725 SE->forgetLoop(L);
2726 }
2728 // If we have a trip count expression, rewrite the loop's exit condition
2729 // using it.
2730 if (!DisableLFTR) {
2731 BasicBlock *PreHeader = L->getLoopPreheader();
2732 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
2734 SmallVector<BasicBlock*, 16> ExitingBlocks;
2735 L->getExitingBlocks(ExitingBlocks);
2736 for (BasicBlock *ExitingBB : ExitingBlocks) {
2737 // Can't rewrite non-branch yet.
2738 if (!isa<BranchInst>(ExitingBB->getTerminator()))
2739 continue;
2741 // If our exiting block exits multiple loops, we can only rewrite the
2742 // innermost one. Otherwise, we're changing how many times the innermost
2743 // loop runs before it exits.
2744 if (LI->getLoopFor(ExitingBB) != L)
2745 continue;
2747 if (!needsLFTR(L, ExitingBB))
2748 continue;
2750 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2751 if (isa<SCEVCouldNotCompute>(ExitCount))
2752 continue;
2754 // This was handled above, but as we form SCEVs, we can sometimes refine
2755 // existing ones; this allows exit counts to be folded to zero which
2756 // weren't when optimizeLoopExits saw them. Arguably, we should iterate
2757 // until stable to handle cases like this better.
2758 if (ExitCount->isZero())
2759 continue;
2761 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2762 if (!IndVar)
2763 continue;
2765 // Avoid high cost expansions. Note: This heuristic is questionable in
2766 // that our definition of "high cost" is not exactly principled.
2767 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2768 TTI, PreHeaderBR))
2769 continue;
2771 // Check preconditions for proper SCEVExpander operation. SCEV does not
2772 // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2773 // any pass that uses the SCEVExpander must do it. This does not work
2774 // well for loop passes because SCEVExpander makes assumptions about
2775 // all loops, while LoopPassManager only forces the current loop to be
2776 // simplified.
2778 // FIXME: SCEV expansion has no way to bail out, so the caller must
2779 // explicitly check any assumptions made by SCEV. Brittle.
2780 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2781 if (!AR || AR->getLoop()->getLoopPreheader())
2782 Changed |= linearFunctionTestReplace(L, ExitingBB,
2783 ExitCount, IndVar,
2784 Rewriter);
2785 }
2786 }
2787 // Clear the rewriter cache, because values that are in the rewriter's cache
2788 // can be deleted in the loop below, causing the AssertingVH in the cache to
2789 // trigger.
2790 Rewriter.clear();
2792 // Now that we're done iterating through lists, clean up any instructions
2793 // which are now dead.
2794 while (!DeadInsts.empty())
2795 if (Instruction *Inst =
2796 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2797 Changed |=
2798 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2800 // The Rewriter may not be used from this point on.
2802 // Loop-invariant instructions in the preheader that aren't used in the
2803 // loop may be sunk below the loop to reduce register pressure.
2804 Changed |= sinkUnusedInvariants(L);
2806 // rewriteFirstIterationLoopExitValues does not rely on the computation of
2807 // trip count and therefore can further simplify exit values in addition to
2808 // rewriteLoopExitValues.
2809 Changed |= rewriteFirstIterationLoopExitValues(L);
2811 // Clean up dead instructions.
2812 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2814 // Check a post-condition.
2815 assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2816 "Indvars did not preserve LCSSA!");
2818 // Verify that LFTR, and any other change have not interfered with SCEV's
2819 // ability to compute trip count. We may have *changed* the exit count, but
2820 // only by reducing it.
2821 #ifndef NDEBUG
2822 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2823 SE->forgetLoop(L);
2824 const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2825 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2826 SE->getTypeSizeInBits(NewBECount->getType()))
2827 NewBECount = SE->getTruncateOrNoop(NewBECount,
2828 BackedgeTakenCount->getType());
2829 else
2830 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2831 NewBECount->getType());
2832 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2833 NewBECount) && "indvars must preserve SCEV");
2834 }
2835 if (VerifyMemorySSA && MSSAU)
2836 MSSAU->getMemorySSA()->verifyMemorySSA();
2837 #endif
2839 return Changed;
2840 }
2842 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2843 LoopStandardAnalysisResults &AR,
2844 LPMUpdater &) {
2845 Function *F = L.getHeader()->getParent();
2846 const DataLayout &DL = F->getParent()->getDataLayout();
2848 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
2849 if (!IVS.run(&L))
2850 return PreservedAnalyses::all();
2852 auto PA = getLoopPassPreservedAnalyses();
2853 PA.preserveSet<CFGAnalyses>();
2854 if (AR.MSSA)
2855 PA.preserve<MemorySSAAnalysis>();
2856 return PA;
2857 }
2859 namespace {
2861 struct IndVarSimplifyLegacyPass : public LoopPass {
2862 static char ID; // Pass identification, replacement for typeid
2864 IndVarSimplifyLegacyPass() : LoopPass(ID) {
2865 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2866 }
2868 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2869 if (skipLoop(L))
2870 return false;
2872 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2873 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2874 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2875 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2876 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2877 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2878 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2879 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2880 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2881 MemorySSA *MSSA = nullptr;
2882 if (MSSAAnalysis)
2883 MSSA = &MSSAAnalysis->getMSSA();
2885 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
2886 return IVS.run(L);
2887 }
2889 void getAnalysisUsage(AnalysisUsage &AU) const override {
2890 AU.setPreservesCFG();
2891 AU.addPreserved<MemorySSAWrapperPass>();
2892 getLoopAnalysisUsage(AU);
2893 }
2894 };
2896 } // end anonymous namespace
2898 char IndVarSimplifyLegacyPass::ID = 0;
2900 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2901 "Induction Variable Simplification", false, false)
2902 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2903 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2904 "Induction Variable Simplification", false, false)
2906 Pass *llvm::createIndVarSimplifyPass() {
2907 return new IndVarSimplifyLegacyPass();
2908 }