contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

   1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "llvm/ADT/DenseMap.h"
  11 #include "llvm/ADT/Sequence.h"
  12 #include "llvm/ADT/SetVector.h"
  13 #include "llvm/ADT/SmallPtrSet.h"
  14 #include "llvm/ADT/SmallVector.h"
  15 #include "llvm/ADT/Statistic.h"
  16 #include "llvm/ADT/STLExtras.h"
  17 #include "llvm/ADT/Twine.h"
  18 #include "llvm/Analysis/AssumptionCache.h"
  19 #include "llvm/Analysis/LoopAnalysisManager.h"
  20 #include "llvm/Analysis/LoopInfo.h"
  21 #include "llvm/Analysis/LoopPass.h"
  22 #include "llvm/IR/BasicBlock.h"
  23 #include "llvm/IR/Constant.h"
  24 #include "llvm/IR/Constants.h"
  25 #include "llvm/IR/Dominators.h"
  26 #include "llvm/IR/Function.h"
  27 #include "llvm/IR/InstrTypes.h"
  28 #include "llvm/IR/Instruction.h"
  29 #include "llvm/IR/Instructions.h"
  30 #include "llvm/IR/Use.h"
  31 #include "llvm/IR/Value.h"
  32 #include "llvm/Pass.h"
  33 #include "llvm/Support/Casting.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Support/GenericDomTree.h"
  37 #include "llvm/Support/raw_ostream.h"
  38 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  39 #include "llvm/Transforms/Utils/LoopUtils.h"
  40 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
  41 #include <algorithm>
  42 #include <cassert>
  43 #include <iterator>
  44 #include <utility>
  45
  46 #define DEBUG_TYPE "simple-loop-unswitch"
  47
  48 using namespace llvm;
  49
  50 STATISTIC(NumBranches, "Number of branches unswitched");
  51 STATISTIC(NumSwitches, "Number of switches unswitched");
  52 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
  53
  54 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
  55                                         Constant &Replacement) {
  56   assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
  57
  58   // Replace uses of LIC in the loop with the given constant.
  59   for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
  60     // Grab the use and walk past it so we can clobber it in the use list.
  61     Use *U = &*UI++;
  62     Instruction *UserI = dyn_cast<Instruction>(U->getUser());
  63     if (!UserI || !L.contains(UserI))
  64       continue;
  65
  66     // Replace this use within the loop body.
  67     *U = &Replacement;
  68   }
  69 }
  70
  71 /// Update the dominator tree after removing one exiting predecessor of a loop
  72 /// exit block.
  73 static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
  74                                DominatorTree &DT) {
  75   assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
  76          "Cannot have empty predecessors of the loop exit block if we split "
  77          "off a block to unswitch!");
  78
  79   BasicBlock *IDom = *pred_begin(LoopExitBB);
  80   // Walk all of the other predecessors finding the nearest common dominator
  81   // until all predecessors are covered or we reach the loop header. The loop
  82   // header necessarily dominates all loop exit blocks in loop simplified form
  83   // so we can early-exit the moment we hit that block.
  84   for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB);
  85        PI != PE && IDom != L.getHeader(); ++PI)
  86     IDom = DT.findNearestCommonDominator(IDom, *PI);
  87
  88   DT.changeImmediateDominator(LoopExitBB, IDom);
  89 }
  90
  91 /// Update the dominator tree after unswitching a particular former exit block.
  92 ///
  93 /// This handles the full update of the dominator tree after hoisting a block
  94 /// that previously was an exit block (or split off of an exit block) up to be
  95 /// reached from the new immediate dominator of the preheader.
  96 ///
  97 /// The common case is simple -- we just move the unswitched block to have an
  98 /// immediate dominator of the old preheader. But in complex cases, there may
  99 /// be other blocks reachable from the unswitched block that are immediately
 100 /// dominated by some node between the unswitched one and the old preheader.
 101 /// All of these also need to be hoisted in the dominator tree. We also want to
 102 /// minimize queries to the dominator tree because each step of this
 103 /// invalidates any DFS numbers that would make queries fast.
 104 static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
 105                                   DominatorTree &DT) {
 106   DomTreeNode *OldPHNode = DT[OldPH];
 107   DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
 108   // If the dominator tree has already been updated for this unswitched node,
 109   // we're done. This makes it easier to use this routine if there are multiple
 110   // paths to the same unswitched destination.
 111   if (UnswitchedNode->getIDom() == OldPHNode)
 112     return;
 113
 114   // First collect the domtree nodes that we are hoisting over. These are the
 115   // set of nodes which may have children that need to be hoisted as well.
 116   SmallPtrSet<DomTreeNode *, 4> DomChain;
 117   for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
 118        IDom = IDom->getIDom())
 119     DomChain.insert(IDom);
 120
 121   // The unswitched block ends up immediately dominated by the old preheader --
 122   // regardless of whether it is the loop exit block or split off of the loop
 123   // exit block.
 124   DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
 125
 126   // For everything that moves up the dominator tree, we need to examine the
 127   // dominator frontier to see if it additionally should move up the dominator
 128   // tree. This lambda appends the dominator frontier for a node on the
 129   // worklist.
 130   //
 131   // Note that we don't currently use the IDFCalculator here for two reasons:
 132   // 1) It computes dominator tree levels for the entire function on each run
 133   //    of 'compute'. While this isn't terrible, given that we expect to update
 134   //    relatively small subtrees of the domtree, it isn't necessarily the right
 135   //    tradeoff.
 136   // 2) The interface doesn't fit this usage well. It doesn't operate in
 137   //    append-only, and builds several sets that we don't need.
 138   //
 139   // FIXME: Neither of these issues are a big deal and could be addressed with
 140   // some amount of refactoring of IDFCalculator. That would allow us to share
 141   // the core logic here (which is solving the same core problem).
 142   SmallSetVector<BasicBlock *, 4> Worklist;
 143   SmallVector<DomTreeNode *, 4> DomNodes;
 144   SmallPtrSet<BasicBlock *, 4> DomSet;
 145   auto AppendDomFrontier = [&](DomTreeNode *Node) {
 146     assert(DomNodes.empty() && "Must start with no dominator nodes.");
 147     assert(DomSet.empty() && "Must start with an empty dominator set.");
 148
 149     // First flatten this subtree into sequence of nodes by doing a pre-order
 150     // walk.
 151     DomNodes.push_back(Node);
 152     // We intentionally re-evaluate the size as each node can add new children.
 153     // Because this is a tree walk, this cannot add any duplicates.
 154     for (int i = 0; i < (int)DomNodes.size(); ++i)
 155       DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
 156
 157     // Now create a set of the basic blocks so we can quickly test for
 158     // dominated successors. We could in theory use the DFS numbers of the
 159     // dominator tree for this, but we want this to remain predictably fast
 160     // even while we mutate the dominator tree in ways that would invalidate
 161     // the DFS numbering.
 162     for (DomTreeNode *InnerN : DomNodes)
 163       DomSet.insert(InnerN->getBlock());
 164
 165     // Now re-walk the nodes, appending every successor of every node that isn't
 166     // in the set. Note that we don't append the node itself, even though if it
 167     // is a successor it does not strictly dominate itself and thus it would be
 168     // part of the dominance frontier. The reason we don't append it is that
 169     // the node passed in came *from* the worklist and so it has already been
 170     // processed.
 171     for (DomTreeNode *InnerN : DomNodes)
 172       for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
 173         if (!DomSet.count(SuccBB))
 174           Worklist.insert(SuccBB);
 175
 176     DomNodes.clear();
 177     DomSet.clear();
 178   };
 179
 180   // Append the initial dom frontier nodes.
 181   AppendDomFrontier(UnswitchedNode);
 182
 183   // Walk the worklist. We grow the list in the loop and so must recompute size.
 184   for (int i = 0; i < (int)Worklist.size(); ++i) {
 185     auto *BB = Worklist[i];
 186
 187     DomTreeNode *Node = DT[BB];
 188     assert(!DomChain.count(Node) &&
 189            "Cannot be dominated by a block you can reach!");
 190
 191     // If this block had an immediate dominator somewhere in the chain
 192     // we hoisted over, then its position in the domtree needs to move as it is
 193     // reachable from a node hoisted over this chain.
 194     if (!DomChain.count(Node->getIDom()))
 195       continue;
 196
 197     DT.changeImmediateDominator(Node, OldPHNode);
 198
 199     // Now add this node's dominator frontier to the worklist as well.
 200     AppendDomFrontier(Node);
 201   }
 202 }
 203
 204 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial
 205 /// incoming values along this edge.
 206 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
 207                                          BasicBlock &ExitBB) {
 208   for (Instruction &I : ExitBB) {
 209     auto *PN = dyn_cast<PHINode>(&I);
 210     if (!PN)
 211       // No more PHIs to check.
 212       return true;
 213
 214     // If the incoming value for this edge isn't loop invariant the unswitch
 215     // won't be trivial.
 216     if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
 217       return false;
 218   }
 219   llvm_unreachable("Basic blocks should never be empty!");
 220 }
 221
 222 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
 223 ///
 224 /// Requires that the loop exit and unswitched basic block are the same, and
 225 /// that the exiting block was a unique predecessor of that block. Rewrites the
 226 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
 227 /// PHI nodes from the old preheader that now contains the unswitched
 228 /// terminator.
 229 static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
 230                                                   BasicBlock &OldExitingBB,
 231                                                   BasicBlock &OldPH) {
 232   for (Instruction &I : UnswitchedBB) {
 233     auto *PN = dyn_cast<PHINode>(&I);
 234     if (!PN)
 235       // No more PHIs to check.
 236       break;
 237
 238     // When the loop exit is directly unswitched we just need to update the
 239     // incoming basic block. We loop to handle weird cases with repeated
 240     // incoming blocks, but expect to typically only have one operand here.
 241     for (auto i : seq<int>(0, PN->getNumOperands())) {
 242       assert(PN->getIncomingBlock(i) == &OldExitingBB &&
 243              "Found incoming block different from unique predecessor!");
 244       PN->setIncomingBlock(i, &OldPH);
 245     }
 246   }
 247 }
 248
 249 /// Rewrite the PHI nodes in the loop exit basic block and the split off
 250 /// unswitched block.
 251 ///
 252 /// Because the exit block remains an exit from the loop, this rewrites the
 253 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
 254 /// nodes into the unswitched basic block to select between the value in the
 255 /// old preheader and the loop exit.
 256 static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
 257                                                       BasicBlock &UnswitchedBB,
 258                                                       BasicBlock &OldExitingBB,
 259                                                       BasicBlock &OldPH) {
 260   assert(&ExitBB != &UnswitchedBB &&
 261          "Must have different loop exit and unswitched blocks!");
 262   Instruction *InsertPt = &*UnswitchedBB.begin();
 263   for (Instruction &I : ExitBB) {
 264     auto *PN = dyn_cast<PHINode>(&I);
 265     if (!PN)
 266       // No more PHIs to check.
 267       break;
 268
 269     auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
 270                                   PN->getName() + ".split", InsertPt);
 271
 272     // Walk backwards over the old PHI node's inputs to minimize the cost of
 273     // removing each one. We have to do this weird loop manually so that we
 274     // create the same number of new incoming edges in the new PHI as we expect
 275     // each case-based edge to be included in the unswitched switch in some
 276     // cases.
 277     // FIXME: This is really, really gross. It would be much cleaner if LLVM
 278     // allowed us to create a single entry for a predecessor block without
 279     // having separate entries for each "edge" even though these edges are
 280     // required to produce identical results.
 281     for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
 282       if (PN->getIncomingBlock(i) != &OldExitingBB)
 283         continue;
 284
 285       Value *Incoming = PN->removeIncomingValue(i);
 286       NewPN->addIncoming(Incoming, &OldPH);
 287     }
 288
 289     // Now replace the old PHI with the new one and wire the old one in as an
 290     // input to the new one.
 291     PN->replaceAllUsesWith(NewPN);
 292     NewPN->addIncoming(PN, &ExitBB);
 293   }
 294 }
 295
 296 /// Unswitch a trivial branch if the condition is loop invariant.
 297 ///
 298 /// This routine should only be called when loop code leading to the branch has
 299 /// been validated as trivial (no side effects). This routine checks if the
 300 /// condition is invariant and one of the successors is a loop exit. This
 301 /// allows us to unswitch without duplicating the loop, making it trivial.
 302 ///
 303 /// If this routine fails to unswitch the branch it returns false.
 304 ///
 305 /// If the branch can be unswitched, this routine splits the preheader and
 306 /// hoists the branch above that split. Preserves loop simplified form
 307 /// (splitting the exit block as necessary). It simplifies the branch within
 308 /// the loop to an unconditional branch but doesn't remove it entirely. Further
 309 /// cleanup can be done with some simplify-cfg like pass.
 310 static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
 311                                   LoopInfo &LI) {
 312   assert(BI.isConditional() && "Can only unswitch a conditional branch!");
 313   DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n");
 314
 315   Value *LoopCond = BI.getCondition();
 316
 317   // Need a trivial loop condition to unswitch.
 318   if (!L.isLoopInvariant(LoopCond))
 319     return false;
 320
 321   // FIXME: We should compute this once at the start and update it!
 322   SmallVector<BasicBlock *, 16> ExitBlocks;
 323   L.getExitBlocks(ExitBlocks);
 324   SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
 325                                              ExitBlocks.end());
 326
 327   // Check to see if a successor of the branch is guaranteed to
 328   // exit through a unique exit block without having any
 329   // side-effects.  If so, determine the value of Cond that causes
 330   // it to do this.
 331   ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
 332   ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
 333   int LoopExitSuccIdx = 0;
 334   auto *LoopExitBB = BI.getSuccessor(0);
 335   if (!ExitBlockSet.count(LoopExitBB)) {
 336     std::swap(CondVal, Replacement);
 337     LoopExitSuccIdx = 1;
 338     LoopExitBB = BI.getSuccessor(1);
 339     if (!ExitBlockSet.count(LoopExitBB))
 340       return false;
 341   }
 342   auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
 343   assert(L.contains(ContinueBB) &&
 344          "Cannot have both successors exit and still be in the loop!");
 345
 346   auto *ParentBB = BI.getParent();
 347   if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB))
 348     return false;
 349
 350   DEBUG(dbgs() << "    unswitching trivial branch when: " << CondVal
 351                << " == " << LoopCond << "\n");
 352
 353   // Split the preheader, so that we know that there is a safe place to insert
 354   // the conditional branch. We will change the preheader to have a conditional
 355   // branch on LoopCond.
 356   BasicBlock *OldPH = L.getLoopPreheader();
 357   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
 358
 359   // Now that we have a place to insert the conditional branch, create a place
 360   // to branch to: this is the exit block out of the loop that we are
 361   // unswitching. We need to split this if there are other loop predecessors.
 362   // Because the loop is in simplified form, *any* other predecessor is enough.
 363   BasicBlock *UnswitchedBB;
 364   if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
 365     (void)PredBB;
 366     assert(PredBB == BI.getParent() &&
 367            "A branch's parent isn't a predecessor!");
 368     UnswitchedBB = LoopExitBB;
 369   } else {
 370     UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
 371   }
 372
 373   // Now splice the branch to gate reaching the new preheader and re-point its
 374   // successors.
 375   OldPH->getInstList().splice(std::prev(OldPH->end()),
 376                               BI.getParent()->getInstList(), BI);
 377   OldPH->getTerminator()->eraseFromParent();
 378   BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
 379   BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
 380
 381   // Create a new unconditional branch that will continue the loop as a new
 382   // terminator.
 383   BranchInst::Create(ContinueBB, ParentBB);
 384
 385   // Rewrite the relevant PHI nodes.
 386   if (UnswitchedBB == LoopExitBB)
 387     rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
 388   else
 389     rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
 390                                               *ParentBB, *OldPH);
 391
 392   // Now we need to update the dominator tree.
 393   updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
 394   // But if we split something off of the loop exit block then we also removed
 395   // one of the predecessors for the loop exit block and may need to update its
 396   // idom.
 397   if (UnswitchedBB != LoopExitBB)
 398     updateLoopExitIDom(LoopExitBB, L, DT);
 399
 400   // Since this is an i1 condition we can also trivially replace uses of it
 401   // within the loop with a constant.
 402   replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
 403
 404   ++NumTrivial;
 405   ++NumBranches;
 406   return true;
 407 }
 408
 409 /// Unswitch a trivial switch if the condition is loop invariant.
 410 ///
 411 /// This routine should only be called when loop code leading to the switch has
 412 /// been validated as trivial (no side effects). This routine checks if the
 413 /// condition is invariant and that at least one of the successors is a loop
 414 /// exit. This allows us to unswitch without duplicating the loop, making it
 415 /// trivial.
 416 ///
 417 /// If this routine fails to unswitch the switch it returns false.
 418 ///
 419 /// If the switch can be unswitched, this routine splits the preheader and
 420 /// copies the switch above that split. If the default case is one of the
 421 /// exiting cases, it copies the non-exiting cases and points them at the new
 422 /// preheader. If the default case is not exiting, it copies the exiting cases
 423 /// and points the default at the preheader. It preserves loop simplified form
 424 /// (splitting the exit blocks as necessary). It simplifies the switch within
 425 /// the loop by removing now-dead cases. If the default case is one of those
 426 /// unswitched, it replaces its destination with a new basic block containing
 427 /// only unreachable. Such basic blocks, while technically loop exits, are not
 428 /// considered for unswitching so this is a stable transform and the same
 429 /// switch will not be revisited. If after unswitching there is only a single
 430 /// in-loop successor, the switch is further simplified to an unconditional
 431 /// branch. Still more cleanup can be done with some simplify-cfg like pass.
 432 static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
 433                                   LoopInfo &LI) {
 434   DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n");
 435   Value *LoopCond = SI.getCondition();
 436
 437   // If this isn't switching on an invariant condition, we can't unswitch it.
 438   if (!L.isLoopInvariant(LoopCond))
 439     return false;
 440
 441   auto *ParentBB = SI.getParent();
 442
 443   // FIXME: We should compute this once at the start and update it!
 444   SmallVector<BasicBlock *, 16> ExitBlocks;
 445   L.getExitBlocks(ExitBlocks);
 446   SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
 447                                              ExitBlocks.end());
 448
 449   SmallVector<int, 4> ExitCaseIndices;
 450   for (auto Case : SI.cases()) {
 451     auto *SuccBB = Case.getCaseSuccessor();
 452     if (ExitBlockSet.count(SuccBB) &&
 453         areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB))
 454       ExitCaseIndices.push_back(Case.getCaseIndex());
 455   }
 456   BasicBlock *DefaultExitBB = nullptr;
 457   if (ExitBlockSet.count(SI.getDefaultDest()) &&
 458       areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
 459       !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
 460     DefaultExitBB = SI.getDefaultDest();
 461   else if (ExitCaseIndices.empty())
 462     return false;
 463
 464   DEBUG(dbgs() << "    unswitching trivial cases...\n");
 465
 466   SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
 467   ExitCases.reserve(ExitCaseIndices.size());
 468   // We walk the case indices backwards so that we remove the last case first
 469   // and don't disrupt the earlier indices.
 470   for (unsigned Index : reverse(ExitCaseIndices)) {
 471     auto CaseI = SI.case_begin() + Index;
 472     // Save the value of this case.
 473     ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
 474     // Delete the unswitched cases.
 475     SI.removeCase(CaseI);
 476   }
 477
 478   // Check if after this all of the remaining cases point at the same
 479   // successor.
 480   BasicBlock *CommonSuccBB = nullptr;
 481   if (SI.getNumCases() > 0 &&
 482       std::all_of(std::next(SI.case_begin()), SI.case_end(),
 483                   [&SI](const SwitchInst::CaseHandle &Case) {
 484                     return Case.getCaseSuccessor() ==
 485                            SI.case_begin()->getCaseSuccessor();
 486                   }))
 487     CommonSuccBB = SI.case_begin()->getCaseSuccessor();
 488
 489   if (DefaultExitBB) {
 490     // We can't remove the default edge so replace it with an edge to either
 491     // the single common remaining successor (if we have one) or an unreachable
 492     // block.
 493     if (CommonSuccBB) {
 494       SI.setDefaultDest(CommonSuccBB);
 495     } else {
 496       BasicBlock *UnreachableBB = BasicBlock::Create(
 497           ParentBB->getContext(),
 498           Twine(ParentBB->getName()) + ".unreachable_default",
 499           ParentBB->getParent());
 500       new UnreachableInst(ParentBB->getContext(), UnreachableBB);
 501       SI.setDefaultDest(UnreachableBB);
 502       DT.addNewBlock(UnreachableBB, ParentBB);
 503     }
 504   } else {
 505     // If we're not unswitching the default, we need it to match any cases to
 506     // have a common successor or if we have no cases it is the common
 507     // successor.
 508     if (SI.getNumCases() == 0)
 509       CommonSuccBB = SI.getDefaultDest();
 510     else if (SI.getDefaultDest() != CommonSuccBB)
 511       CommonSuccBB = nullptr;
 512   }
 513
 514   // Split the preheader, so that we know that there is a safe place to insert
 515   // the switch.
 516   BasicBlock *OldPH = L.getLoopPreheader();
 517   BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
 518   OldPH->getTerminator()->eraseFromParent();
 519
 520   // Now add the unswitched switch.
 521   auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
 522
 523   // Rewrite the IR for the unswitched basic blocks. This requires two steps.
 524   // First, we split any exit blocks with remaining in-loop predecessors. Then
 525   // we update the PHIs in one of two ways depending on if there was a split.
 526   // We walk in reverse so that we split in the same order as the cases
 527   // appeared. This is purely for convenience of reading the resulting IR, but
 528   // it doesn't cost anything really.
 529   SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
 530   SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
 531   // Handle the default exit if necessary.
 532   // FIXME: It'd be great if we could merge this with the loop below but LLVM's
 533   // ranges aren't quite powerful enough yet.
 534   if (DefaultExitBB) {
 535     if (pred_empty(DefaultExitBB)) {
 536       UnswitchedExitBBs.insert(DefaultExitBB);
 537       rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
 538     } else {
 539       auto *SplitBB =
 540           SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
 541       rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
 542                                                 *ParentBB, *OldPH);
 543       updateLoopExitIDom(DefaultExitBB, L, DT);
 544       DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
 545     }
 546   }
 547   // Note that we must use a reference in the for loop so that we update the
 548   // container.
 549   for (auto &CasePair : reverse(ExitCases)) {
 550     // Grab a reference to the exit block in the pair so that we can update it.
 551     BasicBlock *ExitBB = CasePair.second;
 552
 553     // If this case is the last edge into the exit block, we can simply reuse it
 554     // as it will no longer be a loop exit. No mapping necessary.
 555     if (pred_empty(ExitBB)) {
 556       // Only rewrite once.
 557       if (UnswitchedExitBBs.insert(ExitBB).second)
 558         rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
 559       continue;
 560     }
 561
 562     // Otherwise we need to split the exit block so that we retain an exit
 563     // block from the loop and a target for the unswitched condition.
 564     BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
 565     if (!SplitExitBB) {
 566       // If this is the first time we see this, do the split and remember it.
 567       SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
 568       rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
 569                                                 *ParentBB, *OldPH);
 570       updateLoopExitIDom(ExitBB, L, DT);
 571     }
 572     // Update the case pair to point to the split block.
 573     CasePair.second = SplitExitBB;
 574   }
 575
 576   // Now add the unswitched cases. We do this in reverse order as we built them
 577   // in reverse order.
 578   for (auto CasePair : reverse(ExitCases)) {
 579     ConstantInt *CaseVal = CasePair.first;
 580     BasicBlock *UnswitchedBB = CasePair.second;
 581
 582     NewSI->addCase(CaseVal, UnswitchedBB);
 583     updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
 584   }
 585
 586   // If the default was unswitched, re-point it and add explicit cases for
 587   // entering the loop.
 588   if (DefaultExitBB) {
 589     NewSI->setDefaultDest(DefaultExitBB);
 590     updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
 591
 592     // We removed all the exit cases, so we just copy the cases to the
 593     // unswitched switch.
 594     for (auto Case : SI.cases())
 595       NewSI->addCase(Case.getCaseValue(), NewPH);
 596   }
 597
 598   // If we ended up with a common successor for every path through the switch
 599   // after unswitching, rewrite it to an unconditional branch to make it easy
 600   // to recognize. Otherwise we potentially have to recognize the default case
 601   // pointing at unreachable and other complexity.
 602   if (CommonSuccBB) {
 603     BasicBlock *BB = SI.getParent();
 604     SI.eraseFromParent();
 605     BranchInst::Create(CommonSuccBB, BB);
 606   }
 607
 608   DT.verifyDomTree();
 609   ++NumTrivial;
 610   ++NumSwitches;
 611   return true;
 612 }
 613
 614 /// This routine scans the loop to find a branch or switch which occurs before
 615 /// any side effects occur. These can potentially be unswitched without
 616 /// duplicating the loop. If a branch or switch is successfully unswitched the
 617 /// scanning continues to see if subsequent branches or switches have become
 618 /// trivial. Once all trivial candidates have been unswitched, this routine
 619 /// returns.
 620 ///
 621 /// The return value indicates whether anything was unswitched (and therefore
 622 /// changed).
 623 static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
 624                                          LoopInfo &LI) {
 625   bool Changed = false;
 626
 627   // If loop header has only one reachable successor we should keep looking for
 628   // trivial condition candidates in the successor as well. An alternative is
 629   // to constant fold conditions and merge successors into loop header (then we
 630   // only need to check header's terminator). The reason for not doing this in
 631   // LoopUnswitch pass is that it could potentially break LoopPassManager's
 632   // invariants. Folding dead branches could either eliminate the current loop
 633   // or make other loops unreachable. LCSSA form might also not be preserved
 634   // after deleting branches. The following code keeps traversing loop header's
 635   // successors until it finds the trivial condition candidate (condition that
 636   // is not a constant). Since unswitching generates branches with constant
 637   // conditions, this scenario could be very common in practice.
 638   BasicBlock *CurrentBB = L.getHeader();
 639   SmallPtrSet<BasicBlock *, 8> Visited;
 640   Visited.insert(CurrentBB);
 641   do {
 642     // Check if there are any side-effecting instructions (e.g. stores, calls,
 643     // volatile loads) in the part of the loop that the code *would* execute
 644     // without unswitching.
 645     if (llvm::any_of(*CurrentBB,
 646                      [](Instruction &I) { return I.mayHaveSideEffects(); }))
 647       return Changed;
 648
 649     TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
 650
 651     if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
 652       // Don't bother trying to unswitch past a switch with a constant
 653       // condition. This should be removed prior to running this pass by
 654       // simplify-cfg.
 655       if (isa<Constant>(SI->getCondition()))
 656         return Changed;
 657
 658       if (!unswitchTrivialSwitch(L, *SI, DT, LI))
  659         // Couldn't unswitch this one so we're done.
 660         return Changed;
 661
 662       // Mark that we managed to unswitch something.
 663       Changed = true;
 664
 665       // If unswitching turned the terminator into an unconditional branch then
 666       // we can continue. The unswitching logic specifically works to fold any
 667       // cases it can into an unconditional branch to make it easier to
 668       // recognize here.
 669       auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
 670       if (!BI || BI->isConditional())
 671         return Changed;
 672
 673       CurrentBB = BI->getSuccessor(0);
 674       continue;
 675     }
 676
 677     auto *BI = dyn_cast<BranchInst>(CurrentTerm);
 678     if (!BI)
 679       // We do not understand other terminator instructions.
 680       return Changed;
 681
 682     // Don't bother trying to unswitch past an unconditional branch or a branch
 683     // with a constant value. These should be removed by simplify-cfg prior to
 684     // running this pass.
 685     if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
 686       return Changed;
 687
 688     // Found a trivial condition candidate: non-foldable conditional branch. If
 689     // we fail to unswitch this, we can't do anything else that is trivial.
 690     if (!unswitchTrivialBranch(L, *BI, DT, LI))
 691       return Changed;
 692
 693     // Mark that we managed to unswitch something.
 694     Changed = true;
 695
 696     // We unswitched the branch. This should always leave us with an
 697     // unconditional branch that we can follow now.
 698     BI = cast<BranchInst>(CurrentBB->getTerminator());
 699     assert(!BI->isConditional() &&
 700            "Cannot form a conditional branch by unswitching1");
 701     CurrentBB = BI->getSuccessor(0);
 702
 703     // When continuing, if we exit the loop or reach a previous visited block,
 704     // then we can not reach any trivial condition candidates (unfoldable
 705     // branch instructions or switch instructions) and no unswitch can happen.
 706   } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
 707
 708   return Changed;
 709 }
 710
 711 /// Unswitch control flow predicated on loop invariant conditions.
 712 ///
 713 /// This first hoists all branches or switches which are trivial (IE, do not
 714 /// require duplicating any part of the loop) out of the loop body. It then
 715 /// looks at other loop invariant control flows and tries to unswitch those as
 716 /// well by cloning the loop if the result is small enough.
 717 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
 718                          AssumptionCache &AC) {
 719   assert(L.isLCSSAForm(DT) &&
 720          "Loops must be in LCSSA form before unswitching.");
 721   bool Changed = false;
 722
 723   // Must be in loop simplified form: we need a preheader and dedicated exits.
 724   if (!L.isLoopSimplifyForm())
 725     return false;
 726
 727   // Try trivial unswitch first before loop over other basic blocks in the loop.
 728   Changed |= unswitchAllTrivialConditions(L, DT, LI);
 729
 730   // FIXME: Add support for non-trivial unswitching by cloning the loop.
 731
 732   return Changed;
 733 }
 734
 735 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
 736                                               LoopStandardAnalysisResults &AR,
 737                                               LPMUpdater &U) {
 738   Function &F = *L.getHeader()->getParent();
 739   (void)F;
 740
 741   DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
 742
 743   if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC))
 744     return PreservedAnalyses::all();
 745
 746 #ifndef NDEBUG
 747   // Historically this pass has had issues with the dominator tree so verify it
 748   // in asserts builds.
 749   AR.DT.verifyDomTree();
 750 #endif
 751   return getLoopPassPreservedAnalyses();
 752 }
 753
namespace {

/// Legacy pass manager (LoopPass) wrapper around the simple loop unswitching
/// transformation implemented in this file.
class SimpleLoopUnswitchLegacyPass : public LoopPass {
public:
  static char ID; // Pass ID, replacement for typeid

  /// Construct the pass and run its initializer against the registry returned
  /// by PassRegistry::getPassRegistry().
  explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
    initializeSimpleLoopUnswitchLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  /// Per-loop entry point; defined out of line.
  bool runOnLoop(Loop *L, LPPassManager &LPM) override;

  /// Declare the analyses this pass needs.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The assumption cache is requested explicitly.
    AU.addRequired<AssumptionCacheTracker>();
    // NOTE(review): presumably this adds the common loop-pass analyses
    // (runOnLoop fetches the dominator tree and loop info) -- confirm against
    // getLoopAnalysisUsage.
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace
 774
 775 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
 776   if (skipLoop(L))
 777     return false;
 778
 779   Function &F = *L->getHeader()->getParent();
 780
 781   DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
 782
 783   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 784   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 785   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 786
 787   bool Changed = unswitchLoop(*L, DT, LI, AC);
 788
 789 #ifndef NDEBUG
 790   // Historically this pass has had issues with the dominator tree so verify it
 791   // in asserts builds.
 792   DT.verifyDomTree();
 793 #endif
 794   return Changed;
 795 }
 796
// Out-of-line definition of the pass identifier. Its address, not its value,
// is what identifies the pass (see the "replacement for typeid" note on the
// declaration).
char SimpleLoopUnswitchLegacyPass::ID = 0;

// Register the legacy pass under the command-line name "simple-loop-unswitch"
// together with the passes it declares as dependencies.
// NOTE(review): the two trailing `false` flags are presumably "CFG only" and
// "is analysis" -- confirm against the INITIALIZE_PASS_* macro definitions.
INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
                      "Simple unswitch loops", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
// NOTE(review): getAnalysisUsage() of this pass does not request
// TargetTransformInfoWrapperPass and runOnLoop() never queries it; confirm
// whether this dependency is still needed.
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
                    "Simple unswitch loops", false, false)
 805
 806 Pass *llvm::createSimpleLoopUnswitchLegacyPass() {
 807   return new SimpleLoopUnswitchLegacyPass();
 808 }