contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp

   1 //===- GVNSink.cpp - sink expressions into successors -------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file GVNSink.cpp
  11 /// This pass attempts to sink instructions into successors, reducing static
  12 /// instruction count and enabling if-conversion.
  13 ///
  14 /// We use a variant of global value numbering to decide what can be sunk.
  15 /// Consider:
  16 ///
  17 /// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
  18 /// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
  19 ///                  \           /
  20 ///            [ %e = phi i32 %a2, %c2 ]
  21 ///            [ add i32 %e, 4         ]
  22 ///
  23 ///
  24 /// GVN would number %a1 and %c1 differently because they compute different
  25 /// results - the VN of an instruction is a function of its opcode and the
  26 /// transitive closure of its operands. This is the key property for hoisting
  27 /// and CSE.
  28 ///
  29 /// What we want when sinking however is for a numbering that is a function of
  30 /// the *uses* of an instruction, which allows us to answer the question "if I
  31 /// replace %a1 with %c1, will it contribute in an equivalent way to all
  32 /// successive instructions?". The PostValueTable class in GVN provides this
  33 /// mapping.
  34 ///
  35 //===----------------------------------------------------------------------===//
  36
  37 #include "llvm/ADT/DenseMap.h"
  38 #include "llvm/ADT/DenseMapInfo.h"
  39 #include "llvm/ADT/DenseSet.h"
  40 #include "llvm/ADT/Hashing.h"
  41 #include "llvm/ADT/Optional.h"
  42 #include "llvm/ADT/PostOrderIterator.h"
  43 #include "llvm/ADT/SCCIterator.h"
  44 #include "llvm/ADT/SmallPtrSet.h"
  45 #include "llvm/ADT/Statistic.h"
  46 #include "llvm/ADT/StringExtras.h"
  47 #include "llvm/Analysis/GlobalsModRef.h"
  48 #include "llvm/Analysis/MemorySSA.h"
  49 #include "llvm/Analysis/PostDominators.h"
  50 #include "llvm/Analysis/TargetTransformInfo.h"
  51 #include "llvm/Analysis/ValueTracking.h"
  52 #include "llvm/IR/Instructions.h"
  53 #include "llvm/IR/Verifier.h"
  54 #include "llvm/Support/MathExtras.h"
  55 #include "llvm/Transforms/Scalar.h"
  56 #include "llvm/Transforms/Scalar/GVN.h"
  57 #include "llvm/Transforms/Scalar/GVNExpression.h"
  58 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  59 #include "llvm/Transforms/Utils/Local.h"
  60 #include <unordered_set>
  61 using namespace llvm;
  62
  63 #define DEBUG_TYPE "gvn-sink"
  64
  65 STATISTIC(NumRemoved, "Number of instructions removed");
  66
  67 namespace llvm {
  68 namespace GVNExpression {
  69
  70 LLVM_DUMP_METHOD void Expression::dump() const {
  71   print(dbgs());
  72   dbgs() << "\n";
  73 }
  74
  75 }
  76 }
  77
  78 namespace {
  79
  80 static bool isMemoryInst(const Instruction *I) {
  81   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
  82          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
  83          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
  84 }
  85
  86 /// Iterates through instructions in a set of blocks in reverse order from the
  87 /// first non-terminator. For example (assume all blocks have size n):
  88 ///   LockstepReverseIterator I([B1, B2, B3]);
  89 ///   *I-- = [B1[n], B2[n], B3[n]];
  90 ///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
  91 ///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
  92 ///   ...
  93 ///
  94 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
  95 /// to
  96 /// determine which blocks are still going and the order they appear in the
  97 /// list returned by operator*.
  98 class LockstepReverseIterator {
  99   ArrayRef<BasicBlock *> Blocks;
 100   SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
 101   SmallVector<Instruction *, 4> Insts;
 102   bool Fail;
 103
 104 public:
 105   LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
 106     reset();
 107   }
 108
 109   void reset() {
 110     Fail = false;
 111     ActiveBlocks.clear();
 112     for (BasicBlock *BB : Blocks)
 113       ActiveBlocks.insert(BB);
 114     Insts.clear();
 115     for (BasicBlock *BB : Blocks) {
 116       if (BB->size() <= 1) {
 117         // Block wasn't big enough - only contained a terminator.
 118         ActiveBlocks.erase(BB);
 119         continue;
 120       }
 121       Insts.push_back(BB->getTerminator()->getPrevNode());
 122     }
 123     if (Insts.empty())
 124       Fail = true;
 125   }
 126
 127   bool isValid() const { return !Fail; }
 128   ArrayRef<Instruction *> operator*() const { return Insts; }
 129   SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
 130
 131   void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
 132     for (auto II = Insts.begin(); II != Insts.end();) {
 133       if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
 134           Blocks.end()) {
 135         ActiveBlocks.erase((*II)->getParent());
 136         II = Insts.erase(II);
 137       } else {
 138         ++II;
 139       }
 140     }
 141   }
 142
 143   void operator--() {
 144     if (Fail)
 145       return;
 146     SmallVector<Instruction *, 4> NewInsts;
 147     for (auto *Inst : Insts) {
 148       if (Inst == &Inst->getParent()->front())
 149         ActiveBlocks.erase(Inst->getParent());
 150       else
 151         NewInsts.push_back(Inst->getPrevNode());
 152     }
 153     if (NewInsts.empty()) {
 154       Fail = true;
 155       return;
 156     }
 157     Insts = NewInsts;
 158   }
 159 };
 160
 161 //===----------------------------------------------------------------------===//
 162
 163 /// Candidate solution for sinking. There may be different ways to
 164 /// sink instructions, differing in the number of instructions sunk,
 165 /// the number of predecessors sunk from and the number of PHIs
 166 /// required.
 167 struct SinkingInstructionCandidate {
 168   unsigned NumBlocks;
 169   unsigned NumInstructions;
 170   unsigned NumPHIs;
 171   unsigned NumMemoryInsts;
 172   int Cost = -1;
 173   SmallVector<BasicBlock *, 4> Blocks;
 174
 175   void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
 176     unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
 177     unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
 178     Cost = (NumInstructions * (NumBlocks - 1)) -
 179            (NumExtraPHIs *
 180             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
 181            - SplitEdgeCost;
 182   }
 183   bool operator>(const SinkingInstructionCandidate &Other) const {
 184     return Cost > Other.Cost;
 185   }
 186 };
 187
 188 #ifndef NDEBUG
 189 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
 190                               const SinkingInstructionCandidate &C) {
 191   OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
 192      << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
 193   return OS;
 194 }
 195 #endif
 196
 197 //===----------------------------------------------------------------------===//
 198
 199 /// Describes a PHI node that may or may not exist. These track the PHIs
 200 /// that must be created if we sunk a sequence of instructions. It provides
 201 /// a hash function for efficient equality comparisons.
 202 class ModelledPHI {
 203   SmallVector<Value *, 4> Values;
 204   SmallVector<BasicBlock *, 4> Blocks;
 205
 206 public:
 207   ModelledPHI() {}
 208   ModelledPHI(const PHINode *PN) {
 209     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
 210       Blocks.push_back(PN->getIncomingBlock(I));
 211     std::sort(Blocks.begin(), Blocks.end());
 212
 213     // This assumes the PHI is already well-formed and there aren't conflicting
 214     // incoming values for the same block.
 215     for (auto *B : Blocks)
 216       Values.push_back(PN->getIncomingValueForBlock(B));
 217   }
 218   /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
 219   /// without the same ID.
 220   /// \note This is specifically for DenseMapInfo - do not use this!
 221   static ModelledPHI createDummy(size_t ID) {
 222     ModelledPHI M;
 223     M.Values.push_back(reinterpret_cast<Value*>(ID));
 224     return M;
 225   }
 226
 227   /// Create a PHI from an array of incoming values and incoming blocks.
 228   template <typename VArray, typename BArray>
 229   ModelledPHI(const VArray &V, const BArray &B) {
 230     std::copy(V.begin(), V.end(), std::back_inserter(Values));
 231     std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
 232   }
 233
 234   /// Create a PHI from [I[OpNum] for I in Insts].
 235   template <typename BArray>
 236   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
 237     std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
 238     for (auto *I : Insts)
 239       Values.push_back(I->getOperand(OpNum));
 240   }
 241
 242   /// Restrict the PHI's contents down to only \c NewBlocks.
 243   /// \c NewBlocks must be a subset of \c this->Blocks.
 244   void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
 245     auto BI = Blocks.begin();
 246     auto VI = Values.begin();
 247     while (BI != Blocks.end()) {
 248       assert(VI != Values.end());
 249       if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
 250           NewBlocks.end()) {
 251         BI = Blocks.erase(BI);
 252         VI = Values.erase(VI);
 253       } else {
 254         ++BI;
 255         ++VI;
 256       }
 257     }
 258     assert(Blocks.size() == NewBlocks.size());
 259   }
 260
 261   ArrayRef<Value *> getValues() const { return Values; }
 262
 263   bool areAllIncomingValuesSame() const {
 264     return all_of(Values, [&](Value *V) { return V == Values[0]; });
 265   }
 266   bool areAllIncomingValuesSameType() const {
 267     return all_of(
 268         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
 269   }
 270   bool areAnyIncomingValuesConstant() const {
 271     return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
 272   }
 273   // Hash functor
 274   unsigned hash() const {
 275       return (unsigned)hash_combine_range(Values.begin(), Values.end());
 276   }
 277   bool operator==(const ModelledPHI &Other) const {
 278     return Values == Other.Values && Blocks == Other.Blocks;
 279   }
 280 };
 281
 282 template <typename ModelledPHI> struct DenseMapInfo {
 283   static inline ModelledPHI &getEmptyKey() {
 284     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
 285     return Dummy;
 286   }
 287   static inline ModelledPHI &getTombstoneKey() {
 288     static ModelledPHI Dummy = ModelledPHI::createDummy(1);
 289     return Dummy;
 290   }
 291   static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
 292   static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
 293     return LHS == RHS;
 294   }
 295 };
 296
 297 typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
 298
 299 //===----------------------------------------------------------------------===//
 300 //                             ValueTable
 301 //===----------------------------------------------------------------------===//
 302 // This is a value number table where the value number is a function of the
 303 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
 304 // that the program would be equivalent if we replaced A with PHI(A, B).
 305 //===----------------------------------------------------------------------===//
 306
 307 /// A GVN expression describing how an instruction is used. The operands
 308 /// field of BasicExpression is used to store uses, not operands.
 309 ///
 310 /// This class also contains fields for discriminators used when determining
 311 /// equivalence of instructions with sideeffects.
 312 class InstructionUseExpr : public GVNExpression::BasicExpression {
 313   unsigned MemoryUseOrder = -1;
 314   bool Volatile = false;
 315
 316 public:
 317   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
 318                      BumpPtrAllocator &A)
 319       : GVNExpression::BasicExpression(I->getNumUses()) {
 320     allocateOperands(R, A);
 321     setOpcode(I->getOpcode());
 322     setType(I->getType());
 323
 324     for (auto &U : I->uses())
 325       op_push_back(U.getUser());
 326     std::sort(op_begin(), op_end());
 327   }
 328   void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
 329   void setVolatile(bool V) { Volatile = V; }
 330
 331   virtual hash_code getHashValue() const {
 332     return hash_combine(GVNExpression::BasicExpression::getHashValue(),
 333                         MemoryUseOrder, Volatile);
 334   }
 335
 336   template <typename Function> hash_code getHashValue(Function MapFn) {
 337     hash_code H =
 338         hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
 339     for (auto *V : operands())
 340       H = hash_combine(H, MapFn(V));
 341     return H;
 342   }
 343 };
 344
 345 class ValueTable {
 346   DenseMap<Value *, uint32_t> ValueNumbering;
 347   DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
 348   DenseMap<size_t, uint32_t> HashNumbering;
 349   BumpPtrAllocator Allocator;
 350   ArrayRecycler<Value *> Recycler;
 351   uint32_t nextValueNumber;
 352
 353   /// Create an expression for I based on its opcode and its uses. If I
 354   /// touches or reads memory, the expression is also based upon its memory
 355   /// order - see \c getMemoryUseOrder().
 356   InstructionUseExpr *createExpr(Instruction *I) {
 357     InstructionUseExpr *E =
 358         new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
 359     if (isMemoryInst(I))
 360       E->setMemoryUseOrder(getMemoryUseOrder(I));
 361
 362     if (CmpInst *C = dyn_cast<CmpInst>(I)) {
 363       CmpInst::Predicate Predicate = C->getPredicate();
 364       E->setOpcode((C->getOpcode() << 8) | Predicate);
 365     }
 366     return E;
 367   }
 368
 369   /// Helper to compute the value number for a memory instruction
 370   /// (LoadInst/StoreInst), including checking the memory ordering and
 371   /// volatility.
 372   template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
 373     if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
 374       return nullptr;
 375     InstructionUseExpr *E = createExpr(I);
 376     E->setVolatile(I->isVolatile());
 377     return E;
 378   }
 379
 380 public:
 381   /// Returns the value number for the specified value, assigning
 382   /// it a new number if it did not have one before.
 383   uint32_t lookupOrAdd(Value *V) {
 384     auto VI = ValueNumbering.find(V);
 385     if (VI != ValueNumbering.end())
 386       return VI->second;
 387
 388     if (!isa<Instruction>(V)) {
 389       ValueNumbering[V] = nextValueNumber;
 390       return nextValueNumber++;
 391     }
 392
 393     Instruction *I = cast<Instruction>(V);
 394     InstructionUseExpr *exp = nullptr;
 395     switch (I->getOpcode()) {
 396     case Instruction::Load:
 397       exp = createMemoryExpr(cast<LoadInst>(I));
 398       break;
 399     case Instruction::Store:
 400       exp = createMemoryExpr(cast<StoreInst>(I));
 401       break;
 402     case Instruction::Call:
 403     case Instruction::Invoke:
 404     case Instruction::Add:
 405     case Instruction::FAdd:
 406     case Instruction::Sub:
 407     case Instruction::FSub:
 408     case Instruction::Mul:
 409     case Instruction::FMul:
 410     case Instruction::UDiv:
 411     case Instruction::SDiv:
 412     case Instruction::FDiv:
 413     case Instruction::URem:
 414     case Instruction::SRem:
 415     case Instruction::FRem:
 416     case Instruction::Shl:
 417     case Instruction::LShr:
 418     case Instruction::AShr:
 419     case Instruction::And:
 420     case Instruction::Or:
 421     case Instruction::Xor:
 422     case Instruction::ICmp:
 423     case Instruction::FCmp:
 424     case Instruction::Trunc:
 425     case Instruction::ZExt:
 426     case Instruction::SExt:
 427     case Instruction::FPToUI:
 428     case Instruction::FPToSI:
 429     case Instruction::UIToFP:
 430     case Instruction::SIToFP:
 431     case Instruction::FPTrunc:
 432     case Instruction::FPExt:
 433     case Instruction::PtrToInt:
 434     case Instruction::IntToPtr:
 435     case Instruction::BitCast:
 436     case Instruction::Select:
 437     case Instruction::ExtractElement:
 438     case Instruction::InsertElement:
 439     case Instruction::ShuffleVector:
 440     case Instruction::InsertValue:
 441     case Instruction::GetElementPtr:
 442       exp = createExpr(I);
 443       break;
 444     default:
 445       break;
 446     }
 447
 448     if (!exp) {
 449       ValueNumbering[V] = nextValueNumber;
 450       return nextValueNumber++;
 451     }
 452
 453     uint32_t e = ExpressionNumbering[exp];
 454     if (!e) {
 455       hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
 456       auto I = HashNumbering.find(H);
 457       if (I != HashNumbering.end()) {
 458         e = I->second;
 459       } else {
 460         e = nextValueNumber++;
 461         HashNumbering[H] = e;
 462         ExpressionNumbering[exp] = e;
 463       }
 464     }
 465     ValueNumbering[V] = e;
 466     return e;
 467   }
 468
 469   /// Returns the value number of the specified value. Fails if the value has
 470   /// not yet been numbered.
 471   uint32_t lookup(Value *V) const {
 472     auto VI = ValueNumbering.find(V);
 473     assert(VI != ValueNumbering.end() && "Value not numbered?");
 474     return VI->second;
 475   }
 476
 477   /// Removes all value numberings and resets the value table.
 478   void clear() {
 479     ValueNumbering.clear();
 480     ExpressionNumbering.clear();
 481     HashNumbering.clear();
 482     Recycler.clear(Allocator);
 483     nextValueNumber = 1;
 484   }
 485
 486   ValueTable() : nextValueNumber(1) {}
 487
 488   /// \c Inst uses or touches memory. Return an ID describing the memory state
 489   /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
 490   /// the exact same memory operations happen after I1 and I2.
 491   ///
 492   /// This is a very hard problem in general, so we use domain-specific
 493   /// knowledge that we only ever check for equivalence between blocks sharing a
 494   /// single immediate successor that is common, and when determining if I1 ==
 495   /// I2 we will have already determined that next(I1) == next(I2). This
 496   /// inductive property allows us to simply return the value number of the next
 497   /// instruction that defines memory.
 498   uint32_t getMemoryUseOrder(Instruction *Inst) {
 499     auto *BB = Inst->getParent();
 500     for (auto I = std::next(Inst->getIterator()), E = BB->end();
 501          I != E && !I->isTerminator(); ++I) {
 502       if (!isMemoryInst(&*I))
 503         continue;
 504       if (isa<LoadInst>(&*I))
 505         continue;
 506       CallInst *CI = dyn_cast<CallInst>(&*I);
 507       if (CI && CI->onlyReadsMemory())
 508         continue;
 509       InvokeInst *II = dyn_cast<InvokeInst>(&*I);
 510       if (II && II->onlyReadsMemory())
 511         continue;
 512       return lookupOrAdd(&*I);
 513     }
 514     return 0;
 515   }
 516 };
 517
 518 //===----------------------------------------------------------------------===//
 519
 520 class GVNSink {
 521 public:
 522   GVNSink() : VN() {}
 523   bool run(Function &F) {
 524     DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
 525
 526     unsigned NumSunk = 0;
 527     ReversePostOrderTraversal<Function*> RPOT(&F);
 528     for (auto *N : RPOT)
 529       NumSunk += sinkBB(N);
 530
 531     return NumSunk > 0;
 532   }
 533
 534 private:
 535   ValueTable VN;
 536
 537   bool isInstructionBlacklisted(Instruction *I) {
 538     // These instructions may change or break semantics if moved.
 539     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
 540         I->getType()->isTokenTy())
 541       return true;
 542     return false;
 543   }
 544
 545   /// The main heuristic function. Analyze the set of instructions pointed to by
 546   /// LRI and return a candidate solution if these instructions can be sunk, or
 547   /// None otherwise.
 548   Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
 549       LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
 550       ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
 551
 552   /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
 553   void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
 554                           SmallPtrSetImpl<Value *> &PHIContents) {
 555     for (auto &I : *BB) {
 556       auto *PN = dyn_cast<PHINode>(&I);
 557       if (!PN)
 558         return;
 559
 560       auto MPHI = ModelledPHI(PN);
 561       PHIs.insert(MPHI);
 562       for (auto *V : MPHI.getValues())
 563         PHIContents.insert(V);
 564     }
 565   }
 566
 567   /// The main instruction sinking driver. Set up state and try and sink
 568   /// instructions into BBEnd from its predecessors.
 569   unsigned sinkBB(BasicBlock *BBEnd);
 570
 571   /// Perform the actual mechanics of sinking an instruction from Blocks into
 572   /// BBEnd, which is their only successor.
 573   void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
 574
 575   /// Remove PHIs that all have the same incoming value.
 576   void foldPointlessPHINodes(BasicBlock *BB) {
 577     auto I = BB->begin();
 578     while (PHINode *PN = dyn_cast<PHINode>(I++)) {
 579       if (!all_of(PN->incoming_values(),
 580                   [&](const Value *V) { return V == PN->getIncomingValue(0); }))
 581         continue;
 582       if (PN->getIncomingValue(0) != PN)
 583         PN->replaceAllUsesWith(PN->getIncomingValue(0));
 584       else
 585         PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
 586       PN->eraseFromParent();
 587     }
 588   }
 589 };
 590
 591 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
 592   LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
 593   ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
 594   auto Insts = *LRI;
 595   DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
 596                                                              : Insts) {
 597     I->dump();
 598   } dbgs() << " ]\n";);
 599
 600   DenseMap<uint32_t, unsigned> VNums;
 601   for (auto *I : Insts) {
 602     uint32_t N = VN.lookupOrAdd(I);
 603     DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
 604     if (N == ~0U)
 605       return None;
 606     VNums[N]++;
 607   }
 608   unsigned VNumToSink =
 609       std::max_element(VNums.begin(), VNums.end(),
 610                        [](const std::pair<uint32_t, unsigned> &I,
 611                           const std::pair<uint32_t, unsigned> &J) {
 612                          return I.second < J.second;
 613                        })
 614           ->first;
 615
 616   if (VNums[VNumToSink] == 1)
 617     // Can't sink anything!
 618     return None;
 619
 620   // Now restrict the number of incoming blocks down to only those with
 621   // VNumToSink.
 622   auto &ActivePreds = LRI.getActiveBlocks();
 623   unsigned InitialActivePredSize = ActivePreds.size();
 624   SmallVector<Instruction *, 4> NewInsts;
 625   for (auto *I : Insts) {
 626     if (VN.lookup(I) != VNumToSink)
 627       ActivePreds.erase(I->getParent());
 628     else
 629       NewInsts.push_back(I);
 630   }
 631   for (auto *I : NewInsts)
 632     if (isInstructionBlacklisted(I))
 633       return None;
 634
 635   // If we've restricted the incoming blocks, restrict all needed PHIs also
 636   // to that set.
 637   bool RecomputePHIContents = false;
 638   if (ActivePreds.size() != InitialActivePredSize) {
 639     ModelledPHISet NewNeededPHIs;
 640     for (auto P : NeededPHIs) {
 641       P.restrictToBlocks(ActivePreds);
 642       NewNeededPHIs.insert(P);
 643     }
 644     NeededPHIs = NewNeededPHIs;
 645     LRI.restrictToBlocks(ActivePreds);
 646     RecomputePHIContents = true;
 647   }
 648
 649   // The sunk instruction's results.
 650   ModelledPHI NewPHI(NewInsts, ActivePreds);
 651
 652   // Does sinking this instruction render previous PHIs redundant?
 653   if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
 654     NeededPHIs.erase(NewPHI);
 655     RecomputePHIContents = true;
 656   }
 657
 658   if (RecomputePHIContents) {
 659     // The needed PHIs have changed, so recompute the set of all needed
 660     // values.
 661     PHIContents.clear();
 662     for (auto &PHI : NeededPHIs)
 663       PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
 664   }
 665
 666   // Is this instruction required by a later PHI that doesn't match this PHI?
 667   // if so, we can't sink this instruction.
 668   for (auto *V : NewPHI.getValues())
 669     if (PHIContents.count(V))
 670       // V exists in this PHI, but the whole PHI is different to NewPHI
 671       // (else it would have been removed earlier). We cannot continue
 672       // because this isn't representable.
 673       return None;
 674
 675   // Which operands need PHIs?
 676   // FIXME: If any of these fail, we should partition up the candidates to
 677   // try and continue making progress.
 678   Instruction *I0 = NewInsts[0];
 679   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
 680     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
 681     if (PHI.areAllIncomingValuesSame())
 682       continue;
 683     if (!canReplaceOperandWithVariable(I0, OpNum))
 684       // We can 't create a PHI from this instruction!
 685       return None;
 686     if (NeededPHIs.count(PHI))
 687       continue;
 688     if (!PHI.areAllIncomingValuesSameType())
 689       return None;
 690     // Don't create indirect calls! The called value is the final operand.
 691     if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
 692         PHI.areAnyIncomingValuesConstant())
 693       return None;
 694
 695     NeededPHIs.reserve(NeededPHIs.size());
 696     NeededPHIs.insert(PHI);
 697     PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
 698   }
 699
 700   if (isMemoryInst(NewInsts[0]))
 701     ++MemoryInstNum;
 702
 703   SinkingInstructionCandidate Cand;
 704   Cand.NumInstructions = ++InstNum;
 705   Cand.NumMemoryInsts = MemoryInstNum;
 706   Cand.NumBlocks = ActivePreds.size();
 707   Cand.NumPHIs = NeededPHIs.size();
 708   for (auto *C : ActivePreds)
 709     Cand.Blocks.push_back(C);
 710
 711   return Cand;
 712 }
 713
 714 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
 715   DEBUG(dbgs() << "GVNSink: running on basic block ";
 716         BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
 717   SmallVector<BasicBlock *, 4> Preds;
 718   for (auto *B : predecessors(BBEnd)) {
 719     auto *T = B->getTerminator();
 720     if (isa<BranchInst>(T) || isa<SwitchInst>(T))
 721       Preds.push_back(B);
 722     else
 723       return 0;
 724   }
 725   if (Preds.size() < 2)
 726     return 0;
 727   std::sort(Preds.begin(), Preds.end());
 728
 729   unsigned NumOrigPreds = Preds.size();
 730   // We can only sink instructions through unconditional branches.
 731   for (auto I = Preds.begin(); I != Preds.end();) {
 732     if ((*I)->getTerminator()->getNumSuccessors() != 1)
 733       I = Preds.erase(I);
 734     else
 735       ++I;
 736   }
 737
 738   LockstepReverseIterator LRI(Preds);
 739   SmallVector<SinkingInstructionCandidate, 4> Candidates;
 740   unsigned InstNum = 0, MemoryInstNum = 0;
 741   ModelledPHISet NeededPHIs;
 742   SmallPtrSet<Value *, 4> PHIContents;
 743   analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
 744   unsigned NumOrigPHIs = NeededPHIs.size();
 745
 746   while (LRI.isValid()) {
 747     auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
 748                                              NeededPHIs, PHIContents);
 749     if (!Cand)
 750       break;
 751     Cand->calculateCost(NumOrigPHIs, Preds.size());
 752     Candidates.emplace_back(*Cand);
 753     --LRI;
 754   }
 755
 756   std::stable_sort(
 757       Candidates.begin(), Candidates.end(),
 758       [](const SinkingInstructionCandidate &A,
 759          const SinkingInstructionCandidate &B) { return A > B; });
 760   DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
 761                                                     : Candidates) dbgs()
 762                                                << "  " << C << "\n";);
 763
 764   // Pick the top candidate, as long it is positive!
 765   if (Candidates.empty() || Candidates.front().Cost <= 0)
 766     return 0;
 767   auto C = Candidates.front();
 768
 769   DEBUG(dbgs() << " -- Sinking: " << C << "\n");
 770   BasicBlock *InsertBB = BBEnd;
 771   if (C.Blocks.size() < NumOrigPreds) {
 772     DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
 773           dbgs() << "\n");
 774     InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
 775     if (!InsertBB) {
 776       DEBUG(dbgs() << " -- FAILED to split edge!\n");
 777       // Edge couldn't be split.
 778       return 0;
 779     }
 780   }
 781
 782   for (unsigned I = 0; I < C.NumInstructions; ++I)
 783     sinkLastInstruction(C.Blocks, InsertBB);
 784
 785   return C.NumInstructions;
 786 }
 787
 788 void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
 789                                   BasicBlock *BBEnd) {
 790   SmallVector<Instruction *, 4> Insts;
 791   for (BasicBlock *BB : Blocks)
 792     Insts.push_back(BB->getTerminator()->getPrevNode());
 793   Instruction *I0 = Insts.front();
 794
 795   SmallVector<Value *, 4> NewOperands;
 796   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
 797     bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
 798       return I->getOperand(O) != I0->getOperand(O);
 799     });
 800     if (!NeedPHI) {
 801       NewOperands.push_back(I0->getOperand(O));
 802       continue;
 803     }
 804
 805     // Create a new PHI in the successor block and populate it.
 806     auto *Op = I0->getOperand(O);
 807     assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
 808     auto *PN = PHINode::Create(Op->getType(), Insts.size(),
 809                                Op->getName() + ".sink", &BBEnd->front());
 810     for (auto *I : Insts)
 811       PN->addIncoming(I->getOperand(O), I->getParent());
 812     NewOperands.push_back(PN);
 813   }
 814
 815   // Arbitrarily use I0 as the new "common" instruction; remap its operands
 816   // and move it to the start of the successor block.
 817   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
 818     I0->getOperandUse(O).set(NewOperands[O]);
 819   I0->moveBefore(&*BBEnd->getFirstInsertionPt());
 820
 821   // Update metadata and IR flags.
 822   for (auto *I : Insts)
 823     if (I != I0) {
 824       combineMetadataForCSE(I0, I);
 825       I0->andIRFlags(I);
 826     }
 827
 828   for (auto *I : Insts)
 829     if (I != I0)
 830       I->replaceAllUsesWith(I0);
 831   foldPointlessPHINodes(BBEnd);
 832
 833   // Finally nuke all instructions apart from the common instruction.
 834   for (auto *I : Insts)
 835     if (I != I0)
 836       I->eraseFromParent();
 837
 838   NumRemoved += Insts.size() - 1;
 839 }
 840
 841 ////////////////////////////////////////////////////////////////////////////////
 842 // Pass machinery / boilerplate
 843
 844 class GVNSinkLegacyPass : public FunctionPass {
 845 public:
 846   static char ID;
 847
 848   GVNSinkLegacyPass() : FunctionPass(ID) {
 849     initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
 850   }
 851
 852   bool runOnFunction(Function &F) override {
 853     if (skipFunction(F))
 854       return false;
 855     GVNSink G;
 856     return G.run(F);
 857   }
 858
 859   void getAnalysisUsage(AnalysisUsage &AU) const override {
 860     AU.addPreserved<GlobalsAAWrapperPass>();
 861   }
 862 };
 863 } // namespace
 864
 865 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
 866   GVNSink G;
 867   if (!G.run(F))
 868     return PreservedAnalyses::all();
 869
 870   PreservedAnalyses PA;
 871   PA.preserve<GlobalsAA>();
 872   return PA;
 873 }
 874
 875 char GVNSinkLegacyPass::ID = 0;
 876 INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
 877                       "Early GVN sinking of Expressions", false, false)
 878 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 879 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
 880 INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
 881                     "Early GVN sinking of Expressions", false, false)
 882
 883 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }