contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp

   1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file GVNSink.cpp
  10 /// This pass attempts to sink instructions into successors, reducing static
  11 /// instruction count and enabling if-conversion.
  12 ///
  13 /// We use a variant of global value numbering to decide what can be sunk.
  14 /// Consider:
  15 ///
  16 /// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
  17 /// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
  18 ///                  \           /
  19 ///            [ %e = phi i32 %a2, %c2 ]
  20 ///            [ add i32 %e, 4         ]
  21 ///
  22 ///
  23 /// GVN would number %a1 and %c1 differently because they compute different
  24 /// results - the VN of an instruction is a function of its opcode and the
  25 /// transitive closure of its operands. This is the key property for hoisting
  26 /// and CSE.
  27 ///
  28 /// What we want when sinking however is for a numbering that is a function of
  29 /// the *uses* of an instruction, which allows us to answer the question "if I
  30 /// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides this
/// mapping.
  33 //
  34 //===----------------------------------------------------------------------===//
  35
  36 #include "llvm/ADT/ArrayRef.h"
  37 #include "llvm/ADT/DenseMap.h"
  38 #include "llvm/ADT/DenseMapInfo.h"
  39 #include "llvm/ADT/DenseSet.h"
  40 #include "llvm/ADT/Hashing.h"
  41 #include "llvm/ADT/None.h"
  42 #include "llvm/ADT/Optional.h"
  43 #include "llvm/ADT/PostOrderIterator.h"
  44 #include "llvm/ADT/STLExtras.h"
  45 #include "llvm/ADT/SmallPtrSet.h"
  46 #include "llvm/ADT/SmallVector.h"
  47 #include "llvm/ADT/Statistic.h"
  48 #include "llvm/ADT/StringExtras.h"
  49 #include "llvm/Analysis/GlobalsModRef.h"
  50 #include "llvm/IR/BasicBlock.h"
  51 #include "llvm/IR/CFG.h"
  52 #include "llvm/IR/Constants.h"
  53 #include "llvm/IR/Function.h"
  54 #include "llvm/IR/InstrTypes.h"
  55 #include "llvm/IR/Instruction.h"
  56 #include "llvm/IR/Instructions.h"
  57 #include "llvm/IR/PassManager.h"
  58 #include "llvm/IR/Type.h"
  59 #include "llvm/IR/Use.h"
  60 #include "llvm/IR/Value.h"
  61 #include "llvm/InitializePasses.h"
  62 #include "llvm/Pass.h"
  63 #include "llvm/Support/Allocator.h"
  64 #include "llvm/Support/ArrayRecycler.h"
  65 #include "llvm/Support/AtomicOrdering.h"
  66 #include "llvm/Support/Casting.h"
  67 #include "llvm/Support/Compiler.h"
  68 #include "llvm/Support/Debug.h"
  69 #include "llvm/Support/raw_ostream.h"
  70 #include "llvm/Transforms/Scalar.h"
  71 #include "llvm/Transforms/Scalar/GVN.h"
  72 #include "llvm/Transforms/Scalar/GVNExpression.h"
  73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  74 #include "llvm/Transforms/Utils/Local.h"
  75 #include <algorithm>
  76 #include <cassert>
  77 #include <cstddef>
  78 #include <cstdint>
  79 #include <iterator>
  80 #include <utility>
  81
  82 using namespace llvm;
  83
  84 #define DEBUG_TYPE "gvn-sink"
  85
  86 STATISTIC(NumRemoved, "Number of instructions removed");
  87
  88 namespace llvm {
  89 namespace GVNExpression {
  90
  91 LLVM_DUMP_METHOD void Expression::dump() const {
  92   print(dbgs());
  93   dbgs() << "\n";
  94 }
  95
  96 } // end namespace GVNExpression
  97 } // end namespace llvm
  98
  99 namespace {
 100
 101 static bool isMemoryInst(const Instruction *I) {
 102   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
 103          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
 104          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
 105 }
 106
 107 /// Iterates through instructions in a set of blocks in reverse order from the
 108 /// first non-terminator. For example (assume all blocks have size n):
 109 ///   LockstepReverseIterator I([B1, B2, B3]);
 110 ///   *I-- = [B1[n], B2[n], B3[n]];
 111 ///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
 112 ///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
 113 ///   ...
 114 ///
 115 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
 116 /// to
 117 /// determine which blocks are still going and the order they appear in the
 118 /// list returned by operator*.
 119 class LockstepReverseIterator {
 120   ArrayRef<BasicBlock *> Blocks;
 121   SmallSetVector<BasicBlock *, 4> ActiveBlocks;
 122   SmallVector<Instruction *, 4> Insts;
 123   bool Fail;
 124
 125 public:
 126   LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
 127     reset();
 128   }
 129
 130   void reset() {
 131     Fail = false;
 132     ActiveBlocks.clear();
 133     for (BasicBlock *BB : Blocks)
 134       ActiveBlocks.insert(BB);
 135     Insts.clear();
 136     for (BasicBlock *BB : Blocks) {
 137       if (BB->size() <= 1) {
 138         // Block wasn't big enough - only contained a terminator.
 139         ActiveBlocks.remove(BB);
 140         continue;
 141       }
 142       Insts.push_back(BB->getTerminator()->getPrevNode());
 143     }
 144     if (Insts.empty())
 145       Fail = true;
 146   }
 147
 148   bool isValid() const { return !Fail; }
 149   ArrayRef<Instruction *> operator*() const { return Insts; }
 150
 151   // Note: This needs to return a SmallSetVector as the elements of
 152   // ActiveBlocks will be later copied to Blocks using std::copy. The
 153   // resultant order of elements in Blocks needs to be deterministic.
 154   // Using SmallPtrSet instead causes non-deterministic order while
 155   // copying. And we cannot simply sort Blocks as they need to match the
 156   // corresponding Values.
 157   SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
 158
 159   void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
 160     for (auto II = Insts.begin(); II != Insts.end();) {
 161       if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
 162           Blocks.end()) {
 163         ActiveBlocks.remove((*II)->getParent());
 164         II = Insts.erase(II);
 165       } else {
 166         ++II;
 167       }
 168     }
 169   }
 170
 171   void operator--() {
 172     if (Fail)
 173       return;
 174     SmallVector<Instruction *, 4> NewInsts;
 175     for (auto *Inst : Insts) {
 176       if (Inst == &Inst->getParent()->front())
 177         ActiveBlocks.remove(Inst->getParent());
 178       else
 179         NewInsts.push_back(Inst->getPrevNode());
 180     }
 181     if (NewInsts.empty()) {
 182       Fail = true;
 183       return;
 184     }
 185     Insts = NewInsts;
 186   }
 187 };
 188
 189 //===----------------------------------------------------------------------===//
 190
 191 /// Candidate solution for sinking. There may be different ways to
 192 /// sink instructions, differing in the number of instructions sunk,
 193 /// the number of predecessors sunk from and the number of PHIs
 194 /// required.
 195 struct SinkingInstructionCandidate {
 196   unsigned NumBlocks;
 197   unsigned NumInstructions;
 198   unsigned NumPHIs;
 199   unsigned NumMemoryInsts;
 200   int Cost = -1;
 201   SmallVector<BasicBlock *, 4> Blocks;
 202
 203   void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
 204     unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
 205     unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
 206     Cost = (NumInstructions * (NumBlocks - 1)) -
 207            (NumExtraPHIs *
 208             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
 209            - SplitEdgeCost;
 210   }
 211
 212   bool operator>(const SinkingInstructionCandidate &Other) const {
 213     return Cost > Other.Cost;
 214   }
 215 };
 216
 217 #ifndef NDEBUG
 218 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
 219   OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
 220      << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
 221   return OS;
 222 }
 223 #endif
 224
 225 //===----------------------------------------------------------------------===//
 226
 227 /// Describes a PHI node that may or may not exist. These track the PHIs
 228 /// that must be created if we sunk a sequence of instructions. It provides
 229 /// a hash function for efficient equality comparisons.
 230 class ModelledPHI {
 231   SmallVector<Value *, 4> Values;
 232   SmallVector<BasicBlock *, 4> Blocks;
 233
 234 public:
 235   ModelledPHI() = default;
 236
 237   ModelledPHI(const PHINode *PN) {
 238     // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
 239     SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
 240     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
 241       Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
 242     llvm::sort(Ops);
 243     for (auto &P : Ops) {
 244       Blocks.push_back(P.first);
 245       Values.push_back(P.second);
 246     }
 247   }
 248
 249   /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
 250   /// without the same ID.
 251   /// \note This is specifically for DenseMapInfo - do not use this!
 252   static ModelledPHI createDummy(size_t ID) {
 253     ModelledPHI M;
 254     M.Values.push_back(reinterpret_cast<Value*>(ID));
 255     return M;
 256   }
 257
 258   /// Create a PHI from an array of incoming values and incoming blocks.
 259   template <typename VArray, typename BArray>
 260   ModelledPHI(const VArray &V, const BArray &B) {
 261     llvm::copy(V, std::back_inserter(Values));
 262     llvm::copy(B, std::back_inserter(Blocks));
 263   }
 264
 265   /// Create a PHI from [I[OpNum] for I in Insts].
 266   template <typename BArray>
 267   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
 268     llvm::copy(B, std::back_inserter(Blocks));
 269     for (auto *I : Insts)
 270       Values.push_back(I->getOperand(OpNum));
 271   }
 272
 273   /// Restrict the PHI's contents down to only \c NewBlocks.
 274   /// \c NewBlocks must be a subset of \c this->Blocks.
 275   void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
 276     auto BI = Blocks.begin();
 277     auto VI = Values.begin();
 278     while (BI != Blocks.end()) {
 279       assert(VI != Values.end());
 280       if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
 281           NewBlocks.end()) {
 282         BI = Blocks.erase(BI);
 283         VI = Values.erase(VI);
 284       } else {
 285         ++BI;
 286         ++VI;
 287       }
 288     }
 289     assert(Blocks.size() == NewBlocks.size());
 290   }
 291
 292   ArrayRef<Value *> getValues() const { return Values; }
 293
 294   bool areAllIncomingValuesSame() const {
 295     return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
 296   }
 297
 298   bool areAllIncomingValuesSameType() const {
 299     return llvm::all_of(
 300         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
 301   }
 302
 303   bool areAnyIncomingValuesConstant() const {
 304     return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
 305   }
 306
 307   // Hash functor
 308   unsigned hash() const {
 309       return (unsigned)hash_combine_range(Values.begin(), Values.end());
 310   }
 311
 312   bool operator==(const ModelledPHI &Other) const {
 313     return Values == Other.Values && Blocks == Other.Blocks;
 314   }
 315 };
 316
 317 template <typename ModelledPHI> struct DenseMapInfo {
 318   static inline ModelledPHI &getEmptyKey() {
 319     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
 320     return Dummy;
 321   }
 322
 323   static inline ModelledPHI &getTombstoneKey() {
 324     static ModelledPHI Dummy = ModelledPHI::createDummy(1);
 325     return Dummy;
 326   }
 327
 328   static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
 329
 330   static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
 331     return LHS == RHS;
 332   }
 333 };
 334
 335 using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
 336
 337 //===----------------------------------------------------------------------===//
 338 //                             ValueTable
 339 //===----------------------------------------------------------------------===//
 340 // This is a value number table where the value number is a function of the
 341 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
 342 // that the program would be equivalent if we replaced A with PHI(A, B).
 343 //===----------------------------------------------------------------------===//
 344
 345 /// A GVN expression describing how an instruction is used. The operands
 346 /// field of BasicExpression is used to store uses, not operands.
 347 ///
 348 /// This class also contains fields for discriminators used when determining
 349 /// equivalence of instructions with sideeffects.
 350 class InstructionUseExpr : public GVNExpression::BasicExpression {
 351   unsigned MemoryUseOrder = -1;
 352   bool Volatile = false;
 353   ArrayRef<int> ShuffleMask;
 354
 355 public:
 356   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
 357                      BumpPtrAllocator &A)
 358       : GVNExpression::BasicExpression(I->getNumUses()) {
 359     allocateOperands(R, A);
 360     setOpcode(I->getOpcode());
 361     setType(I->getType());
 362
 363     if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
 364       ShuffleMask = SVI->getShuffleMask().copy(A);
 365
 366     for (auto &U : I->uses())
 367       op_push_back(U.getUser());
 368     llvm::sort(op_begin(), op_end());
 369   }
 370
 371   void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
 372   void setVolatile(bool V) { Volatile = V; }
 373
 374   hash_code getHashValue() const override {
 375     return hash_combine(GVNExpression::BasicExpression::getHashValue(),
 376                         MemoryUseOrder, Volatile, ShuffleMask);
 377   }
 378
 379   template <typename Function> hash_code getHashValue(Function MapFn) {
 380     hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
 381                                ShuffleMask);
 382     for (auto *V : operands())
 383       H = hash_combine(H, MapFn(V));
 384     return H;
 385   }
 386 };
 387
 388 class ValueTable {
 389   DenseMap<Value *, uint32_t> ValueNumbering;
 390   DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
 391   DenseMap<size_t, uint32_t> HashNumbering;
 392   BumpPtrAllocator Allocator;
 393   ArrayRecycler<Value *> Recycler;
 394   uint32_t nextValueNumber = 1;
 395
 396   /// Create an expression for I based on its opcode and its uses. If I
 397   /// touches or reads memory, the expression is also based upon its memory
 398   /// order - see \c getMemoryUseOrder().
 399   InstructionUseExpr *createExpr(Instruction *I) {
 400     InstructionUseExpr *E =
 401         new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
 402     if (isMemoryInst(I))
 403       E->setMemoryUseOrder(getMemoryUseOrder(I));
 404
 405     if (CmpInst *C = dyn_cast<CmpInst>(I)) {
 406       CmpInst::Predicate Predicate = C->getPredicate();
 407       E->setOpcode((C->getOpcode() << 8) | Predicate);
 408     }
 409     return E;
 410   }
 411
 412   /// Helper to compute the value number for a memory instruction
 413   /// (LoadInst/StoreInst), including checking the memory ordering and
 414   /// volatility.
 415   template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
 416     if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
 417       return nullptr;
 418     InstructionUseExpr *E = createExpr(I);
 419     E->setVolatile(I->isVolatile());
 420     return E;
 421   }
 422
 423 public:
 424   ValueTable() = default;
 425
 426   /// Returns the value number for the specified value, assigning
 427   /// it a new number if it did not have one before.
 428   uint32_t lookupOrAdd(Value *V) {
 429     auto VI = ValueNumbering.find(V);
 430     if (VI != ValueNumbering.end())
 431       return VI->second;
 432
 433     if (!isa<Instruction>(V)) {
 434       ValueNumbering[V] = nextValueNumber;
 435       return nextValueNumber++;
 436     }
 437
 438     Instruction *I = cast<Instruction>(V);
 439     InstructionUseExpr *exp = nullptr;
 440     switch (I->getOpcode()) {
 441     case Instruction::Load:
 442       exp = createMemoryExpr(cast<LoadInst>(I));
 443       break;
 444     case Instruction::Store:
 445       exp = createMemoryExpr(cast<StoreInst>(I));
 446       break;
 447     case Instruction::Call:
 448     case Instruction::Invoke:
 449     case Instruction::FNeg:
 450     case Instruction::Add:
 451     case Instruction::FAdd:
 452     case Instruction::Sub:
 453     case Instruction::FSub:
 454     case Instruction::Mul:
 455     case Instruction::FMul:
 456     case Instruction::UDiv:
 457     case Instruction::SDiv:
 458     case Instruction::FDiv:
 459     case Instruction::URem:
 460     case Instruction::SRem:
 461     case Instruction::FRem:
 462     case Instruction::Shl:
 463     case Instruction::LShr:
 464     case Instruction::AShr:
 465     case Instruction::And:
 466     case Instruction::Or:
 467     case Instruction::Xor:
 468     case Instruction::ICmp:
 469     case Instruction::FCmp:
 470     case Instruction::Trunc:
 471     case Instruction::ZExt:
 472     case Instruction::SExt:
 473     case Instruction::FPToUI:
 474     case Instruction::FPToSI:
 475     case Instruction::UIToFP:
 476     case Instruction::SIToFP:
 477     case Instruction::FPTrunc:
 478     case Instruction::FPExt:
 479     case Instruction::PtrToInt:
 480     case Instruction::IntToPtr:
 481     case Instruction::BitCast:
 482     case Instruction::AddrSpaceCast:
 483     case Instruction::Select:
 484     case Instruction::ExtractElement:
 485     case Instruction::InsertElement:
 486     case Instruction::ShuffleVector:
 487     case Instruction::InsertValue:
 488     case Instruction::GetElementPtr:
 489       exp = createExpr(I);
 490       break;
 491     default:
 492       break;
 493     }
 494
 495     if (!exp) {
 496       ValueNumbering[V] = nextValueNumber;
 497       return nextValueNumber++;
 498     }
 499
 500     uint32_t e = ExpressionNumbering[exp];
 501     if (!e) {
 502       hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
 503       auto I = HashNumbering.find(H);
 504       if (I != HashNumbering.end()) {
 505         e = I->second;
 506       } else {
 507         e = nextValueNumber++;
 508         HashNumbering[H] = e;
 509         ExpressionNumbering[exp] = e;
 510       }
 511     }
 512     ValueNumbering[V] = e;
 513     return e;
 514   }
 515
 516   /// Returns the value number of the specified value. Fails if the value has
 517   /// not yet been numbered.
 518   uint32_t lookup(Value *V) const {
 519     auto VI = ValueNumbering.find(V);
 520     assert(VI != ValueNumbering.end() && "Value not numbered?");
 521     return VI->second;
 522   }
 523
 524   /// Removes all value numberings and resets the value table.
 525   void clear() {
 526     ValueNumbering.clear();
 527     ExpressionNumbering.clear();
 528     HashNumbering.clear();
 529     Recycler.clear(Allocator);
 530     nextValueNumber = 1;
 531   }
 532
 533   /// \c Inst uses or touches memory. Return an ID describing the memory state
 534   /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
 535   /// the exact same memory operations happen after I1 and I2.
 536   ///
 537   /// This is a very hard problem in general, so we use domain-specific
 538   /// knowledge that we only ever check for equivalence between blocks sharing a
 539   /// single immediate successor that is common, and when determining if I1 ==
 540   /// I2 we will have already determined that next(I1) == next(I2). This
 541   /// inductive property allows us to simply return the value number of the next
 542   /// instruction that defines memory.
 543   uint32_t getMemoryUseOrder(Instruction *Inst) {
 544     auto *BB = Inst->getParent();
 545     for (auto I = std::next(Inst->getIterator()), E = BB->end();
 546          I != E && !I->isTerminator(); ++I) {
 547       if (!isMemoryInst(&*I))
 548         continue;
 549       if (isa<LoadInst>(&*I))
 550         continue;
 551       CallInst *CI = dyn_cast<CallInst>(&*I);
 552       if (CI && CI->onlyReadsMemory())
 553         continue;
 554       InvokeInst *II = dyn_cast<InvokeInst>(&*I);
 555       if (II && II->onlyReadsMemory())
 556         continue;
 557       return lookupOrAdd(&*I);
 558     }
 559     return 0;
 560   }
 561 };
 562
 563 //===----------------------------------------------------------------------===//
 564
 565 class GVNSink {
 566 public:
 567   GVNSink() = default;
 568
 569   bool run(Function &F) {
 570     LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
 571                       << "\n");
 572
 573     unsigned NumSunk = 0;
 574     ReversePostOrderTraversal<Function*> RPOT(&F);
 575     for (auto *N : RPOT)
 576       NumSunk += sinkBB(N);
 577
 578     return NumSunk > 0;
 579   }
 580
 581 private:
 582   ValueTable VN;
 583
 584   bool shouldAvoidSinkingInstruction(Instruction *I) {
 585     // These instructions may change or break semantics if moved.
 586     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
 587         I->getType()->isTokenTy())
 588       return true;
 589     return false;
 590   }
 591
 592   /// The main heuristic function. Analyze the set of instructions pointed to by
 593   /// LRI and return a candidate solution if these instructions can be sunk, or
 594   /// None otherwise.
 595   Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
 596       LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
 597       ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
 598
 599   /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
 600   void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
 601                           SmallPtrSetImpl<Value *> &PHIContents) {
 602     for (PHINode &PN : BB->phis()) {
 603       auto MPHI = ModelledPHI(&PN);
 604       PHIs.insert(MPHI);
 605       for (auto *V : MPHI.getValues())
 606         PHIContents.insert(V);
 607     }
 608   }
 609
 610   /// The main instruction sinking driver. Set up state and try and sink
 611   /// instructions into BBEnd from its predecessors.
 612   unsigned sinkBB(BasicBlock *BBEnd);
 613
 614   /// Perform the actual mechanics of sinking an instruction from Blocks into
 615   /// BBEnd, which is their only successor.
 616   void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
 617
 618   /// Remove PHIs that all have the same incoming value.
 619   void foldPointlessPHINodes(BasicBlock *BB) {
 620     auto I = BB->begin();
 621     while (PHINode *PN = dyn_cast<PHINode>(I++)) {
 622       if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
 623             return V == PN->getIncomingValue(0);
 624           }))
 625         continue;
 626       if (PN->getIncomingValue(0) != PN)
 627         PN->replaceAllUsesWith(PN->getIncomingValue(0));
 628       else
 629         PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
 630       PN->eraseFromParent();
 631     }
 632   }
 633 };
 634
 635 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
 636   LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
 637   ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
 638   auto Insts = *LRI;
 639   LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
 640                                                                   : Insts) {
 641     I->dump();
 642   } dbgs() << " ]\n";);
 643
 644   DenseMap<uint32_t, unsigned> VNums;
 645   for (auto *I : Insts) {
 646     uint32_t N = VN.lookupOrAdd(I);
 647     LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
 648     if (N == ~0U)
 649       return None;
 650     VNums[N]++;
 651   }
 652   unsigned VNumToSink =
 653       std::max_element(VNums.begin(), VNums.end(),
 654                        [](const std::pair<uint32_t, unsigned> &I,
 655                           const std::pair<uint32_t, unsigned> &J) {
 656                          return I.second < J.second;
 657                        })
 658           ->first;
 659
 660   if (VNums[VNumToSink] == 1)
 661     // Can't sink anything!
 662     return None;
 663
 664   // Now restrict the number of incoming blocks down to only those with
 665   // VNumToSink.
 666   auto &ActivePreds = LRI.getActiveBlocks();
 667   unsigned InitialActivePredSize = ActivePreds.size();
 668   SmallVector<Instruction *, 4> NewInsts;
 669   for (auto *I : Insts) {
 670     if (VN.lookup(I) != VNumToSink)
 671       ActivePreds.remove(I->getParent());
 672     else
 673       NewInsts.push_back(I);
 674   }
 675   for (auto *I : NewInsts)
 676     if (shouldAvoidSinkingInstruction(I))
 677       return None;
 678
 679   // If we've restricted the incoming blocks, restrict all needed PHIs also
 680   // to that set.
 681   bool RecomputePHIContents = false;
 682   if (ActivePreds.size() != InitialActivePredSize) {
 683     ModelledPHISet NewNeededPHIs;
 684     for (auto P : NeededPHIs) {
 685       P.restrictToBlocks(ActivePreds);
 686       NewNeededPHIs.insert(P);
 687     }
 688     NeededPHIs = NewNeededPHIs;
 689     LRI.restrictToBlocks(ActivePreds);
 690     RecomputePHIContents = true;
 691   }
 692
 693   // The sunk instruction's results.
 694   ModelledPHI NewPHI(NewInsts, ActivePreds);
 695
 696   // Does sinking this instruction render previous PHIs redundant?
 697   if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
 698     NeededPHIs.erase(NewPHI);
 699     RecomputePHIContents = true;
 700   }
 701
 702   if (RecomputePHIContents) {
 703     // The needed PHIs have changed, so recompute the set of all needed
 704     // values.
 705     PHIContents.clear();
 706     for (auto &PHI : NeededPHIs)
 707       PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
 708   }
 709
 710   // Is this instruction required by a later PHI that doesn't match this PHI?
 711   // if so, we can't sink this instruction.
 712   for (auto *V : NewPHI.getValues())
 713     if (PHIContents.count(V))
 714       // V exists in this PHI, but the whole PHI is different to NewPHI
 715       // (else it would have been removed earlier). We cannot continue
 716       // because this isn't representable.
 717       return None;
 718
 719   // Which operands need PHIs?
 720   // FIXME: If any of these fail, we should partition up the candidates to
 721   // try and continue making progress.
 722   Instruction *I0 = NewInsts[0];
 723
 724   // If all instructions that are going to participate don't have the same
 725   // number of operands, we can't do any useful PHI analysis for all operands.
 726   auto hasDifferentNumOperands = [&I0](Instruction *I) {
 727     return I->getNumOperands() != I0->getNumOperands();
 728   };
 729   if (any_of(NewInsts, hasDifferentNumOperands))
 730     return None;
 731
 732   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
 733     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
 734     if (PHI.areAllIncomingValuesSame())
 735       continue;
 736     if (!canReplaceOperandWithVariable(I0, OpNum))
 737       // We can 't create a PHI from this instruction!
 738       return None;
 739     if (NeededPHIs.count(PHI))
 740       continue;
 741     if (!PHI.areAllIncomingValuesSameType())
 742       return None;
 743     // Don't create indirect calls! The called value is the final operand.
 744     if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
 745         PHI.areAnyIncomingValuesConstant())
 746       return None;
 747
 748     NeededPHIs.reserve(NeededPHIs.size());
 749     NeededPHIs.insert(PHI);
 750     PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
 751   }
 752
 753   if (isMemoryInst(NewInsts[0]))
 754     ++MemoryInstNum;
 755
 756   SinkingInstructionCandidate Cand;
 757   Cand.NumInstructions = ++InstNum;
 758   Cand.NumMemoryInsts = MemoryInstNum;
 759   Cand.NumBlocks = ActivePreds.size();
 760   Cand.NumPHIs = NeededPHIs.size();
 761   for (auto *C : ActivePreds)
 762     Cand.Blocks.push_back(C);
 763
 764   return Cand;
 765 }
 766
 767 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
 768   LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
 769              BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
 770   SmallVector<BasicBlock *, 4> Preds;
 771   for (auto *B : predecessors(BBEnd)) {
 772     auto *T = B->getTerminator();
 773     if (isa<BranchInst>(T) || isa<SwitchInst>(T))
 774       Preds.push_back(B);
 775     else
 776       return 0;
 777   }
 778   if (Preds.size() < 2)
 779     return 0;
 780   llvm::sort(Preds);
 781
 782   unsigned NumOrigPreds = Preds.size();
 783   // We can only sink instructions through unconditional branches.
 784   for (auto I = Preds.begin(); I != Preds.end();) {
 785     if ((*I)->getTerminator()->getNumSuccessors() != 1)
 786       I = Preds.erase(I);
 787     else
 788       ++I;
 789   }
 790
 791   LockstepReverseIterator LRI(Preds);
 792   SmallVector<SinkingInstructionCandidate, 4> Candidates;
 793   unsigned InstNum = 0, MemoryInstNum = 0;
 794   ModelledPHISet NeededPHIs;
 795   SmallPtrSet<Value *, 4> PHIContents;
 796   analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
 797   unsigned NumOrigPHIs = NeededPHIs.size();
 798
 799   while (LRI.isValid()) {
 800     auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
 801                                              NeededPHIs, PHIContents);
 802     if (!Cand)
 803       break;
 804     Cand->calculateCost(NumOrigPHIs, Preds.size());
 805     Candidates.emplace_back(*Cand);
 806     --LRI;
 807   }
 808
 809   llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
 810   LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
 811                                                          : Candidates) dbgs()
 812                                                     << "  " << C << "\n";);
 813
 814   // Pick the top candidate, as long it is positive!
 815   if (Candidates.empty() || Candidates.front().Cost <= 0)
 816     return 0;
 817   auto C = Candidates.front();
 818
 819   LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
 820   BasicBlock *InsertBB = BBEnd;
 821   if (C.Blocks.size() < NumOrigPreds) {
 822     LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
 823                BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
 824     InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
 825     if (!InsertBB) {
 826       LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
 827       // Edge couldn't be split.
 828       return 0;
 829     }
 830   }
 831
 832   for (unsigned I = 0; I < C.NumInstructions; ++I)
 833     sinkLastInstruction(C.Blocks, InsertBB);
 834
 835   return C.NumInstructions;
 836 }
 837
 838 void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
 839                                   BasicBlock *BBEnd) {
 840   SmallVector<Instruction *, 4> Insts;
 841   for (BasicBlock *BB : Blocks)
 842     Insts.push_back(BB->getTerminator()->getPrevNode());
 843   Instruction *I0 = Insts.front();
 844
 845   SmallVector<Value *, 4> NewOperands;
 846   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
 847     bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
 848       return I->getOperand(O) != I0->getOperand(O);
 849     });
 850     if (!NeedPHI) {
 851       NewOperands.push_back(I0->getOperand(O));
 852       continue;
 853     }
 854
 855     // Create a new PHI in the successor block and populate it.
 856     auto *Op = I0->getOperand(O);
 857     assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
 858     auto *PN = PHINode::Create(Op->getType(), Insts.size(),
 859                                Op->getName() + ".sink", &BBEnd->front());
 860     for (auto *I : Insts)
 861       PN->addIncoming(I->getOperand(O), I->getParent());
 862     NewOperands.push_back(PN);
 863   }
 864
 865   // Arbitrarily use I0 as the new "common" instruction; remap its operands
 866   // and move it to the start of the successor block.
 867   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
 868     I0->getOperandUse(O).set(NewOperands[O]);
 869   I0->moveBefore(&*BBEnd->getFirstInsertionPt());
 870
 871   // Update metadata and IR flags.
 872   for (auto *I : Insts)
 873     if (I != I0) {
 874       combineMetadataForCSE(I0, I, true);
 875       I0->andIRFlags(I);
 876     }
 877
 878   for (auto *I : Insts)
 879     if (I != I0)
 880       I->replaceAllUsesWith(I0);
 881   foldPointlessPHINodes(BBEnd);
 882
 883   // Finally nuke all instructions apart from the common instruction.
 884   for (auto *I : Insts)
 885     if (I != I0)
 886       I->eraseFromParent();
 887
 888   NumRemoved += Insts.size() - 1;
 889 }
 890
 891 ////////////////////////////////////////////////////////////////////////////////
 892 // Pass machinery / boilerplate
 893
 894 class GVNSinkLegacyPass : public FunctionPass {
 895 public:
 896   static char ID;
 897
 898   GVNSinkLegacyPass() : FunctionPass(ID) {
 899     initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
 900   }
 901
 902   bool runOnFunction(Function &F) override {
 903     if (skipFunction(F))
 904       return false;
 905     GVNSink G;
 906     return G.run(F);
 907   }
 908
 909   void getAnalysisUsage(AnalysisUsage &AU) const override {
 910     AU.addPreserved<GlobalsAAWrapperPass>();
 911   }
 912 };
 913
 914 } // end anonymous namespace
 915
 916 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
 917   GVNSink G;
 918   if (!G.run(F))
 919     return PreservedAnalyses::all();
 920
 921   PreservedAnalyses PA;
 922   PA.preserve<GlobalsAA>();
 923   return PA;
 924 }
 925
// Definition of the static pass ID that the GVNSinkLegacyPass constructor
// hands to the FunctionPass base class.
char GVNSinkLegacyPass::ID = 0;

// Registration of the legacy pass under the name "gvn-sink", declaring
// dependencies on DominatorTreeWrapperPass and PostDominatorTreeWrapperPass.
// NOTE(review): getAnalysisUsage() in this file requests neither analysis -
// confirm whether these dependency declarations are still needed.
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)
 934
 935 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }