contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp

   1 //===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 /// This file implements the new LLVM's Global Value Numbering pass.
  11 /// GVN partitions values computed by a function into congruence classes.
  12 /// Values ending up in the same congruence class are guaranteed to be the same
  13 /// for every execution of the program. In that respect, congruency is a
  14 /// compile-time approximation of equivalence of values at runtime.
  15 /// The algorithm implemented here uses a sparse formulation and it's based
  16 /// on the ideas described in the paper:
  17 /// "A Sparse Algorithm for Predicated Global Value Numbering" from
  18 /// Karthik Gargi.
  19 ///
  20 //===----------------------------------------------------------------------===//
  21
  22 #include "llvm/Transforms/Scalar/NewGVN.h"
  23 #include "llvm/ADT/BitVector.h"
  24 #include "llvm/ADT/DenseMap.h"
  25 #include "llvm/ADT/DenseSet.h"
  26 #include "llvm/ADT/DepthFirstIterator.h"
  27 #include "llvm/ADT/Hashing.h"
  28 #include "llvm/ADT/MapVector.h"
  29 #include "llvm/ADT/PostOrderIterator.h"
  30 #include "llvm/ADT/STLExtras.h"
  31 #include "llvm/ADT/SmallPtrSet.h"
  32 #include "llvm/ADT/SmallSet.h"
  33 #include "llvm/ADT/SparseBitVector.h"
  34 #include "llvm/ADT/Statistic.h"
  35 #include "llvm/ADT/TinyPtrVector.h"
  36 #include "llvm/Analysis/AliasAnalysis.h"
  37 #include "llvm/Analysis/AssumptionCache.h"
  38 #include "llvm/Analysis/CFG.h"
  39 #include "llvm/Analysis/CFGPrinter.h"
  40 #include "llvm/Analysis/ConstantFolding.h"
  41 #include "llvm/Analysis/GlobalsModRef.h"
  42 #include "llvm/Analysis/InstructionSimplify.h"
  43 #include "llvm/Analysis/Loads.h"
  44 #include "llvm/Analysis/MemoryBuiltins.h"
  45 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  46 #include "llvm/Analysis/MemoryLocation.h"
  47 #include "llvm/Analysis/PHITransAddr.h"
  48 #include "llvm/Analysis/TargetLibraryInfo.h"
  49 #include "llvm/Analysis/ValueTracking.h"
  50 #include "llvm/IR/DataLayout.h"
  51 #include "llvm/IR/Dominators.h"
  52 #include "llvm/IR/GlobalVariable.h"
  53 #include "llvm/IR/IRBuilder.h"
  54 #include "llvm/IR/IntrinsicInst.h"
  55 #include "llvm/IR/LLVMContext.h"
  56 #include "llvm/IR/Metadata.h"
  57 #include "llvm/IR/PatternMatch.h"
  58 #include "llvm/IR/PredIteratorCache.h"
  59 #include "llvm/IR/Type.h"
  60 #include "llvm/Support/Allocator.h"
  61 #include "llvm/Support/CommandLine.h"
  62 #include "llvm/Support/Debug.h"
  63 #include "llvm/Transforms/Scalar.h"
  64 #include "llvm/Transforms/Scalar/GVNExpression.h"
  65 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  66 #include "llvm/Transforms/Utils/Local.h"
  67 #include "llvm/Transforms/Utils/MemorySSA.h"
  68 #include "llvm/Transforms/Utils/SSAUpdater.h"
  69 #include <unordered_map>
  70 #include <utility>
  71 #include <vector>
  72 using namespace llvm;
  73 using namespace PatternMatch;
  74 using namespace llvm::GVNExpression;
  75
  76 #define DEBUG_TYPE "newgvn"
  77
  78 STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
  79 STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
  80 STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
  81 STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same");
  82 STATISTIC(NumGVNMaxIterations,
  83           "Maximum Number of iterations it took to converge GVN");
  84
  85 //===----------------------------------------------------------------------===//
  86 //                                GVN Pass
  87 //===----------------------------------------------------------------------===//
  88
  89 // Anchor methods.
  90 namespace llvm {
  91 namespace GVNExpression {
  92 Expression::~Expression() = default;
  93 BasicExpression::~BasicExpression() = default;
  94 CallExpression::~CallExpression() = default;
  95 LoadExpression::~LoadExpression() = default;
  96 StoreExpression::~StoreExpression() = default;
  97 AggregateValueExpression::~AggregateValueExpression() = default;
  98 PHIExpression::~PHIExpression() = default;
  99 }
 100 }
 101
 102 // Congruence classes represent the set of expressions/instructions
 103 // that are all the same *during some scope in the function*.
 104 // That is, because of the way we perform equality propagation, and
 105 // because of memory value numbering, it is not correct to assume
 106 // you can willy-nilly replace any member with any other at any
 107 // point in the function.
 108 //
 109 // For any Value in the Member set, it is valid to replace any dominated member
 110 // with that Value.
 111 //
 112 // Every congruence class has a leader, and the leader is used to
 113 // symbolize instructions in a canonical way (IE every operand of an
 114 // instruction that is a member of the same congruence class will
 115 // always be replaced with leader during symbolization).
 116 // To simplify symbolization, we keep the leader as a constant if class can be
 117 // proved to be a constant value.
 118 // Otherwise, the leader is a randomly chosen member of the value set, it does
 119 // not matter which one is chosen.
 120 // Each congruence class also has a defining expression,
 121 // though the expression may be null.  If it exists, it can be used for forward
 122 // propagation and reassociation of values.
 123 //
 124 struct CongruenceClass {
 125   using MemberSet = SmallPtrSet<Value *, 4>;
 126   unsigned ID;
 127   // Representative leader.
 128   Value *RepLeader = nullptr;
 129   // Defining Expression.
 130   const Expression *DefiningExpr = nullptr;
 131   // Actual members of this class.
 132   MemberSet Members;
 133
 134   // True if this class has no members left.  This is mainly used for assertion
 135   // purposes, and for skipping empty classes.
 136   bool Dead = false;
 137
 138   explicit CongruenceClass(unsigned ID) : ID(ID) {}
 139   CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
 140       : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
 141 };
 142
 143 namespace llvm {
 144 template <> struct DenseMapInfo<const Expression *> {
 145   static const Expression *getEmptyKey() {
 146     auto Val = static_cast<uintptr_t>(-1);
 147     Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
 148     return reinterpret_cast<const Expression *>(Val);
 149   }
 150   static const Expression *getTombstoneKey() {
 151     auto Val = static_cast<uintptr_t>(~1U);
 152     Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
 153     return reinterpret_cast<const Expression *>(Val);
 154   }
 155   static unsigned getHashValue(const Expression *V) {
 156     return static_cast<unsigned>(V->getHashValue());
 157   }
 158   static bool isEqual(const Expression *LHS, const Expression *RHS) {
 159     if (LHS == RHS)
 160       return true;
 161     if (LHS == getTombstoneKey() || RHS == getTombstoneKey() ||
 162         LHS == getEmptyKey() || RHS == getEmptyKey())
 163       return false;
 164     return *LHS == *RHS;
 165   }
 166 };
 167 } // end namespace llvm
 168
 169 class NewGVN : public FunctionPass {
 170   DominatorTree *DT;
 171   const DataLayout *DL;
 172   const TargetLibraryInfo *TLI;
 173   AssumptionCache *AC;
 174   AliasAnalysis *AA;
 175   MemorySSA *MSSA;
 176   MemorySSAWalker *MSSAWalker;
 177   BumpPtrAllocator ExpressionAllocator;
 178   ArrayRecycler<Value *> ArgRecycler;
 179
 180   // Congruence class info.
 181   CongruenceClass *InitialClass;
 182   std::vector<CongruenceClass *> CongruenceClasses;
 183   unsigned NextCongruenceNum;
 184
 185   // Value Mappings.
 186   DenseMap<Value *, CongruenceClass *> ValueToClass;
 187   DenseMap<Value *, const Expression *> ValueToExpression;
 188
 189   // A table storing which memorydefs/phis represent a memory state provably
 190   // equivalent to another memory state.
 191   // We could use the congruence class machinery, but the MemoryAccess's are
 192   // abstract memory states, so they can only ever be equivalent to each other,
 193   // and not to constants, etc.
 194   DenseMap<const MemoryAccess *, MemoryAccess *> MemoryAccessEquiv;
 195
 196   // Expression to class mapping.
 197   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
 198   ExpressionClassMap ExpressionToClass;
 199
 200   // Which values have changed as a result of leader changes.
 201   SmallPtrSet<Value *, 8> ChangedValues;
 202
 203   // Reachability info.
 204   using BlockEdge = BasicBlockEdge;
 205   DenseSet<BlockEdge> ReachableEdges;
 206   SmallPtrSet<const BasicBlock *, 8> ReachableBlocks;
 207
 208   // This is a bitvector because, on larger functions, we may have
 209   // thousands of touched instructions at once (entire blocks,
 210   // instructions with hundreds of uses, etc).  Even with optimization
 211   // for when we mark whole blocks as touched, when this was a
 212   // SmallPtrSet or DenseSet, for some functions, we spent >20% of all
 213   // the time in GVN just managing this list.  The bitvector, on the
 214   // other hand, efficiently supports test/set/clear of both
 215   // individual and ranges, as well as "find next element" This
 216   // enables us to use it as a worklist with essentially 0 cost.
 217   BitVector TouchedInstructions;
 218
 219   DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
 220   DenseMap<const DomTreeNode *, std::pair<unsigned, unsigned>>
 221       DominatedInstRange;
 222
 223 #ifndef NDEBUG
 224   // Debugging for how many times each block and instruction got processed.
 225   DenseMap<const Value *, unsigned> ProcessedCount;
 226 #endif
 227
 228   // DFS info.
 229   DenseMap<const BasicBlock *, std::pair<int, int>> DFSDomMap;
 230   DenseMap<const Value *, unsigned> InstrDFS;
 231   SmallVector<Value *, 32> DFSToInstr;
 232
 233   // Deletion info.
 234   SmallPtrSet<Instruction *, 8> InstructionsToErase;
 235
 236 public:
 237   static char ID; // Pass identification, replacement for typeid.
 238   NewGVN() : FunctionPass(ID) {
 239     initializeNewGVNPass(*PassRegistry::getPassRegistry());
 240   }
 241
 242   bool runOnFunction(Function &F) override;
 243   bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
 244               TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA);
 245
 246 private:
 247   // This transformation requires dominator postdominator info.
 248   void getAnalysisUsage(AnalysisUsage &AU) const override {
 249     AU.addRequired<AssumptionCacheTracker>();
 250     AU.addRequired<DominatorTreeWrapperPass>();
 251     AU.addRequired<TargetLibraryInfoWrapperPass>();
 252     AU.addRequired<MemorySSAWrapperPass>();
 253     AU.addRequired<AAResultsWrapperPass>();
 254
 255     AU.addPreserved<DominatorTreeWrapperPass>();
 256     AU.addPreserved<GlobalsAAWrapperPass>();
 257   }
 258
 259   // Expression handling.
 260   const Expression *createExpression(Instruction *, const BasicBlock *);
 261   const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *,
 262                                            const BasicBlock *);
 263   PHIExpression *createPHIExpression(Instruction *);
 264   const VariableExpression *createVariableExpression(Value *);
 265   const ConstantExpression *createConstantExpression(Constant *);
 266   const Expression *createVariableOrConstant(Value *V, const BasicBlock *B);
 267   const UnknownExpression *createUnknownExpression(Instruction *);
 268   const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *,
 269                                                const BasicBlock *);
 270   LoadExpression *createLoadExpression(Type *, Value *, LoadInst *,
 271                                        MemoryAccess *, const BasicBlock *);
 272
 273   const CallExpression *createCallExpression(CallInst *, MemoryAccess *,
 274                                              const BasicBlock *);
 275   const AggregateValueExpression *
 276   createAggregateValueExpression(Instruction *, const BasicBlock *);
 277   bool setBasicExpressionInfo(Instruction *, BasicExpression *,
 278                               const BasicBlock *);
 279
 280   // Congruence class handling.
 281   CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) {
 282     auto *result = new CongruenceClass(NextCongruenceNum++, Leader, E);
 283     CongruenceClasses.emplace_back(result);
 284     return result;
 285   }
 286
 287   CongruenceClass *createSingletonCongruenceClass(Value *Member) {
 288     CongruenceClass *CClass = createCongruenceClass(Member, nullptr);
 289     CClass->Members.insert(Member);
 290     ValueToClass[Member] = CClass;
 291     return CClass;
 292   }
 293   void initializeCongruenceClasses(Function &F);
 294
 295   // Value number an Instruction or MemoryPhi.
 296   void valueNumberMemoryPhi(MemoryPhi *);
 297   void valueNumberInstruction(Instruction *);
 298
 299   // Symbolic evaluation.
 300   const Expression *checkSimplificationResults(Expression *, Instruction *,
 301                                                Value *);
 302   const Expression *performSymbolicEvaluation(Value *, const BasicBlock *);
 303   const Expression *performSymbolicLoadEvaluation(Instruction *,
 304                                                   const BasicBlock *);
 305   const Expression *performSymbolicStoreEvaluation(Instruction *,
 306                                                    const BasicBlock *);
 307   const Expression *performSymbolicCallEvaluation(Instruction *,
 308                                                   const BasicBlock *);
 309   const Expression *performSymbolicPHIEvaluation(Instruction *,
 310                                                  const BasicBlock *);
 311   bool setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To);
 312   const Expression *performSymbolicAggrValueEvaluation(Instruction *,
 313                                                        const BasicBlock *);
 314
 315   // Congruence finding.
 316   // Templated to allow them to work both on BB's and BB-edges.
 317   template <class T>
 318   Value *lookupOperandLeader(Value *, const User *, const T &) const;
 319   void performCongruenceFinding(Value *, const Expression *);
 320
 321   // Reachability handling.
 322   void updateReachableEdge(BasicBlock *, BasicBlock *);
 323   void processOutgoingEdges(TerminatorInst *, BasicBlock *);
 324   bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const;
 325   Value *findConditionEquivalence(Value *, BasicBlock *) const;
 326   MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const;
 327
 328   // Elimination.
 329   struct ValueDFS;
 330   void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
 331                                 SmallVectorImpl<ValueDFS> &);
 332
 333   bool eliminateInstructions(Function &);
 334   void replaceInstruction(Instruction *, Value *);
 335   void markInstructionForDeletion(Instruction *);
 336   void deleteInstructionsInBlock(BasicBlock *);
 337
 338   // New instruction creation.
 339   void handleNewInstruction(Instruction *){};
 340
 341   // Various instruction touch utilities
 342   void markUsersTouched(Value *);
 343   void markMemoryUsersTouched(MemoryAccess *);
 344   void markLeaderChangeTouched(CongruenceClass *CC);
 345
 346   // Utilities.
 347   void cleanupTables();
 348   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
 349   void updateProcessedCount(Value *V);
 350   void verifyMemoryCongruency();
 351 };
 352
 353 char NewGVN::ID = 0;
 354
 355 // createGVNPass - The public interface to this file.
 356 FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); }
 357
 358 template <typename T>
 359 static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) {
 360   if ((!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) ||
 361       !LHS.BasicExpression::equals(RHS)) {
 362     return false;
 363   } else if (const auto *L = dyn_cast<LoadExpression>(&RHS)) {
 364     if (LHS.getDefiningAccess() != L->getDefiningAccess())
 365       return false;
 366   } else if (const auto *S = dyn_cast<StoreExpression>(&RHS)) {
 367     if (LHS.getDefiningAccess() != S->getDefiningAccess())
 368       return false;
 369   }
 370   return true;
 371 }
 372
 373 bool LoadExpression::equals(const Expression &Other) const {
 374   return equalsLoadStoreHelper(*this, Other);
 375 }
 376
 377 bool StoreExpression::equals(const Expression &Other) const {
 378   return equalsLoadStoreHelper(*this, Other);
 379 }
 380
 381 #ifndef NDEBUG
 382 static std::string getBlockName(const BasicBlock *B) {
 383   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 384 }
 385 #endif
 386
 387 INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false)
 388 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 389 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 390 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 391 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 392 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 393 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 394 INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)
 395
 396 PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
 397   BasicBlock *PHIBlock = I->getParent();
 398   auto *PN = cast<PHINode>(I);
 399   auto *E =
 400       new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
 401
 402   E->allocateOperands(ArgRecycler, ExpressionAllocator);
 403   E->setType(I->getType());
 404   E->setOpcode(I->getOpcode());
 405
 406   auto ReachablePhiArg = [&](const Use &U) {
 407     return ReachableBlocks.count(PN->getIncomingBlock(U));
 408   };
 409
 410   // Filter out unreachable operands
 411   auto Filtered = make_filter_range(PN->operands(), ReachablePhiArg);
 412
 413   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
 414                  [&](const Use &U) -> Value * {
 415                    // Don't try to transform self-defined phis.
 416                    if (U == PN)
 417                      return PN;
 418                    const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
 419                    return lookupOperandLeader(U, I, BBE);
 420                  });
 421   return E;
 422 }
 423
 424 // Set basic expression info (Arguments, type, opcode) for Expression
 425 // E from Instruction I in block B.
 426 bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
 427                                     const BasicBlock *B) {
 428   bool AllConstant = true;
 429   if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
 430     E->setType(GEP->getSourceElementType());
 431   else
 432     E->setType(I->getType());
 433   E->setOpcode(I->getOpcode());
 434   E->allocateOperands(ArgRecycler, ExpressionAllocator);
 435
 436   // Transform the operand array into an operand leader array, and keep track of
 437   // whether all members are constant.
 438   std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
 439     auto Operand = lookupOperandLeader(O, I, B);
 440     AllConstant &= isa<Constant>(Operand);
 441     return Operand;
 442   });
 443
 444   return AllConstant;
 445 }
 446
 447 const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
 448                                                  Value *Arg1, Value *Arg2,
 449                                                  const BasicBlock *B) {
 450   auto *E = new (ExpressionAllocator) BasicExpression(2);
 451
 452   E->setType(T);
 453   E->setOpcode(Opcode);
 454   E->allocateOperands(ArgRecycler, ExpressionAllocator);
 455   if (Instruction::isCommutative(Opcode)) {
 456     // Ensure that commutative instructions that only differ by a permutation
 457     // of their operands get the same value number by sorting the operand value
 458     // numbers.  Since all commutative instructions have two operands it is more
 459     // efficient to sort by hand rather than using, say, std::sort.
 460     if (Arg1 > Arg2)
 461       std::swap(Arg1, Arg2);
 462   }
 463   E->op_push_back(lookupOperandLeader(Arg1, nullptr, B));
 464   E->op_push_back(lookupOperandLeader(Arg2, nullptr, B));
 465
 466   Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI,
 467                            DT, AC);
 468   if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
 469     return SimplifiedE;
 470   return E;
 471 }
 472
 473 // Take a Value returned by simplification of Expression E/Instruction
 474 // I, and see if it resulted in a simpler expression. If so, return
 475 // that expression.
 476 // TODO: Once finished, this should not take an Instruction, we only
 477 // use it for printing.
 478 const Expression *NewGVN::checkSimplificationResults(Expression *E,
 479                                                      Instruction *I, Value *V) {
 480   if (!V)
 481     return nullptr;
 482   if (auto *C = dyn_cast<Constant>(V)) {
 483     if (I)
 484       DEBUG(dbgs() << "Simplified " << *I << " to "
 485                    << " constant " << *C << "\n");
 486     NumGVNOpsSimplified++;
 487     assert(isa<BasicExpression>(E) &&
 488            "We should always have had a basic expression here");
 489
 490     cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
 491     ExpressionAllocator.Deallocate(E);
 492     return createConstantExpression(C);
 493   } else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
 494     if (I)
 495       DEBUG(dbgs() << "Simplified " << *I << " to "
 496                    << " variable " << *V << "\n");
 497     cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
 498     ExpressionAllocator.Deallocate(E);
 499     return createVariableExpression(V);
 500   }
 501
 502   CongruenceClass *CC = ValueToClass.lookup(V);
 503   if (CC && CC->DefiningExpr) {
 504     if (I)
 505       DEBUG(dbgs() << "Simplified " << *I << " to "
 506                    << " expression " << *V << "\n");
 507     NumGVNOpsSimplified++;
 508     assert(isa<BasicExpression>(E) &&
 509            "We should always have had a basic expression here");
 510     cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
 511     ExpressionAllocator.Deallocate(E);
 512     return CC->DefiningExpr;
 513   }
 514   return nullptr;
 515 }
 516
 517 const Expression *NewGVN::createExpression(Instruction *I,
 518                                            const BasicBlock *B) {
 519
 520   auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
 521
 522   bool AllConstant = setBasicExpressionInfo(I, E, B);
 523
 524   if (I->isCommutative()) {
 525     // Ensure that commutative instructions that only differ by a permutation
 526     // of their operands get the same value number by sorting the operand value
 527     // numbers.  Since all commutative instructions have two operands it is more
 528     // efficient to sort by hand rather than using, say, std::sort.
 529     assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
 530     if (E->getOperand(0) > E->getOperand(1))
 531       E->swapOperands(0, 1);
 532   }
 533
 534   // Perform simplificaiton
 535   // TODO: Right now we only check to see if we get a constant result.
 536   // We may get a less than constant, but still better, result for
 537   // some operations.
 538   // IE
 539   //  add 0, x -> x
 540   //  and x, x -> x
 541   // We should handle this by simply rewriting the expression.
 542   if (auto *CI = dyn_cast<CmpInst>(I)) {
 543     // Sort the operand value numbers so x<y and y>x get the same value
 544     // number.
 545     CmpInst::Predicate Predicate = CI->getPredicate();
 546     if (E->getOperand(0) > E->getOperand(1)) {
 547       E->swapOperands(0, 1);
 548       Predicate = CmpInst::getSwappedPredicate(Predicate);
 549     }
 550     E->setOpcode((CI->getOpcode() << 8) | Predicate);
 551     // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
 552     // TODO: Since we noop bitcasts, we may need to check types before
 553     // simplifying, so that we don't end up simplifying based on a wrong
 554     // type assumption. We should clean this up so we can use constants of the
 555     // wrong type
 556
 557     assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
 558            "Wrong types on cmp instruction");
 559     if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
 560          E->getOperand(1)->getType() == I->getOperand(1)->getType())) {
 561       Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
 562                                  *DL, TLI, DT, AC);
 563       if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 564         return SimplifiedE;
 565     }
 566   } else if (isa<SelectInst>(I)) {
 567     if (isa<Constant>(E->getOperand(0)) ||
 568         (E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
 569          E->getOperand(2)->getType() == I->getOperand(2)->getType())) {
 570       Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
 571                                     E->getOperand(2), *DL, TLI, DT, AC);
 572       if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 573         return SimplifiedE;
 574     }
 575   } else if (I->isBinaryOp()) {
 576     Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
 577                              *DL, TLI, DT, AC);
 578     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 579       return SimplifiedE;
 580   } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
 581     Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC);
 582     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 583       return SimplifiedE;
 584   } else if (isa<GetElementPtrInst>(I)) {
 585     Value *V = SimplifyGEPInst(E->getType(),
 586                                ArrayRef<Value *>(E->op_begin(), E->op_end()),
 587                                *DL, TLI, DT, AC);
 588     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 589       return SimplifiedE;
 590   } else if (AllConstant) {
 591     // We don't bother trying to simplify unless all of the operands
 592     // were constant.
 593     // TODO: There are a lot of Simplify*'s we could call here, if we
 594     // wanted to.  The original motivating case for this code was a
 595     // zext i1 false to i8, which we don't have an interface to
 596     // simplify (IE there is no SimplifyZExt).
 597
 598     SmallVector<Constant *, 8> C;
 599     for (Value *Arg : E->operands())
 600       C.emplace_back(cast<Constant>(Arg));
 601
 602     if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI))
 603       if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
 604         return SimplifiedE;
 605   }
 606   return E;
 607 }
 608
 609 const AggregateValueExpression *
 610 NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) {
 611   if (auto *II = dyn_cast<InsertValueInst>(I)) {
 612     auto *E = new (ExpressionAllocator)
 613         AggregateValueExpression(I->getNumOperands(), II->getNumIndices());
 614     setBasicExpressionInfo(I, E, B);
 615     E->allocateIntOperands(ExpressionAllocator);
 616     std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E));
 617     return E;
 618   } else if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
 619     auto *E = new (ExpressionAllocator)
 620         AggregateValueExpression(I->getNumOperands(), EI->getNumIndices());
 621     setBasicExpressionInfo(EI, E, B);
 622     E->allocateIntOperands(ExpressionAllocator);
 623     std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E));
 624     return E;
 625   }
 626   llvm_unreachable("Unhandled type of aggregate value operation");
 627 }
 628
 629 const VariableExpression *NewGVN::createVariableExpression(Value *V) {
 630   auto *E = new (ExpressionAllocator) VariableExpression(V);
 631   E->setOpcode(V->getValueID());
 632   return E;
 633 }
 634
 635 const Expression *NewGVN::createVariableOrConstant(Value *V,
 636                                                    const BasicBlock *B) {
 637   auto Leader = lookupOperandLeader(V, nullptr, B);
 638   if (auto *C = dyn_cast<Constant>(Leader))
 639     return createConstantExpression(C);
 640   return createVariableExpression(Leader);
 641 }
 642
 643 const ConstantExpression *NewGVN::createConstantExpression(Constant *C) {
 644   auto *E = new (ExpressionAllocator) ConstantExpression(C);
 645   E->setOpcode(C->getValueID());
 646   return E;
 647 }
 648
 649 const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) {
 650   auto *E = new (ExpressionAllocator) UnknownExpression(I);
 651   E->setOpcode(I->getOpcode());
 652   return E;
 653 }
 654
 655 const CallExpression *NewGVN::createCallExpression(CallInst *CI,
 656                                                    MemoryAccess *HV,
 657                                                    const BasicBlock *B) {
 658   // FIXME: Add operand bundles for calls.
 659   auto *E =
 660       new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV);
 661   setBasicExpressionInfo(CI, E, B);
 662   return E;
 663 }
 664
 665 // See if we have a congruence class and leader for this operand, and if so,
 666 // return it. Otherwise, return the operand itself.
 667 template <class T>
 668 Value *NewGVN::lookupOperandLeader(Value *V, const User *U, const T &B) const {
 669   CongruenceClass *CC = ValueToClass.lookup(V);
 670   if (CC && (CC != InitialClass))
 671     return CC->RepLeader;
 672   return V;
 673 }
 674
 675 MemoryAccess *NewGVN::lookupMemoryAccessEquiv(MemoryAccess *MA) const {
 676   MemoryAccess *Result = MemoryAccessEquiv.lookup(MA);
 677   return Result ? Result : MA;
 678 }
 679
 680 LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
 681                                              LoadInst *LI, MemoryAccess *DA,
 682                                              const BasicBlock *B) {
 683   auto *E = new (ExpressionAllocator) LoadExpression(1, LI, DA);
 684   E->allocateOperands(ArgRecycler, ExpressionAllocator);
 685   E->setType(LoadType);
 686
 687   // Give store and loads same opcode so they value number together.
 688   E->setOpcode(0);
 689   E->op_push_back(lookupOperandLeader(PointerOp, LI, B));
 690   if (LI)
 691     E->setAlignment(LI->getAlignment());
 692
 693   // TODO: Value number heap versions. We may be able to discover
 694   // things alias analysis can't on it's own (IE that a store and a
 695   // load have the same value, and thus, it isn't clobbering the load).
 696   return E;
 697 }
 698
 699 const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
 700                                                      MemoryAccess *DA,
 701                                                      const BasicBlock *B) {
 702   auto *E =
 703       new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA);
 704   E->allocateOperands(ArgRecycler, ExpressionAllocator);
 705   E->setType(SI->getValueOperand()->getType());
 706
 707   // Give store and loads same opcode so they value number together.
 708   E->setOpcode(0);
 709   E->op_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B));
 710
 711   // TODO: Value number heap versions. We may be able to discover
 712   // things alias analysis can't on it's own (IE that a store and a
 713   // load have the same value, and thus, it isn't clobbering the load).
 714   return E;
 715 }
 716
 717 // Utility function to check whether the congruence class has a member other
 718 // than the given instruction.
 719 bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
 720   // Either it has more than one member, in which case it must contain something
 721   // other than us (because it's indexed by value), or if it only has one member
 722   // right now, that member should not be us.
 723   return CC->Members.size() > 1 || CC->Members.count(I) == 0;
 724 }
 725
 726 const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
 727                                                          const BasicBlock *B) {
 728   // Unlike loads, we never try to eliminate stores, so we do not check if they
 729   // are simple and avoid value numbering them.
 730   auto *SI = cast<StoreInst>(I);
 731   MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
 732   // See if we are defined by a previous store expression, it already has a
 733   // value, and it's the same value as our current store. FIXME: Right now, we
 734   // only do this for simple stores, we should expand to cover memcpys, etc.
 735   if (SI->isSimple()) {
 736     // Get the expression, if any, for the RHS of the MemoryDef.
 737     MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
 738         cast<MemoryDef>(StoreAccess)->getDefiningAccess());
 739     const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
 740     CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
 741     // Basically, check if the congruence class the store is in is defined by a
 742     // store that isn't us, and has the same value.  MemorySSA takes care of
 743     // ensuring the store has the same memory state as us already.
 744     if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
 745         CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
 746         hasMemberOtherThanUs(CC, I))
 747       return createStoreExpression(SI, StoreRHS, B);
 748   }
 749
 750   return createStoreExpression(SI, StoreAccess, B);
 751 }
 752
 753 const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
 754                                                         const BasicBlock *B) {
 755   auto *LI = cast<LoadInst>(I);
 756
 757   // We can eliminate in favor of non-simple loads, but we won't be able to
 758   // eliminate the loads themselves.
 759   if (!LI->isSimple())
 760     return nullptr;
 761
 762   Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand(), I, B);
 763   // Load of undef is undef.
 764   if (isa<UndefValue>(LoadAddressLeader))
 765     return createConstantExpression(UndefValue::get(LI->getType()));
 766
 767   MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
 768
 769   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
 770     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
 771       Instruction *DefiningInst = MD->getMemoryInst();
 772       // If the defining instruction is not reachable, replace with undef.
 773       if (!ReachableBlocks.count(DefiningInst->getParent()))
 774         return createConstantExpression(UndefValue::get(LI->getType()));
 775     }
 776   }
 777
 778   const Expression *E =
 779       createLoadExpression(LI->getType(), LI->getPointerOperand(), LI,
 780                            lookupMemoryAccessEquiv(DefiningAccess), B);
 781   return E;
 782 }
 783
 784 // Evaluate read only and pure calls, and create an expression result.
 785 const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I,
 786                                                         const BasicBlock *B) {
 787   auto *CI = cast<CallInst>(I);
 788   if (AA->doesNotAccessMemory(CI))
 789     return createCallExpression(CI, nullptr, B);
 790   if (AA->onlyReadsMemory(CI)) {
 791     MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI);
 792     return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B);
 793   }
 794   return nullptr;
 795 }
 796
 797 // Update the memory access equivalence table to say that From is equal to To,
 798 // and return true if this is different from what already existed in the table.
 799 bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
 800   DEBUG(dbgs() << "Setting " << *From << " equivalent to ");
 801   if (!To)
 802     DEBUG(dbgs() << "itself");
 803   else
 804     DEBUG(dbgs() << *To);
 805   DEBUG(dbgs() << "\n");
 806   auto LookupResult = MemoryAccessEquiv.find(From);
 807   bool Changed = false;
 808   // If it's already in the table, see if the value changed.
 809   if (LookupResult != MemoryAccessEquiv.end()) {
 810     if (To && LookupResult->second != To) {
 811       // It wasn't equivalent before, and now it is.
 812       LookupResult->second = To;
 813       Changed = true;
 814     } else if (!To) {
 815       // It used to be equivalent to something, and now it's not.
 816       MemoryAccessEquiv.erase(LookupResult);
 817       Changed = true;
 818     }
 819   } else {
 820     assert(!To &&
 821            "Memory equivalence should never change from nothing to something");
 822   }
 823
 824   return Changed;
 825 }
 826 // Evaluate PHI nodes symbolically, and create an expression result.
 827 const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
 828                                                        const BasicBlock *B) {
 829   auto *E = cast<PHIExpression>(createPHIExpression(I));
 830   // We match the semantics of SimplifyPhiNode from InstructionSimplify here.
 831
 832   // See if all arguaments are the same.
 833   // We track if any were undef because they need special handling.
 834   bool HasUndef = false;
 835   auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
 836     if (Arg == I)
 837       return false;
 838     if (isa<UndefValue>(Arg)) {
 839       HasUndef = true;
 840       return false;
 841     }
 842     return true;
 843   });
 844   // If we are left with no operands, it's undef
 845   if (Filtered.begin() == Filtered.end()) {
 846     DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
 847                  << "\n");
 848     E->deallocateOperands(ArgRecycler);
 849     ExpressionAllocator.Deallocate(E);
 850     return createConstantExpression(UndefValue::get(I->getType()));
 851   }
 852   Value *AllSameValue = *(Filtered.begin());
 853   ++Filtered.begin();
 854   // Can't use std::equal here, sadly, because filter.begin moves.
 855   if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
 856         return V == AllSameValue;
 857       })) {
 858     // In LLVM's non-standard representation of phi nodes, it's possible to have
 859     // phi nodes with cycles (IE dependent on other phis that are .... dependent
 860     // on the original phi node), especially in weird CFG's where some arguments
 861     // are unreachable, or uninitialized along certain paths.  This can cause
 862     // infinite loops during evaluation. We work around this by not trying to
 863     // really evaluate them independently, but instead using a variable
 864     // expression to say if one is equivalent to the other.
 865     // We also special case undef, so that if we have an undef, we can't use the
 866     // common value unless it dominates the phi block.
 867     if (HasUndef) {
 868       // Only have to check for instructions
 869       if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
 870         if (!DT->dominates(AllSameInst, I))
 871           return E;
 872     }
 873
 874     NumGVNPhisAllSame++;
 875     DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
 876                  << "\n");
 877     E->deallocateOperands(ArgRecycler);
 878     ExpressionAllocator.Deallocate(E);
 879     if (auto *C = dyn_cast<Constant>(AllSameValue))
 880       return createConstantExpression(C);
 881     return createVariableExpression(AllSameValue);
 882   }
 883   return E;
 884 }
 885
 886 const Expression *
 887 NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
 888                                            const BasicBlock *B) {
 889   if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
 890     auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
 891     if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
 892       unsigned Opcode = 0;
 893       // EI might be an extract from one of our recognised intrinsics. If it
 894       // is we'll synthesize a semantically equivalent expression instead on
 895       // an extract value expression.
 896       switch (II->getIntrinsicID()) {
 897       case Intrinsic::sadd_with_overflow:
 898       case Intrinsic::uadd_with_overflow:
 899         Opcode = Instruction::Add;
 900         break;
 901       case Intrinsic::ssub_with_overflow:
 902       case Intrinsic::usub_with_overflow:
 903         Opcode = Instruction::Sub;
 904         break;
 905       case Intrinsic::smul_with_overflow:
 906       case Intrinsic::umul_with_overflow:
 907         Opcode = Instruction::Mul;
 908         break;
 909       default:
 910         break;
 911       }
 912
 913       if (Opcode != 0) {
 914         // Intrinsic recognized. Grab its args to finish building the
 915         // expression.
 916         assert(II->getNumArgOperands() == 2 &&
 917                "Expect two args for recognised intrinsics.");
 918         return createBinaryExpression(Opcode, EI->getType(),
 919                                       II->getArgOperand(0),
 920                                       II->getArgOperand(1), B);
 921       }
 922     }
 923   }
 924
 925   return createAggregateValueExpression(I, B);
 926 }
 927
 928 // Substitute and symbolize the value before value numbering.
 929 const Expression *NewGVN::performSymbolicEvaluation(Value *V,
 930                                                     const BasicBlock *B) {
 931   const Expression *E = nullptr;
 932   if (auto *C = dyn_cast<Constant>(V))
 933     E = createConstantExpression(C);
 934   else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
 935     E = createVariableExpression(V);
 936   } else {
 937     // TODO: memory intrinsics.
 938     // TODO: Some day, we should do the forward propagation and reassociation
 939     // parts of the algorithm.
 940     auto *I = cast<Instruction>(V);
 941     switch (I->getOpcode()) {
 942     case Instruction::ExtractValue:
 943     case Instruction::InsertValue:
 944       E = performSymbolicAggrValueEvaluation(I, B);
 945       break;
 946     case Instruction::PHI:
 947       E = performSymbolicPHIEvaluation(I, B);
 948       break;
 949     case Instruction::Call:
 950       E = performSymbolicCallEvaluation(I, B);
 951       break;
 952     case Instruction::Store:
 953       E = performSymbolicStoreEvaluation(I, B);
 954       break;
 955     case Instruction::Load:
 956       E = performSymbolicLoadEvaluation(I, B);
 957       break;
 958     case Instruction::BitCast: {
 959       E = createExpression(I, B);
 960     } break;
 961
 962     case Instruction::Add:
 963     case Instruction::FAdd:
 964     case Instruction::Sub:
 965     case Instruction::FSub:
 966     case Instruction::Mul:
 967     case Instruction::FMul:
 968     case Instruction::UDiv:
 969     case Instruction::SDiv:
 970     case Instruction::FDiv:
 971     case Instruction::URem:
 972     case Instruction::SRem:
 973     case Instruction::FRem:
 974     case Instruction::Shl:
 975     case Instruction::LShr:
 976     case Instruction::AShr:
 977     case Instruction::And:
 978     case Instruction::Or:
 979     case Instruction::Xor:
 980     case Instruction::ICmp:
 981     case Instruction::FCmp:
 982     case Instruction::Trunc:
 983     case Instruction::ZExt:
 984     case Instruction::SExt:
 985     case Instruction::FPToUI:
 986     case Instruction::FPToSI:
 987     case Instruction::UIToFP:
 988     case Instruction::SIToFP:
 989     case Instruction::FPTrunc:
 990     case Instruction::FPExt:
 991     case Instruction::PtrToInt:
 992     case Instruction::IntToPtr:
 993     case Instruction::Select:
 994     case Instruction::ExtractElement:
 995     case Instruction::InsertElement:
 996     case Instruction::ShuffleVector:
 997     case Instruction::GetElementPtr:
 998       E = createExpression(I, B);
 999       break;
1000     default:
1001       return nullptr;
1002     }
1003   }
1004   return E;
1005 }
1006
1007 // There is an edge from 'Src' to 'Dst'.  Return true if every path from
1008 // the entry block to 'Dst' passes via this edge.  In particular 'Dst'
1009 // must not be reachable via another edge from 'Src'.
1010 bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) const {
1011
1012   // While in theory it is interesting to consider the case in which Dst has
1013   // more than one predecessor, because Dst might be part of a loop which is
1014   // only reachable from Src, in practice it is pointless since at the time
1015   // GVN runs all such loops have preheaders, which means that Dst will have
1016   // been changed to have only one predecessor, namely Src.
1017   const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
1018   const BasicBlock *Src = E.getStart();
1019   assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
1020   (void)Src;
1021   return Pred != nullptr;
1022 }
1023
1024 void NewGVN::markUsersTouched(Value *V) {
1025   // Now mark the users as touched.
1026   for (auto *User : V->users()) {
1027     assert(isa<Instruction>(User) && "Use of value not within an instruction?");
1028     TouchedInstructions.set(InstrDFS[User]);
1029   }
1030 }
1031
1032 void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
1033   for (auto U : MA->users()) {
1034     if (auto *MUD = dyn_cast<MemoryUseOrDef>(U))
1035       TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]);
1036     else
1037       TouchedInstructions.set(InstrDFS[U]);
1038   }
1039 }
1040
1041 // Touch the instructions that need to be updated after a congruence class has a
1042 // leader change, and mark changed values.
1043 void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
1044   for (auto M : CC->Members) {
1045     if (auto *I = dyn_cast<Instruction>(M))
1046       TouchedInstructions.set(InstrDFS[I]);
1047     ChangedValues.insert(M);
1048   }
1049 }
1050
1051 // Perform congruence finding on a given value numbering expression.
1052 void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
1053   ValueToExpression[V] = E;
1054   // This is guaranteed to return something, since it will at least find
1055   // INITIAL.
1056
1057   CongruenceClass *VClass = ValueToClass[V];
1058   assert(VClass && "Should have found a vclass");
1059   // Dead classes should have been eliminated from the mapping.
1060   assert(!VClass->Dead && "Found a dead class");
1061
1062   CongruenceClass *EClass;
1063   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
1064     EClass = ValueToClass[VE->getVariableValue()];
1065   } else {
1066     auto lookupResult = ExpressionToClass.insert({E, nullptr});
1067
1068     // If it's not in the value table, create a new congruence class.
1069     if (lookupResult.second) {
1070       CongruenceClass *NewClass = createCongruenceClass(nullptr, E);
1071       auto place = lookupResult.first;
1072       place->second = NewClass;
1073
1074       // Constants and variables should always be made the leader.
1075       if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
1076         NewClass->RepLeader = CE->getConstantValue();
1077       } else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
1078         StoreInst *SI = SE->getStoreInst();
1079         NewClass->RepLeader =
1080             lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
1081       } else {
1082         NewClass->RepLeader = V;
1083       }
1084       assert(!isa<VariableExpression>(E) &&
1085              "VariableExpression should have been handled already");
1086
1087       EClass = NewClass;
1088       DEBUG(dbgs() << "Created new congruence class for " << *V
1089                    << " using expression " << *E << " at " << NewClass->ID
1090                    << " and leader " << *(NewClass->RepLeader) << "\n");
1091       DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
1092     } else {
1093       EClass = lookupResult.first->second;
1094       if (isa<ConstantExpression>(E))
1095         assert(isa<Constant>(EClass->RepLeader) &&
1096                "Any class with a constant expression should have a "
1097                "constant leader");
1098
1099       assert(EClass && "Somehow don't have an eclass");
1100
1101       assert(!EClass->Dead && "We accidentally looked up a dead class");
1102     }
1103   }
1104   bool WasInChanged = ChangedValues.erase(V);
1105   if (VClass != EClass || WasInChanged) {
1106     DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
1107                  << "\n");
1108
1109     if (VClass != EClass) {
1110       DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID
1111                    << "\n");
1112
1113       VClass->Members.erase(V);
1114       EClass->Members.insert(V);
1115       ValueToClass[V] = EClass;
1116       // See if we destroyed the class or need to swap leaders.
1117       if (VClass->Members.empty() && VClass != InitialClass) {
1118         if (VClass->DefiningExpr) {
1119           VClass->Dead = true;
1120           DEBUG(dbgs() << "Erasing expression " << *E << " from table\n");
1121           ExpressionToClass.erase(VClass->DefiningExpr);
1122         }
1123       } else if (VClass->RepLeader == V) {
1124         // When the leader changes, the value numbering of
1125         // everything may change due to symbolization changes, so we need to
1126         // reprocess.
1127         VClass->RepLeader = *(VClass->Members.begin());
1128         markLeaderChangeTouched(VClass);
1129       }
1130     }
1131
1132     markUsersTouched(V);
1133     if (auto *I = dyn_cast<Instruction>(V)) {
1134       if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
1135         // If this is a MemoryDef, we need to update the equivalence table. If
1136         // we determined the expression is congruent to a different memory
1137         // state, use that different memory state.  If we determined it didn't,
1138         // we update that as well.  Right now, we only support store
1139         // expressions.
1140         if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
1141             EClass->Members.size() != 1) {
1142           auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
1143           setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
1144         } else {
1145           setMemoryAccessEquivTo(MA, nullptr);
1146         }
1147         markMemoryUsersTouched(MA);
1148       }
1149     }
1150   } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
1151     // There is, sadly, one complicating thing for stores.  Stores do not
1152     // produce values, only consume them.  However, in order to make loads and
1153     // stores value number the same, we ignore the value operand of the store.
1154     // But the value operand will still be the leader of our class, and thus, it
1155     // may change.  Because the store is a use, the store will get reprocessed,
1156     // but nothing will change about it, and so nothing above will catch it
1157     // (since the class will not change).  In order to make sure everything ends
1158     // up okay, we need to recheck the leader of the class.  Since stores of
1159     // different values value number differently due to different memorydefs, we
1160     // are guaranteed the leader is always the same between stores in the same
1161     // class.
1162     DEBUG(dbgs() << "Checking store leader\n");
1163     auto ProperLeader =
1164         lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
1165     if (EClass->RepLeader != ProperLeader) {
1166       DEBUG(dbgs() << "Store leader changed, fixing\n");
1167       EClass->RepLeader = ProperLeader;
1168       markLeaderChangeTouched(EClass);
1169       markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
1170     }
1171   }
1172 }
1173
1174 // Process the fact that Edge (from, to) is reachable, including marking
1175 // any newly reachable blocks and instructions for processing.
1176 void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
1177   // Check if the Edge was reachable before.
1178   if (ReachableEdges.insert({From, To}).second) {
1179     // If this block wasn't reachable before, all instructions are touched.
1180     if (ReachableBlocks.insert(To).second) {
1181       DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n");
1182       const auto &InstRange = BlockInstRange.lookup(To);
1183       TouchedInstructions.set(InstRange.first, InstRange.second);
1184     } else {
1185       DEBUG(dbgs() << "Block " << getBlockName(To)
1186                    << " was reachable, but new edge {" << getBlockName(From)
1187                    << "," << getBlockName(To) << "} to it found\n");
1188
1189       // We've made an edge reachable to an existing block, which may
1190       // impact predicates. Otherwise, only mark the phi nodes as touched, as
1191       // they are the only thing that depend on new edges. Anything using their
1192       // values will get propagated to if necessary.
1193       if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
1194         TouchedInstructions.set(InstrDFS[MemPhi]);
1195
1196       auto BI = To->begin();
1197       while (isa<PHINode>(BI)) {
1198         TouchedInstructions.set(InstrDFS[&*BI]);
1199         ++BI;
1200       }
1201     }
1202   }
1203 }
1204
1205 // Given a predicate condition (from a switch, cmp, or whatever) and a block,
1206 // see if we know some constant value for it already.
1207 Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
1208   auto Result = lookupOperandLeader(Cond, nullptr, B);
1209   if (isa<Constant>(Result))
1210     return Result;
1211   return nullptr;
1212 }
1213
1214 // Process the outgoing edges of a block for reachability.
1215 void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
1216   // Evaluate reachability of terminator instruction.
1217   BranchInst *BR;
1218   if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
1219     Value *Cond = BR->getCondition();
1220     Value *CondEvaluated = findConditionEquivalence(Cond, B);
1221     if (!CondEvaluated) {
1222       if (auto *I = dyn_cast<Instruction>(Cond)) {
1223         const Expression *E = createExpression(I, B);
1224         if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
1225           CondEvaluated = CE->getConstantValue();
1226         }
1227       } else if (isa<ConstantInt>(Cond)) {
1228         CondEvaluated = Cond;
1229       }
1230     }
1231     ConstantInt *CI;
1232     BasicBlock *TrueSucc = BR->getSuccessor(0);
1233     BasicBlock *FalseSucc = BR->getSuccessor(1);
1234     if (CondEvaluated && (CI = dyn_cast<ConstantInt>(CondEvaluated))) {
1235       if (CI->isOne()) {
1236         DEBUG(dbgs() << "Condition for Terminator " << *TI
1237                      << " evaluated to true\n");
1238         updateReachableEdge(B, TrueSucc);
1239       } else if (CI->isZero()) {
1240         DEBUG(dbgs() << "Condition for Terminator " << *TI
1241                      << " evaluated to false\n");
1242         updateReachableEdge(B, FalseSucc);
1243       }
1244     } else {
1245       updateReachableEdge(B, TrueSucc);
1246       updateReachableEdge(B, FalseSucc);
1247     }
1248   } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
1249     // For switches, propagate the case values into the case
1250     // destinations.
1251
1252     // Remember how many outgoing edges there are to every successor.
1253     SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
1254
1255     Value *SwitchCond = SI->getCondition();
1256     Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
1257     // See if we were able to turn this switch statement into a constant.
1258     if (CondEvaluated && isa<ConstantInt>(CondEvaluated)) {
1259       auto *CondVal = cast<ConstantInt>(CondEvaluated);
1260       // We should be able to get case value for this.
1261       auto CaseVal = SI->findCaseValue(CondVal);
1262       if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) {
1263         // We proved the value is outside of the range of the case.
1264         // We can't do anything other than mark the default dest as reachable,
1265         // and go home.
1266         updateReachableEdge(B, SI->getDefaultDest());
1267         return;
1268       }
1269       // Now get where it goes and mark it reachable.
1270       BasicBlock *TargetBlock = CaseVal.getCaseSuccessor();
1271       updateReachableEdge(B, TargetBlock);
1272     } else {
1273       for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
1274         BasicBlock *TargetBlock = SI->getSuccessor(i);
1275         ++SwitchEdges[TargetBlock];
1276         updateReachableEdge(B, TargetBlock);
1277       }
1278     }
1279   } else {
1280     // Otherwise this is either unconditional, or a type we have no
1281     // idea about. Just mark successors as reachable.
1282     for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
1283       BasicBlock *TargetBlock = TI->getSuccessor(i);
1284       updateReachableEdge(B, TargetBlock);
1285     }
1286
1287     // This also may be a memory defining terminator, in which case, set it
1288     // equivalent to nothing.
1289     if (MemoryAccess *MA = MSSA->getMemoryAccess(TI))
1290       setMemoryAccessEquivTo(MA, nullptr);
1291   }
1292 }
1293
1294 // The algorithm initially places the values of the routine in the INITIAL
1295 // congruence
1296 // class. The leader of INITIAL is the undetermined value `TOP`.
1297 // When the algorithm has finished, values still in INITIAL are unreachable.
1298 void NewGVN::initializeCongruenceClasses(Function &F) {
1299   // FIXME now i can't remember why this is 2
1300   NextCongruenceNum = 2;
1301   // Initialize all other instructions to be in INITIAL class.
1302   CongruenceClass::MemberSet InitialValues;
1303   InitialClass = createCongruenceClass(nullptr, nullptr);
1304   for (auto &B : F) {
1305     if (auto *MP = MSSA->getMemoryAccess(&B))
1306       MemoryAccessEquiv.insert({MP, MSSA->getLiveOnEntryDef()});
1307
1308     for (auto &I : B) {
1309       InitialValues.insert(&I);
1310       ValueToClass[&I] = InitialClass;
1311       // All memory accesses are equivalent to live on entry to start. They must
1312       // be initialized to something so that initial changes are noticed. For
1313       // the maximal answer, we initialize them all to be the same as
1314       // liveOnEntry.  Note that to save time, we only initialize the
1315       // MemoryDef's for stores and all MemoryPhis to be equal.  Right now, no
1316       // other expression can generate a memory equivalence.  If we start
1317       // handling memcpy/etc, we can expand this.
1318       if (isa<StoreInst>(&I))
1319         MemoryAccessEquiv.insert(
1320             {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()});
1321     }
1322   }
1323   InitialClass->Members.swap(InitialValues);
1324
1325   // Initialize arguments to be in their own unique congruence classes
1326   for (auto &FA : F.args())
1327     createSingletonCongruenceClass(&FA);
1328 }
1329
1330 void NewGVN::cleanupTables() {
1331   for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
1332     DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
1333                  << CongruenceClasses[i]->Members.size() << " members\n");
1334     // Make sure we delete the congruence class (probably worth switching to
1335     // a unique_ptr at some point.
1336     delete CongruenceClasses[i];
1337     CongruenceClasses[i] = nullptr;
1338   }
1339
1340   ValueToClass.clear();
1341   ArgRecycler.clear(ExpressionAllocator);
1342   ExpressionAllocator.Reset();
1343   CongruenceClasses.clear();
1344   ExpressionToClass.clear();
1345   ValueToExpression.clear();
1346   ReachableBlocks.clear();
1347   ReachableEdges.clear();
1348 #ifndef NDEBUG
1349   ProcessedCount.clear();
1350 #endif
1351   DFSDomMap.clear();
1352   InstrDFS.clear();
1353   InstructionsToErase.clear();
1354
1355   DFSToInstr.clear();
1356   BlockInstRange.clear();
1357   TouchedInstructions.clear();
1358   DominatedInstRange.clear();
1359   MemoryAccessEquiv.clear();
1360 }
1361
1362 std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
1363                                                        unsigned Start) {
1364   unsigned End = Start;
1365   if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
1366     InstrDFS[MemPhi] = End++;
1367     DFSToInstr.emplace_back(MemPhi);
1368   }
1369
1370   for (auto &I : *B) {
1371     InstrDFS[&I] = End++;
1372     DFSToInstr.emplace_back(&I);
1373   }
1374
1375   // All of the range functions taken half-open ranges (open on the end side).
1376   // So we do not subtract one from count, because at this point it is one
1377   // greater than the last instruction.
1378   return std::make_pair(Start, End);
1379 }
1380
1381 void NewGVN::updateProcessedCount(Value *V) {
1382 #ifndef NDEBUG
1383   if (ProcessedCount.count(V) == 0) {
1384     ProcessedCount.insert({V, 1});
1385   } else {
1386     ProcessedCount[V] += 1;
1387     assert(ProcessedCount[V] < 100 &&
1388            "Seem to have processed the same Value a lot");
1389   }
1390 #endif
1391 }
1392 // Evaluate MemoryPhi nodes symbolically, just like PHI nodes
1393 void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
1394   // If all the arguments are the same, the MemoryPhi has the same value as the
1395   // argument.
1396   // Filter out unreachable blocks from our operands.
1397   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
1398     return ReachableBlocks.count(MP->getIncomingBlock(U));
1399   });
1400
1401   assert(Filtered.begin() != Filtered.end() &&
1402          "We should not be processing a MemoryPhi in a completely "
1403          "unreachable block");
1404
1405   // Transform the remaining operands into operand leaders.
1406   // FIXME: mapped_iterator should have a range version.
1407   auto LookupFunc = [&](const Use &U) {
1408     return lookupMemoryAccessEquiv(cast<MemoryAccess>(U));
1409   };
1410   auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc);
1411   auto MappedEnd = map_iterator(Filtered.end(), LookupFunc);
1412
1413   // and now check if all the elements are equal.
1414   // Sadly, we can't use std::equals since these are random access iterators.
1415   MemoryAccess *AllSameValue = *MappedBegin;
1416   ++MappedBegin;
1417   bool AllEqual = std::all_of(
1418       MappedBegin, MappedEnd,
1419       [&AllSameValue](const MemoryAccess *V) { return V == AllSameValue; });
1420
1421   if (AllEqual)
1422     DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n");
1423   else
1424     DEBUG(dbgs() << "Memory Phi value numbered to itself\n");
1425
1426   if (setMemoryAccessEquivTo(MP, AllEqual ? AllSameValue : nullptr))
1427     markMemoryUsersTouched(MP);
1428 }
1429
1430 // Value number a single instruction, symbolically evaluating, performing
1431 // congruence finding, and updating mappings.
1432 void NewGVN::valueNumberInstruction(Instruction *I) {
1433   DEBUG(dbgs() << "Processing instruction " << *I << "\n");
1434   if (isInstructionTriviallyDead(I, TLI)) {
1435     DEBUG(dbgs() << "Skipping unused instruction\n");
1436     markInstructionForDeletion(I);
1437     return;
1438   }
1439   if (!I->isTerminator()) {
1440     const auto *Symbolized = performSymbolicEvaluation(I, I->getParent());
1441     // If we couldn't come up with a symbolic expression, use the unknown
1442     // expression
1443     if (Symbolized == nullptr)
1444       Symbolized = createUnknownExpression(I);
1445     performCongruenceFinding(I, Symbolized);
1446   } else {
1447     // Handle terminators that return values. All of them produce values we
1448     // don't currently understand.
1449     if (!I->getType()->isVoidTy()) {
1450       auto *Symbolized = createUnknownExpression(I);
1451       performCongruenceFinding(I, Symbolized);
1452     }
1453     processOutgoingEdges(dyn_cast<TerminatorInst>(I), I->getParent());
1454   }
1455 }
1456
1457 // Verify the that the memory equivalence table makes sense relative to the
1458 // congruence classes.
1459 void NewGVN::verifyMemoryCongruency() {
1460   // Anything equivalent in the memory access table should be in the same
1461   // congruence class.
1462
1463   // Filter out the unreachable and trivially dead entries, because they may
1464   // never have been updated if the instructions were not processed.
1465   auto ReachableAccessPred =
1466       [&](const std::pair<const MemoryAccess *, MemoryAccess *> Pair) {
1467         bool Result = ReachableBlocks.count(Pair.first->getBlock());
1468         if (!Result)
1469           return false;
1470         if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
1471           return !isInstructionTriviallyDead(MemDef->getMemoryInst());
1472         return true;
1473       };
1474
1475   auto Filtered = make_filter_range(MemoryAccessEquiv, ReachableAccessPred);
1476   for (auto KV : Filtered) {
1477     assert(KV.first != KV.second &&
1478            "We added a useless equivalence to the memory equivalence table");
1479     // Unreachable instructions may not have changed because we never process
1480     // them.
1481     if (!ReachableBlocks.count(KV.first->getBlock()))
1482       continue;
1483     if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
1484       auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
1485       if (FirstMUD && SecondMUD)
1486         assert(
1487             ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
1488                 ValueToClass.lookup(SecondMUD->getMemoryInst()) &&
1489             "The instructions for these memory operations should have been in "
1490             "the same congruence class");
1491     } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
1492
1493       // We can only sanely verify that MemoryDefs in the operand list all have
1494       // the same class.
1495       auto ReachableOperandPred = [&](const Use &U) {
1496         return ReachableBlocks.count(FirstMP->getIncomingBlock(U)) &&
1497                isa<MemoryDef>(U);
1498
1499       };
1500       // All arguments should in the same class, ignoring unreachable arguments
1501       auto FilteredPhiArgs =
1502           make_filter_range(FirstMP->operands(), ReachableOperandPred);
1503       SmallVector<const CongruenceClass *, 16> PhiOpClasses;
1504       std::transform(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
1505                      std::back_inserter(PhiOpClasses), [&](const Use &U) {
1506                        const MemoryDef *MD = cast<MemoryDef>(U);
1507                        return ValueToClass.lookup(MD->getMemoryInst());
1508                      });
1509       assert(std::equal(PhiOpClasses.begin(), PhiOpClasses.end(),
1510                         PhiOpClasses.begin()) &&
1511              "All MemoryPhi arguments should be in the same class");
1512     }
1513   }
1514 }
1515
1516 // This is the main transformation entry point.
1517 bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
1518                     TargetLibraryInfo *_TLI, AliasAnalysis *_AA,
1519                     MemorySSA *_MSSA) {
1520   bool Changed = false;
1521   DT = _DT;
1522   AC = _AC;
1523   TLI = _TLI;
1524   AA = _AA;
1525   MSSA = _MSSA;
1526   DL = &F.getParent()->getDataLayout();
1527   MSSAWalker = MSSA->getWalker();
1528
1529   // Count number of instructions for sizing of hash tables, and come
1530   // up with a global dfs numbering for instructions.
1531   unsigned ICount = 1;
1532   // Add an empty instruction to account for the fact that we start at 1
1533   DFSToInstr.emplace_back(nullptr);
1534   // Note: We want RPO traversal of the blocks, which is not quite the same as
1535   // dominator tree order, particularly with regard whether backedges get
1536   // visited first or second, given a block with multiple successors.
1537   // If we visit in the wrong order, we will end up performing N times as many
1538   // iterations.
1539   // The dominator tree does guarantee that, for a given dom tree node, it's
1540   // parent must occur before it in the RPO ordering. Thus, we only need to sort
1541   // the siblings.
1542   DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
1543   ReversePostOrderTraversal<Function *> RPOT(&F);
1544   unsigned Counter = 0;
1545   for (auto &B : RPOT) {
1546     auto *Node = DT->getNode(B);
1547     assert(Node && "RPO and Dominator tree should have same reachability");
1548     RPOOrdering[Node] = ++Counter;
1549   }
1550   // Sort dominator tree children arrays into RPO.
1551   for (auto &B : RPOT) {
1552     auto *Node = DT->getNode(B);
1553     if (Node->getChildren().size() > 1)
1554       std::sort(Node->begin(), Node->end(),
1555                 [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) {
1556                   return RPOOrdering[A] < RPOOrdering[B];
1557                 });
1558   }
1559
1560   // Now a standard depth first ordering of the domtree is equivalent to RPO.
1561   auto DFI = df_begin(DT->getRootNode());
1562   for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) {
1563     BasicBlock *B = DFI->getBlock();
1564     const auto &BlockRange = assignDFSNumbers(B, ICount);
1565     BlockInstRange.insert({B, BlockRange});
1566     ICount += BlockRange.second - BlockRange.first;
1567   }
1568
1569   // Handle forward unreachable blocks and figure out which blocks
1570   // have single preds.
1571   for (auto &B : F) {
1572     // Assign numbers to unreachable blocks.
1573     if (!DFI.nodeVisited(DT->getNode(&B))) {
1574       const auto &BlockRange = assignDFSNumbers(&B, ICount);
1575       BlockInstRange.insert({&B, BlockRange});
1576       ICount += BlockRange.second - BlockRange.first;
1577     }
1578   }
1579
1580   TouchedInstructions.resize(ICount);
1581   DominatedInstRange.reserve(F.size());
1582   // Ensure we don't end up resizing the expressionToClass map, as
1583   // that can be quite expensive. At most, we have one expression per
1584   // instruction.
1585   ExpressionToClass.reserve(ICount);
1586
1587   // Initialize the touched instructions to include the entry block.
1588   const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
1589   TouchedInstructions.set(InstRange.first, InstRange.second);
1590   ReachableBlocks.insert(&F.getEntryBlock());
1591
1592   initializeCongruenceClasses(F);
1593
1594   unsigned int Iterations = 0;
1595   // We start out in the entry block.
1596   BasicBlock *LastBlock = &F.getEntryBlock();
1597   while (TouchedInstructions.any()) {
1598     ++Iterations;
1599     // Walk through all the instructions in all the blocks in RPO.
1600     for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
1601          InstrNum = TouchedInstructions.find_next(InstrNum)) {
1602       assert(InstrNum != 0 && "Bit 0 should never be set, something touched an "
1603                               "instruction not in the lookup table");
1604       Value *V = DFSToInstr[InstrNum];
1605       BasicBlock *CurrBlock = nullptr;
1606
1607       if (auto *I = dyn_cast<Instruction>(V))
1608         CurrBlock = I->getParent();
1609       else if (auto *MP = dyn_cast<MemoryPhi>(V))
1610         CurrBlock = MP->getBlock();
1611       else
1612         llvm_unreachable("DFSToInstr gave us an unknown type of instruction");
1613
1614       // If we hit a new block, do reachability processing.
1615       if (CurrBlock != LastBlock) {
1616         LastBlock = CurrBlock;
1617         bool BlockReachable = ReachableBlocks.count(CurrBlock);
1618         const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
1619
1620         // If it's not reachable, erase any touched instructions and move on.
1621         if (!BlockReachable) {
1622           TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
1623           DEBUG(dbgs() << "Skipping instructions in block "
1624                        << getBlockName(CurrBlock)
1625                        << " because it is unreachable\n");
1626           continue;
1627         }
1628         updateProcessedCount(CurrBlock);
1629       }
1630
1631       if (auto *MP = dyn_cast<MemoryPhi>(V)) {
1632         DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
1633         valueNumberMemoryPhi(MP);
1634       } else if (auto *I = dyn_cast<Instruction>(V)) {
1635         valueNumberInstruction(I);
1636       } else {
1637         llvm_unreachable("Should have been a MemoryPhi or Instruction");
1638       }
1639       updateProcessedCount(V);
1640       // Reset after processing (because we may mark ourselves as touched when
1641       // we propagate equalities).
1642       TouchedInstructions.reset(InstrNum);
1643     }
1644   }
1645   NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
1646 #ifndef NDEBUG
1647   verifyMemoryCongruency();
1648 #endif
1649   Changed |= eliminateInstructions(F);
1650
1651   // Delete all instructions marked for deletion.
1652   for (Instruction *ToErase : InstructionsToErase) {
1653     if (!ToErase->use_empty())
1654       ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
1655
1656     ToErase->eraseFromParent();
1657   }
1658
1659   // Delete all unreachable blocks.
1660   auto UnreachableBlockPred = [&](const BasicBlock &BB) {
1661     return !ReachableBlocks.count(&BB);
1662   };
1663
1664   for (auto &BB : make_filter_range(F, UnreachableBlockPred)) {
1665     DEBUG(dbgs() << "We believe block " << getBlockName(&BB)
1666                  << " is unreachable\n");
1667     deleteInstructionsInBlock(&BB);
1668     Changed = true;
1669   }
1670
1671   cleanupTables();
1672   return Changed;
1673 }
1674
1675 bool NewGVN::runOnFunction(Function &F) {
1676   if (skipFunction(F))
1677     return false;
1678   return runGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
1679                 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
1680                 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
1681                 &getAnalysis<AAResultsWrapperPass>().getAAResults(),
1682                 &getAnalysis<MemorySSAWrapperPass>().getMSSA());
1683 }
1684
1685 PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
1686   NewGVN Impl;
1687
1688   // Apparently the order in which we get these results matter for
1689   // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
1690   // the same order here, just in case.
1691   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1692   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1693   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1694   auto &AA = AM.getResult<AAManager>(F);
1695   auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
1696   bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA);
1697   if (!Changed)
1698     return PreservedAnalyses::all();
1699   PreservedAnalyses PA;
1700   PA.preserve<DominatorTreeAnalysis>();
1701   PA.preserve<GlobalsAA>();
1702   return PA;
1703 }
1704
1705 // Return true if V is a value that will always be available (IE can
1706 // be placed anywhere) in the function.  We don't do globals here
1707 // because they are often worse to put in place.
1708 // TODO: Separate cost from availability
1709 static bool alwaysAvailable(Value *V) {
1710   return isa<Constant>(V) || isa<Argument>(V);
1711 }
1712
1713 // Get the basic block from an instruction/value.
1714 static BasicBlock *getBlockForValue(Value *V) {
1715   if (auto *I = dyn_cast<Instruction>(V))
1716     return I->getParent();
1717   return nullptr;
1718 }
1719
1720 struct NewGVN::ValueDFS {
1721   int DFSIn = 0;
1722   int DFSOut = 0;
1723   int LocalNum = 0;
1724   // Only one of these will be set.
1725   Value *Val = nullptr;
1726   Use *U = nullptr;
1727
1728   bool operator<(const ValueDFS &Other) const {
1729     // It's not enough that any given field be less than - we have sets
1730     // of fields that need to be evaluated together to give a proper ordering.
1731     // For example, if you have;
1732     // DFS (1, 3)
1733     // Val 0
1734     // DFS (1, 2)
1735     // Val 50
1736     // We want the second to be less than the first, but if we just go field
1737     // by field, we will get to Val 0 < Val 50 and say the first is less than
1738     // the second. We only want it to be less than if the DFS orders are equal.
1739     //
1740     // Each LLVM instruction only produces one value, and thus the lowest-level
1741     // differentiator that really matters for the stack (and what we use as as a
1742     // replacement) is the local dfs number.
1743     // Everything else in the structure is instruction level, and only affects
1744     // the order in which we will replace operands of a given instruction.
1745     //
1746     // For a given instruction (IE things with equal dfsin, dfsout, localnum),
1747     // the order of replacement of uses does not matter.
1748     // IE given,
1749     //  a = 5
1750     //  b = a + a
1751     // When you hit b, you will have two valuedfs with the same dfsin, out, and
1752     // localnum.
1753     // The .val will be the same as well.
1754     // The .u's will be different.
1755     // You will replace both, and it does not matter what order you replace them
1756     // in (IE whether you replace operand 2, then operand 1, or operand 1, then
1757     // operand 2).
1758     // Similarly for the case of same dfsin, dfsout, localnum, but different
1759     // .val's
1760     //  a = 5
1761     //  b  = 6
1762     //  c = a + b
1763     // in c, we will a valuedfs for a, and one for b,with everything the same
1764     // but .val  and .u.
1765     // It does not matter what order we replace these operands in.
1766     // You will always end up with the same IR, and this is guaranteed.
1767     return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
1768            std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val,
1769                     Other.U);
1770   }
1771 };
1772
1773 void NewGVN::convertDenseToDFSOrdered(
1774     CongruenceClass::MemberSet &Dense,
1775     SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
1776   for (auto D : Dense) {
1777     // First add the value.
1778     BasicBlock *BB = getBlockForValue(D);
1779     // Constants are handled prior to ever calling this function, so
1780     // we should only be left with instructions as members.
1781     assert(BB && "Should have figured out a basic block for value");
1782     ValueDFS VD;
1783
1784     std::pair<int, int> DFSPair = DFSDomMap[BB];
1785     assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair");
1786     VD.DFSIn = DFSPair.first;
1787     VD.DFSOut = DFSPair.second;
1788     VD.Val = D;
1789     // If it's an instruction, use the real local dfs number.
1790     if (auto *I = dyn_cast<Instruction>(D))
1791       VD.LocalNum = InstrDFS[I];
1792     else
1793       llvm_unreachable("Should have been an instruction");
1794
1795     DFSOrderedSet.emplace_back(VD);
1796
1797     // Now add the users.
1798     for (auto &U : D->uses()) {
1799       if (auto *I = dyn_cast<Instruction>(U.getUser())) {
1800         ValueDFS VD;
1801         // Put the phi node uses in the incoming block.
1802         BasicBlock *IBlock;
1803         if (auto *P = dyn_cast<PHINode>(I)) {
1804           IBlock = P->getIncomingBlock(U);
1805           // Make phi node users appear last in the incoming block
1806           // they are from.
1807           VD.LocalNum = InstrDFS.size() + 1;
1808         } else {
1809           IBlock = I->getParent();
1810           VD.LocalNum = InstrDFS[I];
1811         }
1812         std::pair<int, int> DFSPair = DFSDomMap[IBlock];
1813         VD.DFSIn = DFSPair.first;
1814         VD.DFSOut = DFSPair.second;
1815         VD.U = &U;
1816         DFSOrderedSet.emplace_back(VD);
1817       }
1818     }
1819   }
1820 }
1821
1822 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
1823   // Patch the replacement so that it is not more restrictive than the value
1824   // being replaced.
1825   auto *Op = dyn_cast<BinaryOperator>(I);
1826   auto *ReplOp = dyn_cast<BinaryOperator>(Repl);
1827
1828   if (Op && ReplOp)
1829     ReplOp->andIRFlags(Op);
1830
1831   if (auto *ReplInst = dyn_cast<Instruction>(Repl)) {
1832     // FIXME: If both the original and replacement value are part of the
1833     // same control-flow region (meaning that the execution of one
1834     // guarentees the executation of the other), then we can combine the
1835     // noalias scopes here and do better than the general conservative
1836     // answer used in combineMetadata().
1837
1838     // In general, GVN unifies expressions over different control-flow
1839     // regions, and so we need a conservative combination of the noalias
1840     // scopes.
1841     unsigned KnownIDs[] = {
1842         LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
1843         LLVMContext::MD_noalias,        LLVMContext::MD_range,
1844         LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
1845         LLVMContext::MD_invariant_group};
1846     combineMetadata(ReplInst, I, KnownIDs);
1847   }
1848 }
1849
1850 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
1851   patchReplacementInstruction(I, Repl);
1852   I->replaceAllUsesWith(Repl);
1853 }
1854
1855 void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
1856   DEBUG(dbgs() << "  BasicBlock Dead:" << *BB);
1857   ++NumGVNBlocksDeleted;
1858
1859   // Check to see if there are non-terminating instructions to delete.
1860   if (isa<TerminatorInst>(BB->begin()))
1861     return;
1862
1863   // Delete the instructions backwards, as it has a reduced likelihood of having
1864   // to update as many def-use and use-def chains. Start after the terminator.
1865   auto StartPoint = BB->rbegin();
1866   ++StartPoint;
1867   // Note that we explicitly recalculate BB->rend() on each iteration,
1868   // as it may change when we remove the first instruction.
1869   for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) {
1870     Instruction &Inst = *I++;
1871     if (!Inst.use_empty())
1872       Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
1873     if (isa<LandingPadInst>(Inst))
1874       continue;
1875
1876     Inst.eraseFromParent();
1877     ++NumGVNInstrDeleted;
1878   }
1879 }
1880
1881 void NewGVN::markInstructionForDeletion(Instruction *I) {
1882   DEBUG(dbgs() << "Marking " << *I << " for deletion\n");
1883   InstructionsToErase.insert(I);
1884 }
1885
1886 void NewGVN::replaceInstruction(Instruction *I, Value *V) {
1887
1888   DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n");
1889   patchAndReplaceAllUsesWith(I, V);
1890   // We save the actual erasing to avoid invalidating memory
1891   // dependencies until we are done with everything.
1892   markInstructionForDeletion(I);
1893 }
1894
1895 namespace {
1896
1897 // This is a stack that contains both the value and dfs info of where
1898 // that value is valid.
1899 class ValueDFSStack {
1900 public:
1901   Value *back() const { return ValueStack.back(); }
1902   std::pair<int, int> dfs_back() const { return DFSStack.back(); }
1903
1904   void push_back(Value *V, int DFSIn, int DFSOut) {
1905     ValueStack.emplace_back(V);
1906     DFSStack.emplace_back(DFSIn, DFSOut);
1907   }
1908   bool empty() const { return DFSStack.empty(); }
1909   bool isInScope(int DFSIn, int DFSOut) const {
1910     if (empty())
1911       return false;
1912     return DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second;
1913   }
1914
1915   void popUntilDFSScope(int DFSIn, int DFSOut) {
1916
1917     // These two should always be in sync at this point.
1918     assert(ValueStack.size() == DFSStack.size() &&
1919            "Mismatch between ValueStack and DFSStack");
1920     while (
1921         !DFSStack.empty() &&
1922         !(DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second)) {
1923       DFSStack.pop_back();
1924       ValueStack.pop_back();
1925     }
1926   }
1927
1928 private:
1929   SmallVector<Value *, 8> ValueStack;
1930   SmallVector<std::pair<int, int>, 8> DFSStack;
1931 };
1932 }
1933
1934 bool NewGVN::eliminateInstructions(Function &F) {
1935   // This is a non-standard eliminator. The normal way to eliminate is
1936   // to walk the dominator tree in order, keeping track of available
1937   // values, and eliminating them.  However, this is mildly
1938   // pointless. It requires doing lookups on every instruction,
1939   // regardless of whether we will ever eliminate it.  For
1940   // instructions part of most singleton congruence classes, we know we
1941   // will never eliminate them.
1942
1943   // Instead, this eliminator looks at the congruence classes directly, sorts
1944   // them into a DFS ordering of the dominator tree, and then we just
1945   // perform elimination straight on the sets by walking the congruence
1946   // class member uses in order, and eliminate the ones dominated by the
1947   // last member.   This is worst case O(E log E) where E = number of
1948   // instructions in a single congruence class.  In theory, this is all
1949   // instructions.   In practice, it is much faster, as most instructions are
1950   // either in singleton congruence classes or can't possibly be eliminated
1951   // anyway (if there are no overlapping DFS ranges in class).
1952   // When we find something not dominated, it becomes the new leader
1953   // for elimination purposes.
1954   // TODO: If we wanted to be faster, We could remove any members with no
1955   // overlapping ranges while sorting, as we will never eliminate anything
1956   // with those members, as they don't dominate anything else in our set.
1957
1958   bool AnythingReplaced = false;
1959
1960   // Since we are going to walk the domtree anyway, and we can't guarantee the
1961   // DFS numbers are updated, we compute some ourselves.
1962   DT->updateDFSNumbers();
1963
1964   for (auto &B : F) {
1965     if (!ReachableBlocks.count(&B)) {
1966       for (const auto S : successors(&B)) {
1967         for (auto II = S->begin(); isa<PHINode>(II); ++II) {
1968           auto &Phi = cast<PHINode>(*II);
1969           DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
1970                        << getBlockName(&B)
1971                        << " with undef due to it being unreachable\n");
1972           for (auto &Operand : Phi.incoming_values())
1973             if (Phi.getIncomingBlock(Operand) == &B)
1974               Operand.set(UndefValue::get(Phi.getType()));
1975         }
1976       }
1977     }
1978     DomTreeNode *Node = DT->getNode(&B);
1979     if (Node)
1980       DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()};
1981   }
1982
1983   for (CongruenceClass *CC : CongruenceClasses) {
1984     // FIXME: We should eventually be able to replace everything still
1985     // in the initial class with undef, as they should be unreachable.
1986     // Right now, initial still contains some things we skip value
1987     // numbering of (UNREACHABLE's, for example).
1988     if (CC == InitialClass || CC->Dead)
1989       continue;
1990     assert(CC->RepLeader && "We should have had a leader");
1991
1992     // If this is a leader that is always available, and it's a
1993     // constant or has no equivalences, just replace everything with
1994     // it. We then update the congruence class with whatever members
1995     // are left.
1996     if (alwaysAvailable(CC->RepLeader)) {
1997       SmallPtrSet<Value *, 4> MembersLeft;
1998       for (auto M : CC->Members) {
1999
2000         Value *Member = M;
2001
2002         // Void things have no uses we can replace.
2003         if (Member == CC->RepLeader || Member->getType()->isVoidTy()) {
2004           MembersLeft.insert(Member);
2005           continue;
2006         }
2007
2008         DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for "
2009                      << *Member << "\n");
2010         // Due to equality propagation, these may not always be
2011         // instructions, they may be real values.  We don't really
2012         // care about trying to replace the non-instructions.
2013         if (auto *I = dyn_cast<Instruction>(Member)) {
2014           assert(CC->RepLeader != I &&
2015                  "About to accidentally remove our leader");
2016           replaceInstruction(I, CC->RepLeader);
2017           AnythingReplaced = true;
2018
2019           continue;
2020         } else {
2021           MembersLeft.insert(I);
2022         }
2023       }
2024       CC->Members.swap(MembersLeft);
2025
2026     } else {
2027       DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
2028       // If this is a singleton, we can skip it.
2029       if (CC->Members.size() != 1) {
2030
2031         // This is a stack because equality replacement/etc may place
2032         // constants in the middle of the member list, and we want to use
2033         // those constant values in preference to the current leader, over
2034         // the scope of those constants.
2035         ValueDFSStack EliminationStack;
2036
2037         // Convert the members to DFS ordered sets and then merge them.
2038         SmallVector<ValueDFS, 8> DFSOrderedSet;
2039         convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
2040
2041         // Sort the whole thing.
2042         std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
2043
2044         for (auto &VD : DFSOrderedSet) {
2045           int MemberDFSIn = VD.DFSIn;
2046           int MemberDFSOut = VD.DFSOut;
2047           Value *Member = VD.Val;
2048           Use *MemberUse = VD.U;
2049
2050           if (Member) {
2051             // We ignore void things because we can't get a value from them.
2052             // FIXME: We could actually use this to kill dead stores that are
2053             // dominated by equivalent earlier stores.
2054             if (Member->getType()->isVoidTy())
2055               continue;
2056           }
2057
2058           if (EliminationStack.empty()) {
2059             DEBUG(dbgs() << "Elimination Stack is empty\n");
2060           } else {
2061             DEBUG(dbgs() << "Elimination Stack Top DFS numbers are ("
2062                          << EliminationStack.dfs_back().first << ","
2063                          << EliminationStack.dfs_back().second << ")\n");
2064           }
2065
2066           DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << ","
2067                        << MemberDFSOut << ")\n");
2068           // First, we see if we are out of scope or empty.  If so,
2069           // and there equivalences, we try to replace the top of
2070           // stack with equivalences (if it's on the stack, it must
2071           // not have been eliminated yet).
2072           // Then we synchronize to our current scope, by
2073           // popping until we are back within a DFS scope that
2074           // dominates the current member.
2075           // Then, what happens depends on a few factors
2076           // If the stack is now empty, we need to push
2077           // If we have a constant or a local equivalence we want to
2078           // start using, we also push.
2079           // Otherwise, we walk along, processing members who are
2080           // dominated by this scope, and eliminate them.
2081           bool ShouldPush =
2082               Member && (EliminationStack.empty() || isa<Constant>(Member));
2083           bool OutOfScope =
2084               !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut);
2085
2086           if (OutOfScope || ShouldPush) {
2087             // Sync to our current scope.
2088             EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
2089             ShouldPush |= Member && EliminationStack.empty();
2090             if (ShouldPush) {
2091               EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
2092             }
2093           }
2094
2095           // If we get to this point, and the stack is empty we must have a use
2096           // with nothing we can use to eliminate it, just skip it.
2097           if (EliminationStack.empty())
2098             continue;
2099
2100           // Skip the Value's, we only want to eliminate on their uses.
2101           if (Member)
2102             continue;
2103           Value *Result = EliminationStack.back();
2104
2105           // Don't replace our existing users with ourselves.
2106           if (MemberUse->get() == Result)
2107             continue;
2108
2109           DEBUG(dbgs() << "Found replacement " << *Result << " for "
2110                        << *MemberUse->get() << " in " << *(MemberUse->getUser())
2111                        << "\n");
2112
2113           // If we replaced something in an instruction, handle the patching of
2114           // metadata.
2115           if (auto *ReplacedInst = dyn_cast<Instruction>(MemberUse->get()))
2116             patchReplacementInstruction(ReplacedInst, Result);
2117
2118           assert(isa<Instruction>(MemberUse->getUser()));
2119           MemberUse->set(Result);
2120           AnythingReplaced = true;
2121         }
2122       }
2123     }
2124
2125     // Cleanup the congruence class.
2126     SmallPtrSet<Value *, 4> MembersLeft;
2127     for (Value *Member : CC->Members) {
2128       if (Member->getType()->isVoidTy()) {
2129         MembersLeft.insert(Member);
2130         continue;
2131       }
2132
2133       if (auto *MemberInst = dyn_cast<Instruction>(Member)) {
2134         if (isInstructionTriviallyDead(MemberInst)) {
2135           // TODO: Don't mark loads of undefs.
2136           markInstructionForDeletion(MemberInst);
2137           continue;
2138         }
2139       }
2140       MembersLeft.insert(Member);
2141     }
2142     CC->Members.swap(MembersLeft);
2143   }
2144
2145   return AnythingReplaced;
2146 }