//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");
static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
             "for faster compile time. Caps the MemorySSA clobbering calls."));

static cl::opt<bool> EarlyCSEDebugHash(
    "earlycse-debug-hash", cl::init(false), cl::Hidden,
    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
             "function is well-behaved w.r.t. its isEqual predicate"));
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//
namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
           isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
           isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
           isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
           isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace
namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm
/// Match a 'select' including an optional 'not' of the condition.
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
                                           Value *&B,
                                           SelectPatternFlavor &Flavor) {
  // Return false if V is not even a select.
  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
    return false;

  // Look through a 'not' of the condition operand by swapping A/B.
  Value *CondNot;
  if (match(Cond, m_Not(m_Value(CondNot)))) {
    Cond = CondNot;
    std::swap(A, B);
  }
  // Match canonical forms of abs/nabs/min/max. We are not using ValueTracking's
  // more powerful matchSelectPattern() because it may rely on instruction flags
  // such as "nsw". That would be incompatible with the current hashing
  // mechanism that may remove flags to increase the likelihood of CSE.

  // These are the canonical forms of abs(X) and nabs(X) created by instcombine:
  // %N = sub i32 0, %X
  // %C = icmp slt i32 %X, 0
  // %ABS = select i1 %C, i32 %N, i32 %X
  //
  // %N = sub i32 0, %X
  // %C = icmp slt i32 %X, 0
  // %NABS = select i1 %C, i32 %X, i32 %N
  Flavor = SPF_UNKNOWN;
  CmpInst::Predicate Pred;

  if (match(Cond, m_ICmp(Pred, m_Specific(B), m_ZeroInt())) &&
      Pred == ICmpInst::ICMP_SLT && match(A, m_Neg(m_Specific(B)))) {
    // ABS: B < 0 ? -B : B
    Flavor = SPF_ABS;
    return true;
  }
  if (match(Cond, m_ICmp(Pred, m_Specific(A), m_ZeroInt())) &&
      Pred == ICmpInst::ICMP_SLT && match(B, m_Neg(m_Specific(A)))) {
    // NABS: A < 0 ? A : -A
    Flavor = SPF_NABS;
    return true;
  }

  if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
    // Check for commuted variants of min/max by swapping predicate.
    // If we do not match the standard or commuted patterns, this is not a
    // recognized form of min/max, but it is still a select, so return true.
    if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
      return true;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  switch (Pred) {
  case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
  case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
  case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
  case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
  default: break;
  }

  return true;
}
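// For illustration (a hypothetical pair of snippets, not taken from any test):
// both of these selects are recognized as SPF_SMIN by the matcher above, the
// second one through the swapped-predicate path:
//   %c1 = icmp slt i32 %x, %y
//   %m1 = select i1 %c1, i32 %x, i32 %y
//   %c2 = icmp sgt i32 %y, %x
//   %m2 = select i1 %c2, i32 %x, i32 %y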
static unsigned getHashValueImpl(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }
  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    // Compares can be commuted by swapping the comparands and
    // updating the predicate. Choose the form that has the
    // comparands in sorted order, or in the case of a tie, the
    // one with the lower predicate.
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
      std::swap(LHS, RHS);
      Pred = SwappedPred;
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }
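  // For example (hypothetical IR), these two compares normalize to the same
  // (operand, predicate) form and therefore receive the same hash, and are
  // then matched as equal by isEqualImpl below:
  //   %c1 = icmp ult i32 %a, %b
  //   %c2 = icmp ugt i32 %b, %a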
  // Hash general selects to allow matching commuted true/false operands.
  SelectPatternFlavor SPF;
  Value *Cond, *A, *B;
  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
    // Hash min/max/abs (cmp + select) to allow for commuted operands.
    // Min/max may also have a non-canonical compare predicate (e.g., the
    // compare for smin may use 'sgt' rather than 'slt'), and non-canonical
    // operands in the compare.
    // TODO: We should also detect FP min/max.
    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
        SPF == SPF_UMIN || SPF == SPF_UMAX) {
      if (A > B)
        std::swap(A, B);
      return hash_combine(Inst->getOpcode(), SPF, A, B);
    }
    if (SPF == SPF_ABS || SPF == SPF_NABS) {
      // ABS/NABS always puts the input in A and its negation in B.
      return hash_combine(Inst->getOpcode(), SPF, A, B);
    }

    // If we do not have a compare as the condition, just hash in the condition.
    CmpInst::Predicate Pred;
    Value *X, *Y;
    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
      return hash_combine(Inst->getOpcode(), Cond, A, B);

    // Similar to cmp normalization (above) - canonicalize the predicate value:
    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
    if (CmpInst::getInversePredicate(Pred) < Pred) {
      Pred = CmpInst::getInversePredicate(Pred);
      std::swap(A, B);
    }
    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
  }
  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
#ifndef NDEBUG
  // If -earlycse-debug-hash was specified, return a constant -- this
  // will force all hashing to collide, so we'll exhaustively search
  // the table for a match, and the assertion in isEqual will fire if
  // there's a bug causing equal keys to hash differently.
  if (EarlyCSEDebugHash)
    return 0;
#endif
  return getHashValueImpl(Val);
}
static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }
  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  // Selects can be non-trivially equivalent via inverted conditions and swaps.
  SelectPatternFlavor LSPF, RSPF;
  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
    if (LSPF == RSPF) {
      // TODO: We should also detect FP min/max.
      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
        return ((LHSA == RHSA && LHSB == RHSB) ||
                (LHSA == RHSB && LHSB == RHSA));

      if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
        // Abs results are placed in a defined order by matchSelectPattern.
        return LHSA == RHSA && LHSB == RHSB;
      }

      // select Cond, A, B <--> select not(Cond), B, A
      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
        return true;
    }

    // If the true/false operands are swapped and the conditions are compares
    // with inverted predicates, the selects are equal:
    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
    //
    // This also handles patterns with a double-negation in the sense of not +
    // inverse, because we looked through a 'not' in the matching function and
    // canonicalized the operands:
    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
    //
    // This intentionally does NOT handle patterns with a double-negation in
    // the sense of not + not, because doing so could result in values comparing
    // as equal that hash differently in the min/max/abs cases like:
    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
    //   ^ hashes as min                  ^ would not hash as min
    // In the context of the EarlyCSE pass, however, such cases never reach
    // this code, as we simplify the double-negation before hashing the second
    // select (and so still succeed at CSEing them).
    if (LHSA == RHSB && LHSB == RHSA) {
      CmpInst::Predicate PredL, PredR;
      Value *X, *Y;
      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
          CmpInst::getInversePredicate(PredL) == PredR)
        return true;
    }
  }

  return false;
}
bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  // These comparisons are nontrivial, so assert that equality implies
  // hash equality (DenseMap demands this as an invariant).
  bool Result = isEqualImpl(LHS, RHS);
  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
         getHashValueImpl(LHS) == getHashValueImpl(RHS));
  return Result;
}
//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//
namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace
namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm
unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}
//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//
namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
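  ///
  /// For example (hypothetical IR), the commutative-operand canonicalization
  /// in the hash lets the second add below be CSE'd with the first:
  ///   %a1 = add i32 %x, %y
  ///   %a2 = add i32 %y, %x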
  ScopedHTType AvailableValues;
  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
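  ///
  /// For illustration (hypothetical IR), with G the generation at the first
  /// load:
  ///   %v1 = load i32, i32* %p   ; recorded at generation G
  ///   call void @may_write()    ; may write memory, generation becomes G+1
  ///   %v2 = load i32, i32* %p   ; generation mismatch, so %v2 is CSE'd with
  ///                             ; %v1 only if MemorySSA proves no clobber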
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };
  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;
  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;
  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;
  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;
  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  unsigned ClobberCounter = 0;
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };
  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls) {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };
  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
        : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->hasMetadata(LLVMContext::MD_invariant_load);
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };
  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }
  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. The phi case
    // is handled by MemorySSA when passing OptimizePhis = true to
    // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
    // by MemorySSA's getClobberingMemoryAccess.
    MSSAUpdater->removeMemoryAccess(Inst, true);
  }
};

} // end anonymous namespace
/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}
bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->hasMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}
bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into the
  // false branch.
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}
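// For example (hypothetical IR): given the predecessor terminator
//   %a = icmp eq i32 %x, 0
//   %b = icmp eq i32 %y, 0
//   %c = and i1 %a, %b
//   br i1 %c, label %taken, label %other
// the worklist above records %c, %a, and %b as 'true' on entry to %taken, and
// their dominated uses along that edge are replaced accordingly.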
bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block. If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent. For now, just
  // be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;
  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path. We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value. Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }
  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
  /// to the same location, we delete the dead store. This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
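  ///
  /// For example (hypothetical IR), the first store below is trivially dead
  /// because nothing reads %p in between:
  ///   store i32 %v1, i32* %p
  ///   store i32 %v2, i32* %p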
  Instruction *LastStore = nullptr;
  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }

      salvageDebugInfoOrMarkUndef(*Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }
    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with their removal. However, we should mark
    // their conditions as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }
    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //
    //   store 30, i8* p
    //   invariant.start(p)
    //   store 40, i8* p
    //
    // We can DSE the store to 30, since the store of 40 to invariant location p
    // causes undefined behaviour.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed.
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }
    if (isGuard(Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // blocks.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
      LastStore = nullptr;
      continue;
    }
    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }
    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }
    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }
      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load is invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
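      //
      // For example (hypothetical IR), in the same generation:
      //   %v1 = load i32, i32* %p
      //   %v2 = load i32, i32* %p   ; replaced by %v1 below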
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << "  to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }
    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;
    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
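      //
      // For example (hypothetical IR, assuming @f only reads memory):
      //   %r1 = call i32 @f(i32 %x)
      //   %r2 = call i32 @f(i32 %x)   ; replaced by %r1 when generations match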
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << "  to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }
    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }
    // Write-back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
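    //
    // For example (hypothetical IR), the store below writes back the value it
    // just loaded and is removable:
    //   %v = load i32, i32* %p
    //   store i32 %v, i32* %p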
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
        // removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }
    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we were going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
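        //
        // For example (hypothetical IR), the earlier store here is deleted
        // even though the later one is an unordered atomic:
        //   store i32 1, i32* %p
        //   store atomic i32 2, i32* %p unordered, align 4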
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << "  due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }
        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));
        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}
bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  return Changed;
}
PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}
namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}
INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)