1 //===- GenericDomTreeConstruction.h - Dominator Calculation ------*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Generic dominator tree construction - This file provides routines to
12 /// construct immediate dominator information for a flow-graph based on the
13 /// Semi-NCA algorithm described in this dissertation:
15 /// Linear-Time Algorithms for Dominators and Related Problems
16 /// Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
17 /// ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
19 /// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
20 /// out that the theoretically slower O(n*log(n)) implementation is actually
21 /// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
23 //===----------------------------------------------------------------------===//
25 #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
26 #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H
28 #include "llvm/ADT/DepthFirstIterator.h"
29 #include "llvm/ADT/SmallPtrSet.h"
30 #include "llvm/Support/GenericDomTree.h"
33 namespace DomTreeBuilder {
35 // Information record used by Semi-NCA during tree construction.
36 template <typename NodeT>
38 using NodePtr = NodeT *;
39 using DomTreeT = DominatorTreeBase<NodeT>;
40 using TreeNodePtr = DomTreeNodeBase<NodeT> *;
46 NodePtr Label = nullptr;
47 NodePtr IDom = nullptr;
50 std::vector<NodePtr> NumToNode;
51 DenseMap<NodePtr, InfoRec> NodeToInfo;
58 NodePtr getIDom(NodePtr BB) const {
59 auto InfoIt = NodeToInfo.find(BB);
60 if (InfoIt == NodeToInfo.end()) return nullptr;
62 return InfoIt->second.IDom;
65 TreeNodePtr getNodeForBlock(NodePtr BB, DomTreeT &DT) {
66 if (TreeNodePtr Node = DT.getNode(BB)) return Node;
68 // Haven't calculated this node yet? Get or calculate the node for the
69 // immediate dominator.
70 NodePtr IDom = getIDom(BB);
72 assert(IDom || DT.DomTreeNodes[nullptr]);
73 TreeNodePtr IDomNode = getNodeForBlock(IDom, DT);
75 // Add a new tree node for this NodeT, and link it as a child of
77 return (DT.DomTreeNodes[BB] = IDomNode->addChild(
78 llvm::make_unique<DomTreeNodeBase<NodeT>>(BB, IDomNode)))
82 // External storage for depth first iterator that reuses the info lookup map
83 // SemiNCAInfo already has. We don't have a set, but a map instead, so we are
84 // converting the one argument insert calls.
85 struct df_iterator_dom_storage {
87 using BaseSet = decltype(NodeToInfo);
88 df_iterator_dom_storage(BaseSet &Storage) : Storage(Storage) {}
90 using iterator = typename BaseSet::iterator;
91 std::pair<iterator, bool> insert(NodePtr N) {
92 return Storage.insert({N, InfoRec()});
94 void completed(NodePtr) {}
100 df_iterator_dom_storage getStorage() { return {NodeToInfo}; }
102 unsigned runReverseDFS(NodePtr V, unsigned N) {
103 auto DFStorage = getStorage();
105 bool IsChildOfArtificialExit = (N != 0);
106 for (auto I = idf_ext_begin(V, DFStorage), E = idf_ext_end(V, DFStorage);
109 auto &BBInfo = NodeToInfo[BB];
110 BBInfo.DFSNum = BBInfo.Semi = ++N;
112 // Set the parent to the top of the visited stack. The stack includes us,
113 // and is 1 based, so we subtract to account for both of these.
114 if (I.getPathLength() > 1)
115 BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum;
116 NumToNode.push_back(BB); // NumToNode[n] = V;
118 if (IsChildOfArtificialExit)
121 IsChildOfArtificialExit = false;
126 unsigned runForwardDFS(NodePtr V, unsigned N) {
127 auto DFStorage = getStorage();
129 for (auto I = df_ext_begin(V, DFStorage), E = df_ext_end(V, DFStorage);
132 auto &BBInfo = NodeToInfo[BB];
133 BBInfo.DFSNum = BBInfo.Semi = ++N;
135 // Set the parent to the top of the visited stack. The stack includes us,
136 // and is 1 based, so we subtract to account for both of these.
137 if (I.getPathLength() > 1)
138 BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum;
139 NumToNode.push_back(BB); // NumToNode[n] = V;
144 NodePtr eval(NodePtr VIn, unsigned LastLinked) {
145 auto &VInInfo = NodeToInfo[VIn];
146 if (VInInfo.DFSNum < LastLinked)
149 SmallVector<NodePtr, 32> Work;
150 SmallPtrSet<NodePtr, 32> Visited;
152 if (VInInfo.Parent >= LastLinked)
155 while (!Work.empty()) {
156 NodePtr V = Work.back();
157 auto &VInfo = NodeToInfo[V];
158 NodePtr VAncestor = NumToNode[VInfo.Parent];
160 // Process Ancestor first
161 if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
162 Work.push_back(VAncestor);
167 // Update VInfo based on Ancestor info
168 if (VInfo.Parent < LastLinked)
171 auto &VAInfo = NodeToInfo[VAncestor];
172 NodePtr VAncestorLabel = VAInfo.Label;
173 NodePtr VLabel = VInfo.Label;
174 if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi)
175 VInfo.Label = VAncestorLabel;
176 VInfo.Parent = VAInfo.Parent;
179 return VInInfo.Label;
182 template <typename NodeType>
183 void runSemiNCA(DomTreeT &DT, unsigned NumBlocks) {
185 NumToNode.push_back(nullptr);
187 bool MultipleRoots = (DT.Roots.size() > 1);
189 auto &BBInfo = NodeToInfo[nullptr];
190 BBInfo.DFSNum = BBInfo.Semi = ++N;
191 BBInfo.Label = nullptr;
193 NumToNode.push_back(nullptr); // NumToNode[n] = V;
196 // Step #1: Number blocks in depth-first order and initialize variables used
197 // in later stages of the algorithm.
198 if (DT.isPostDominator()){
199 for (unsigned i = 0, e = static_cast<unsigned>(DT.Roots.size());
201 N = runReverseDFS(DT.Roots[i], N);
203 N = runForwardDFS(DT.Roots[0], N);
206 // It might be that some blocks did not get a DFS number (e.g., blocks of
207 // infinite loops). In these cases an artificial exit node is required.
208 MultipleRoots |= (DT.isPostDominator() && N != NumBlocks);
210 // Initialize IDoms to spanning tree parents.
211 for (unsigned i = 1; i <= N; ++i) {
212 const NodePtr V = NumToNode[i];
213 auto &VInfo = NodeToInfo[V];
214 VInfo.IDom = NumToNode[VInfo.Parent];
217 // Step #2: Calculate the semidominators of all vertices.
218 for (unsigned i = N; i >= 2; --i) {
219 NodePtr W = NumToNode[i];
220 auto &WInfo = NodeToInfo[W];
222 // Initialize the semi dominator to point to the parent node.
223 WInfo.Semi = WInfo.Parent;
224 for (const auto &N : inverse_children<NodeType>(W))
225 if (NodeToInfo.count(N)) { // Only if this predecessor is reachable!
226 unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
227 if (SemiU < WInfo.Semi)
232 // Step #3: Explicitly define the immediate dominator of each vertex.
233 // IDom[i] = NCA(SDom[i], SpanningTreeParent(i)).
234 // Note that the parents were stored in IDoms and later got invalidated
235 // during path compression in Eval.
236 for (unsigned i = 2; i <= N; ++i) {
237 const NodePtr W = NumToNode[i];
238 auto &WInfo = NodeToInfo[W];
239 const unsigned SDomNum = NodeToInfo[NumToNode[WInfo.Semi]].DFSNum;
240 NodePtr WIDomCandidate = WInfo.IDom;
241 while (NodeToInfo[WIDomCandidate].DFSNum > SDomNum)
242 WIDomCandidate = NodeToInfo[WIDomCandidate].IDom;
244 WInfo.IDom = WIDomCandidate;
247 if (DT.Roots.empty()) return;
249 // Add a node for the root. This node might be the actual root, if there is
250 // one exit block, or it may be the virtual exit (denoted by
251 // (BasicBlock *)0) which postdominates all real exits if there are multiple
252 // exit blocks, or an infinite loop.
253 NodePtr Root = !MultipleRoots ? DT.Roots[0] : nullptr;
256 (DT.DomTreeNodes[Root] =
257 llvm::make_unique<DomTreeNodeBase<NodeT>>(Root, nullptr))
260 // Loop over all of the reachable blocks in the function...
261 for (unsigned i = 2; i <= N; ++i) {
262 NodePtr W = NumToNode[i];
264 // Don't replace this with 'count', the insertion side effect is important
265 if (DT.DomTreeNodes[W])
266 continue; // Haven't calculated this node yet?
268 NodePtr ImmDom = getIDom(W);
270 assert(ImmDom || DT.DomTreeNodes[nullptr]);
272 // Get or calculate the node for the immediate dominator
273 TreeNodePtr IDomNode = getNodeForBlock(ImmDom, DT);
275 // Add a new tree node for this BasicBlock, and link it as a child of
277 DT.DomTreeNodes[W] = IDomNode->addChild(
278 llvm::make_unique<DomTreeNodeBase<NodeT>>(W, IDomNode));
282 void doFullDFSWalk(const DomTreeT &DT) {
283 NumToNode.push_back(nullptr);
285 for (auto *Root : DT.Roots)
286 if (!DT.isPostDominator())
287 Num = runForwardDFS(Root, Num);
289 Num = runReverseDFS(Root, Num);
292 static void PrintBlockOrNullptr(raw_ostream &O, NodePtr Obj) {
296 Obj->printAsOperand(O, false);
299 // Checks if the tree contains all reachable nodes in the input graph.
300 bool verifyReachability(const DomTreeT &DT) {
304 for (auto &NodeToTN : DT.DomTreeNodes) {
305 const TreeNodePtr TN = NodeToTN.second.get();
306 const NodePtr BB = TN->getBlock();
309 if (NodeToInfo.count(BB) == 0) {
310 errs() << "DomTree node ";
311 PrintBlockOrNullptr(errs(), BB);
312 errs() << " not found by DFS walk!\n";
322 // Check if for every parent with a level L in the tree all of its children
324 static bool VerifyLevels(const DomTreeT &DT) {
325 for (auto &NodeToTN : DT.DomTreeNodes) {
326 const TreeNodePtr TN = NodeToTN.second.get();
327 const NodePtr BB = TN->getBlock();
330 const TreeNodePtr IDom = TN->getIDom();
331 if (!IDom && TN->getLevel() != 0) {
332 errs() << "Node without an IDom ";
333 PrintBlockOrNullptr(errs(), BB);
334 errs() << " has a nonzero level " << TN->getLevel() << "!\n";
340 if (IDom && TN->getLevel() != IDom->getLevel() + 1) {
342 PrintBlockOrNullptr(errs(), BB);
343 errs() << " has level " << TN->getLevel() << " while it's IDom ";
344 PrintBlockOrNullptr(errs(), IDom->getBlock());
345 errs() << " has level " << IDom->getLevel() << "!\n";
355 // Checks if for every edge From -> To in the graph
356 // NCD(From, To) == IDom(To) or To.
357 bool verifyNCD(const DomTreeT &DT) {
361 for (auto &BlockToInfo : NodeToInfo) {
362 auto &Info = BlockToInfo.second;
364 const NodePtr From = NumToNode[Info.Parent];
367 const NodePtr To = BlockToInfo.first;
368 const TreeNodePtr ToTN = DT.getNode(To);
371 const NodePtr NCD = DT.findNearestCommonDominator(From, To);
372 const TreeNodePtr NCDTN = NCD ? DT.getNode(NCD) : nullptr;
373 const TreeNodePtr ToIDom = ToTN->getIDom();
374 if (NCDTN != ToTN && NCDTN != ToIDom) {
375 errs() << "NearestCommonDominator verification failed:\n\tNCD(From:";
376 PrintBlockOrNullptr(errs(), From);
378 PrintBlockOrNullptr(errs(), To);
380 PrintBlockOrNullptr(errs(), NCD);
381 errs() << ",\t (should be To or IDom[To]: ";
382 PrintBlockOrNullptr(errs(), ToIDom ? ToIDom->getBlock() : nullptr);
393 // The below routines verify the correctness of the dominator tree relative to
394 // the CFG it's coming from. A tree is a dominator tree iff it has two
395 // properties, called the parent property and the sibling property. Tarjan
396 // and Lengauer prove (but don't explicitly name) the properties as part of
397 // the proofs in their 1972 paper, but the proofs are mostly part of proving
398 // things about semidominators and idoms, and some of them are simply asserted
399 // based on even earlier papers (see, e.g., lemma 2). Some papers refer to
400 // these properties as "valid" and "co-valid". See, e.g., "Dominators,
401 // directed bipolar orders, and independent spanning trees" by Loukas
402 // Georgiadis and Robert E. Tarjan, as well as "Dominator Tree Verification
403 // and Vertex-Disjoint Paths " by the same authors.
405 // A very simple and direct explanation of these properties can be found in
406 // "An Experimental Study of Dynamic Dominators", found at
407 // https://arxiv.org/abs/1604.02711
409 // The easiest way to think of the parent property is that it's a requirement
410 // of being a dominator. Let's just take immediate dominators. For PARENT to
411 // be an immediate dominator of CHILD, all paths in the CFG must go through
412 // PARENT before they hit CHILD. This implies that if you were to cut PARENT
413 // out of the CFG, there should be no paths to CHILD that are reachable. If
414 // there are, then you now have a path from PARENT to CHILD that goes around
415 // PARENT and still reaches CHILD, which by definition, means PARENT can't be
416 // a dominator of CHILD (let alone an immediate one).
418 // The sibling property is similar. It says that for each pair of sibling
419 // nodes in the dominator tree (LEFT and RIGHT) , they must not dominate each
420 // other. If sibling LEFT dominated sibling RIGHT, it means there are no
421 // paths in the CFG from sibling LEFT to sibling RIGHT that do not go through
422 // LEFT, and thus, LEFT is really an ancestor (in the dominator tree) of
423 // RIGHT, not a sibling.
425 // It is possible to verify the parent and sibling properties in
426 // linear time, but the algorithms are complex. Instead, we do it in a
427 // straightforward N^2 and N^3 way below, using direct path reachability.
430 // Checks if the tree has the parent property: if for all edges from V to W in
431 // the input graph, such that V is reachable, the parent of W in the tree is
432 // an ancestor of V in the tree.
434 // This means that if a node gets disconnected from the graph, then all of
435 // the nodes it dominated previously will now become unreachable.
436 bool verifyParentProperty(const DomTreeT &DT) {
437 for (auto &NodeToTN : DT.DomTreeNodes) {
438 const TreeNodePtr TN = NodeToTN.second.get();
439 const NodePtr BB = TN->getBlock();
440 if (!BB || TN->getChildren().empty()) continue;
443 NodeToInfo.insert({BB, {}});
446 for (TreeNodePtr Child : TN->getChildren())
447 if (NodeToInfo.count(Child->getBlock()) != 0) {
449 PrintBlockOrNullptr(errs(), Child->getBlock());
450 errs() << " reachable after its parent ";
451 PrintBlockOrNullptr(errs(), BB);
452 errs() << " is removed!\n";
462 // Check if the tree has sibling property: if a node V does not dominate a
463 // node W for all siblings V and W in the tree.
465 // This means that if a node gets disconnected from the graph, then all of its
466 // siblings will now still be reachable.
467 bool verifySiblingProperty(const DomTreeT &DT) {
468 for (auto &NodeToTN : DT.DomTreeNodes) {
469 const TreeNodePtr TN = NodeToTN.second.get();
470 const NodePtr BB = TN->getBlock();
471 if (!BB || TN->getChildren().empty()) continue;
473 const auto &Siblings = TN->getChildren();
474 for (const TreeNodePtr N : Siblings) {
476 NodeToInfo.insert({N->getBlock(), {}});
479 for (const TreeNodePtr S : Siblings) {
480 if (S == N) continue;
482 if (NodeToInfo.count(S->getBlock()) == 0) {
484 PrintBlockOrNullptr(errs(), S->getBlock());
485 errs() << " not reachable when its sibling ";
486 PrintBlockOrNullptr(errs(), N->getBlock());
487 errs() << " is removed!\n";
500 template <class FuncT, class NodeT>
501 void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT,
503 using NodePtr = typename GraphTraits<NodeT>::NodeRef;
504 static_assert(std::is_pointer<NodePtr>::value,
505 "NodePtr should be a pointer type");
506 SemiNCAInfo<typename std::remove_pointer<NodePtr>::type> SNCA;
507 SNCA.template runSemiNCA<NodeT>(DT, GraphTraits<FuncT *>::size(&F));
510 template <class NodeT>
511 bool Verify(const DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT) {
512 using NodePtr = typename GraphTraits<NodeT>::NodeRef;
513 static_assert(std::is_pointer<NodePtr>::value,
514 "NodePtr should be a pointer type");
515 SemiNCAInfo<typename std::remove_pointer<NodePtr>::type> SNCA;
517 return SNCA.verifyReachability(DT) && SNCA.VerifyLevels(DT) &&
518 SNCA.verifyNCD(DT) && SNCA.verifyParentProperty(DT) &&
519 SNCA.verifySiblingProperty(DT);
522 } // namespace DomTreeBuilder