1 //===- DependenceGraphBuilder.cpp ------------------------------------------==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file implements common steps of the build algorithm for construction
9 // of dependence graphs such as DDG and PDG.
10 //===----------------------------------------------------------------------===//
12 #include "llvm/Analysis/DependenceGraphBuilder.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/EnumeratedArray.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/DDG.h"
21 #define DEBUG_TYPE "dgb"
23 STATISTIC(TotalGraphs, "Number of dependence graphs created.");
24 STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
25 STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
26 STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
27 STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
28 STATISTIC(TotalConfusedEdges,
29 "Number of confused memory dependencies between two nodes.");
30 STATISTIC(TotalEdgeReversals,
31 "Number of times the source and sink of dependence was reversed to "
32 "expose cycles in the graph.");
34 using InstructionListType = SmallVector<Instruction *, 2>;
36 //===--------------------------------------------------------------------===//
37 // AbstractDependenceGraphBuilder implementation
38 //===--------------------------------------------------------------------===//
41 void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
42 // The BBList is expected to be in program order.
43 size_t NextOrdinal = 1;
44 for (auto *BB : BBList)
46 InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
50 void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
52 assert(IMap.empty() && "Expected empty instruction map at start");
53 for (BasicBlock *BB : BBList)
54 for (Instruction &I : *BB) {
55 auto &NewNode = createFineGrainedNode(I);
56 IMap.insert(std::make_pair(&I, &NewNode));
57 NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
58 ++TotalFineGrainedNodes;
63 void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
64 // Create a root node that connects to every connected component of the graph.
65 // This is done to allow graph iterators to visit all the disjoint components
66 // of the graph, in a single walk.
68 // This algorithm works by going through each node of the graph and for each
69 // node N, do a DFS starting from N. A rooted edge is established between the
70 // root node and N (if N is not yet visited). All the nodes reachable from N
71 // are marked as visited and are skipped in the DFS of subsequent nodes.
73 // Note: This algorithm tries to limit the number of edges out of the root
74 // node to some extent, but there may be redundant edges created depending on
75 // the iteration order. For example for a graph {A -> B}, an edge from the
76 // root node is added to both nodes if B is visited before A. While it does
77 // not result in minimal number of edges, this approach saves compile-time
78 // while keeping the number of edges in check.
79 auto &RootNode = createRootNode();
80 df_iterator_default_set<const NodeType *, 4> Visited;
81 for (auto *N : Graph) {
84 for (auto I : depth_first_ext(N, Visited))
86 createRootedEdge(RootNode, *N);
90 template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
91 if (!shouldCreatePiBlocks())
94 LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96 // The overall algorithm is as follows:
97 // 1. Identify SCCs and for each SCC create a pi-block node containing all
98 // the nodes in that SCC.
99 // 2. Identify incoming edges incident to the nodes inside of the SCC and
100 // reconnect them to the pi-block node.
101 // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
102 // outside of it and reconnect them so that the edges are coming out of the
105 // Adding nodes as we iterate through the SCCs cause the SCC
106 // iterators to get invalidated. To prevent this invalidation, we first
107 // collect a list of nodes that are part of an SCC, and then iterate over
108 // those lists to create the pi-block nodes. Each element of the list is a
109 // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111 SmallVector<NodeListType, 4> ListOfSCCs;
112 for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
114 ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
117 for (NodeListType &NL : ListOfSCCs) {
118 LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
119 << " nodes in it.\n");
121 // SCC iterator may put the nodes in an order that's different from the
122 // program order. To preserve original program order, we sort the list of
123 // nodes based on ordinal numbers computed earlier.
124 llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
125 return getOrdinal(*LHS) < getOrdinal(*RHS);
128 NodeType &PiNode = createPiBlock(NL);
131 // Build a set to speed up the lookup for edges whose targets
132 // are inside the SCC.
133 SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
135 // We have the set of nodes in the SCC. We go through the set of nodes
136 // that are outside of the SCC and look for edges that cross the two sets.
137 for (NodeType *N : Graph) {
139 // Skip the SCC node and all the nodes inside of it.
140 if (*N == PiNode || NodesInSCC.count(N))
143 for (NodeType *SCCNode : NL) {
146 Incoming, // Incoming edges to the SCC
147 Outgoing, // Edges going ot of the SCC
148 DirectionCount // To make the enum usable as an array index.
151 // Use these flags to help us avoid creating redundant edges. If there
152 // are more than one edges from an outside node to inside nodes, we only
153 // keep one edge from that node to the pi-block node. Similarly, if
154 // there are more than one edges from inside nodes to an outside node,
155 // we only keep one edge from the pi-block node to the outside node.
156 // There is a flag defined for each direction (incoming vs outgoing) and
157 // for each type of edge supported, using a two-dimensional boolean
159 using EdgeKind = typename EdgeType::EdgeKind;
160 EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
163 auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
166 case EdgeKind::RegisterDefUse:
167 createDefUseEdge(Src, Dst);
169 case EdgeKind::MemoryDependence:
170 createMemoryEdge(Src, Dst);
172 case EdgeKind::Rooted:
173 createRootedEdge(Src, Dst);
176 llvm_unreachable("Unsupported type of edge.");
180 auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
181 const Direction Dir) {
182 if (!Src->hasEdgeTo(*Dst))
186 << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
187 << ":\nSrc:" << *Src << "\nDst:" << *Dst
188 << "\nNew:" << *New << "\n");
189 assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
190 "Invalid direction.");
192 SmallVector<EdgeType *, 10> EL;
193 Src->findEdgesTo(*Dst, EL);
194 for (EdgeType *OldEdge : EL) {
195 EdgeKind Kind = OldEdge->getKind();
196 if (!EdgeAlreadyCreated[Dir][Kind]) {
197 if (Dir == Direction::Incoming) {
198 createEdgeOfKind(*Src, *New, Kind);
199 LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
200 } else if (Dir == Direction::Outgoing) {
201 createEdgeOfKind(*New, *Dst, Kind);
202 LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
204 EdgeAlreadyCreated[Dir][Kind] = true;
206 Src->removeEdge(*OldEdge);
207 destroyEdge(*OldEdge);
208 LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
212 // Process incoming edges incident to the pi-block node.
213 reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
215 // Process edges that are coming out of the pi-block node.
216 reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
221 // Ordinal maps are no longer needed.
222 InstOrdinalMap.clear();
223 NodeOrdinalMap.clear();
225 LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
228 template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
229 for (NodeType *N : Graph) {
230 InstructionListType SrcIList;
231 N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
233 // Use a set to mark the targets that we link to N, so we don't add
234 // duplicate def-use edges when more than one instruction in a target node
235 // use results of instructions that are contained in N.
236 SmallPtrSet<NodeType *, 4> VisitedTargets;
238 for (Instruction *II : SrcIList) {
239 for (User *U : II->users()) {
240 Instruction *UI = dyn_cast<Instruction>(U);
243 NodeType *DstNode = nullptr;
244 if (IMap.find(UI) != IMap.end())
245 DstNode = IMap.find(UI)->second;
247 // In the case of loops, the scope of the subgraph is all the
248 // basic blocks (and instructions within them) belonging to the loop. We
249 // simply ignore all the edges coming from (or going into) instructions
250 // or basic blocks outside of this range.
254 << "skipped def-use edge since the sink" << *UI
255 << " is outside the range of instructions being considered.\n");
259 // Self dependencies are ignored because they are redundant and
263 << "skipped def-use edge since the sink and the source ("
264 << N << ") are the same.\n");
268 if (VisitedTargets.insert(DstNode).second) {
269 createDefUseEdge(*N, *DstNode);
278 void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
279 using DGIterator = typename G::iterator;
280 auto isMemoryAccess = [](const Instruction *I) {
281 return I->mayReadOrWriteMemory();
283 for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
284 InstructionListType SrcIList;
285 (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
286 if (SrcIList.empty())
289 for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
290 if (**SrcIt == **DstIt)
292 InstructionListType DstIList;
293 (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
294 if (DstIList.empty())
296 bool ForwardEdgeCreated = false;
297 bool BackwardEdgeCreated = false;
298 for (Instruction *ISrc : SrcIList) {
299 for (Instruction *IDst : DstIList) {
300 auto D = DI.depends(ISrc, IDst, true);
304 // If we have a dependence with its left-most non-'=' direction
305 // being '>' we need to reverse the direction of the edge, because
306 // the source of the dependence cannot occur after the sink. For
307 // confused dependencies, we will create edges in both directions to
308 // represent the possibility of a cycle.
310 auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
311 if (!ForwardEdgeCreated) {
312 createMemoryEdge(Src, Dst);
315 if (!BackwardEdgeCreated) {
316 createMemoryEdge(Dst, Src);
319 ForwardEdgeCreated = BackwardEdgeCreated = true;
320 ++TotalConfusedEdges;
323 auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
324 if (!ForwardEdgeCreated) {
325 createMemoryEdge(Src, Dst);
328 ForwardEdgeCreated = true;
331 auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
332 if (!BackwardEdgeCreated) {
333 createMemoryEdge(Dst, Src);
336 BackwardEdgeCreated = true;
340 createConfusedEdges(**SrcIt, **DstIt);
341 else if (D->isOrdered() && !D->isLoopIndependent()) {
342 bool ReversedEdge = false;
343 for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
344 if (D->getDirection(Level) == Dependence::DVEntry::EQ)
346 else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
347 createBackwardEdge(**SrcIt, **DstIt);
349 ++TotalEdgeReversals;
351 } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
354 createConfusedEdges(**SrcIt, **DstIt);
359 createForwardEdge(**SrcIt, **DstIt);
361 createForwardEdge(**SrcIt, **DstIt);
363 // Avoid creating duplicate edges.
364 if (ForwardEdgeCreated && BackwardEdgeCreated)
368 // If we've created edges in both directions, there is no more
369 // unique edge that we can create between these two nodes, so we
371 if (ForwardEdgeCreated && BackwardEdgeCreated)
378 template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
379 if (!shouldSimplify())
381 LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
383 // This algorithm works by first collecting a set of candidate nodes that have
384 // an out-degree of one (in terms of def-use edges), and then ignoring those
385 // whose targets have an in-degree more than one. Each node in the resulting
386 // set can then be merged with its corresponding target and put back into the
387 // worklist until no further merge candidates are available.
388 SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
390 // A mapping between nodes and their in-degree. To save space, this map
391 // only contains nodes that are targets of nodes in the CandidateSourceNodes.
392 DenseMap<NodeType *, unsigned> TargetInDegreeMap;
394 for (NodeType *N : Graph) {
395 if (N->getEdges().size() != 1)
397 EdgeType &Edge = N->back();
398 if (!Edge.isDefUse())
400 CandidateSourceNodes.insert(N);
402 // Insert an element into the in-degree map and initialize to zero. The
403 // count will get updated in the next step.
404 TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
408 dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
409 << "\nNode with single outgoing def-use edge:\n";
410 for (NodeType *N : CandidateSourceNodes) {
415 for (NodeType *N : Graph) {
416 for (EdgeType *E : *N) {
417 NodeType *Tgt = &E->getTargetNode();
418 auto TgtIT = TargetInDegreeMap.find(Tgt);
419 if (TgtIT != TargetInDegreeMap.end())
425 dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
426 << "\nContent of in-degree map:\n";
427 for (auto &I : TargetInDegreeMap) {
428 dbgs() << I.first << " --> " << I.second << "\n";
432 SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
433 CandidateSourceNodes.end());
434 while (!Worklist.empty()) {
435 NodeType &Src = *Worklist.pop_back_val();
436 // As nodes get merged, we need to skip any node that has been removed from
437 // the candidate set (see below).
438 if (!CandidateSourceNodes.erase(&Src))
441 assert(Src.getEdges().size() == 1 &&
442 "Expected a single edge from the candidate src node.");
443 NodeType &Tgt = Src.back().getTargetNode();
444 assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
445 "Expected target to be in the in-degree map.");
447 if (TargetInDegreeMap[&Tgt] != 1)
450 if (!areNodesMergeable(Src, Tgt))
453 // Do not merge if there is also an edge from target to src (immediate
455 if (Tgt.hasEdgeTo(Src))
458 LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
460 mergeNodes(Src, Tgt);
462 // If the target node is in the candidate set itself, we need to put the
463 // src node back into the worklist again so it gives the target a chance
464 // to get merged into it. For example if we have:
465 // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
466 // then after merging (a) and (b) together, we need to put (a,b) back in
467 // the worklist so that (c) can get merged in as well resulting in
469 // We also need to remove the old target (b), from the worklist. We first
470 // remove it from the candidate set here, and skip any item from the
471 // worklist that is not in the set.
472 if (CandidateSourceNodes.erase(&Tgt)) {
473 Worklist.push_back(&Src);
474 CandidateSourceNodes.insert(&Src);
475 LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
478 LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
482 void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
484 // If we don't create pi-blocks, then we may not have a DAG.
485 if (!shouldCreatePiBlocks())
488 SmallVector<NodeType *, 64> NodesInPO;
489 using NodeKind = typename NodeType::NodeKind;
490 for (NodeType *N : post_order(&Graph)) {
491 if (N->getKind() == NodeKind::PiBlock) {
492 // Put members of the pi-block right after the pi-block itself, for
494 const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
495 NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
496 PiBlockMembers.end());
498 NodesInPO.push_back(N);
501 size_t OldSize = Graph.Nodes.size();
503 for (NodeType *N : reverse(NodesInPO))
504 Graph.Nodes.push_back(N);
505 if (Graph.Nodes.size() != OldSize)
507 "Expected the number of nodes to stay the same after the sort");
510 template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
511 template class llvm::DependenceGraphInfo<DDGNode>;