contrib/llvm/include/llvm/Analysis/LazyCallGraph.h

   1 //===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 ///
  11 /// Implements a lazy call graph analysis and related passes for the new pass
  12 /// manager.
  13 ///
  14 /// NB: This is *not* a traditional call graph! It is a graph which models both
  15 /// the current calls and potential calls. As a consequence there are many
  16 /// edges in this call graph that do not correspond to a 'call' or 'invoke'
  17 /// instruction.
  18 ///
  19 /// The primary use cases of this graph analysis is to facilitate iterating
  20 /// across the functions of a module in ways that ensure all callees are
  21 /// visited prior to a caller (given any SCC constraints), or vice versa. As
  22 /// such is it particularly well suited to organizing CGSCC optimizations such
  23 /// as inlining, outlining, argument promotion, etc. That is its primary use
  24 /// case and motivates the design. It may not be appropriate for other
  25 /// purposes. The use graph of functions or some other conservative analysis of
  26 /// call instructions may be interesting for optimizations and subsequent
  27 /// analyses which don't work in the context of an overly specified
  28 /// potential-call-edge graph.
  29 ///
  30 /// To understand the specific rules and nature of this call graph analysis,
  31 /// see the documentation of the \c LazyCallGraph below.
  32 ///
  33 //===----------------------------------------------------------------------===//
  34
  35 #ifndef LLVM_ANALYSIS_LAZYCALLGRAPH_H
  36 #define LLVM_ANALYSIS_LAZYCALLGRAPH_H
  37
  38 #include "llvm/ADT/DenseMap.h"
  39 #include "llvm/ADT/PointerUnion.h"
  40 #include "llvm/ADT/STLExtras.h"
  41 #include "llvm/ADT/SetVector.h"
  42 #include "llvm/ADT/SmallPtrSet.h"
  43 #include "llvm/ADT/SmallVector.h"
  44 #include "llvm/ADT/iterator.h"
  45 #include "llvm/ADT/iterator_range.h"
  46 #include "llvm/IR/BasicBlock.h"
  47 #include "llvm/IR/Function.h"
  48 #include "llvm/IR/Module.h"
  49 #include "llvm/IR/PassManager.h"
  50 #include "llvm/Support/Allocator.h"
  51 #include "llvm/Support/raw_ostream.h"
  52 #include <iterator>
  53 #include <utility>
  54
  55 namespace llvm {
  56 class PreservedAnalyses;
  57 class raw_ostream;
  58
  59 /// A lazily constructed view of the call graph of a module.
  60 ///
  61 /// With the edges of this graph, the motivating constraint that we are
  62 /// attempting to maintain is that function-local optimization, CGSCC-local
  63 /// optimizations, and optimizations transforming a pair of functions connected
  64 /// by an edge in the graph, do not invalidate a bottom-up traversal of the SCC
  65 /// DAG. That is, no optimizations will delete, remove, or add an edge such
  66 /// that functions already visited in a bottom-up order of the SCC DAG are no
  67 /// longer valid to have visited, or such that functions not yet visited in
  68 /// a bottom-up order of the SCC DAG are not required to have already been
  69 /// visited.
  70 ///
  71 /// Within this constraint, the desire is to minimize the merge points of the
  72 /// SCC DAG. The greater the fanout of the SCC DAG and the fewer merge points
  73 /// in the SCC DAG, the more independence there is in optimizing within it.
  74 /// There is a strong desire to enable parallelization of optimizations over
  75 /// the call graph, and both limited fanout and merge points will (artificially
  76 /// in some cases) limit the scaling of such an effort.
  77 ///
  78 /// To this end, graph represents both direct and any potential resolution to
  79 /// an indirect call edge. Another way to think about it is that it represents
  80 /// both the direct call edges and any direct call edges that might be formed
  81 /// through static optimizations. Specifically, it considers taking the address
  82 /// of a function to be an edge in the call graph because this might be
  83 /// forwarded to become a direct call by some subsequent function-local
  84 /// optimization. The result is that the graph closely follows the use-def
  85 /// edges for functions. Walking "up" the graph can be done by looking at all
  86 /// of the uses of a function.
  87 ///
  88 /// The roots of the call graph are the external functions and functions
  89 /// escaped into global variables. Those functions can be called from outside
  90 /// of the module or via unknowable means in the IR -- we may not be able to
  91 /// form even a potential call edge from a function body which may dynamically
  92 /// load the function and call it.
  93 ///
  94 /// This analysis still requires updates to remain valid after optimizations
  95 /// which could potentially change the set of potential callees. The
  96 /// constraints it operates under only make the traversal order remain valid.
  97 ///
  98 /// The entire analysis must be re-computed if full interprocedural
  99 /// optimizations run at any point. For example, globalopt completely
 100 /// invalidates the information in this analysis.
 101 ///
 102 /// FIXME: This class is named LazyCallGraph in a lame attempt to distinguish
 103 /// it from the existing CallGraph. At some point, it is expected that this
 104 /// will be the only call graph and it will be renamed accordingly.
 105 class LazyCallGraph {
 106 public:
 107   class Node;
 108   class SCC;
 109   class RefSCC;
 110   class edge_iterator;
 111   class call_edge_iterator;
 112
 113   /// A class used to represent edges in the call graph.
 114   ///
 115   /// The lazy call graph models both *call* edges and *reference* edges. Call
 116   /// edges are much what you would expect, and exist when there is a 'call' or
 117   /// 'invoke' instruction of some function. Reference edges are also tracked
 118   /// along side these, and exist whenever any instruction (transitively
 119   /// through its operands) references a function. All call edges are
 120   /// inherently reference edges, and so the reference graph forms a superset
 121   /// of the formal call graph.
 122   ///
 123   /// Furthermore, edges also may point to raw \c Function objects when those
 124   /// functions have not been scanned and incorporated into the graph (yet).
 125   /// This is one of the primary ways in which the graph can be lazy. When
 126   /// functions are scanned and fully incorporated into the graph, all of the
 127   /// edges referencing them are updated to point to the graph \c Node objects
 128   /// instead of to the raw \c Function objects. This class even provides
 129   /// methods to trigger this scan on-demand by attempting to get the target
 130   /// node of the graph and providing a reference back to the graph in order to
 131   /// lazily build it if necessary.
 132   ///
 133   /// All of these forms of edges are fundamentally represented as outgoing
 134   /// edges. The edges are stored in the source node and point at the target
 135   /// node. This allows the edge structure itself to be a very compact data
 136   /// structure: essentially a tagged pointer.
 137   class Edge {
 138   public:
 139     /// The kind of edge in the graph.
 140     enum Kind : bool { Ref = false, Call = true };
 141
 142     Edge();
 143     explicit Edge(Function &F, Kind K);
 144     explicit Edge(Node &N, Kind K);
 145
 146     /// Test whether the edge is null.
 147     ///
 148     /// This happens when an edge has been deleted. We leave the edge objects
 149     /// around but clear them.
 150     operator bool() const;
 151
 152     /// Test whether the edge represents a direct call to a function.
 153     ///
 154     /// This requires that the edge is not null.
 155     bool isCall() const;
 156
 157     /// Get the function referenced by this edge.
 158     ///
 159     /// This requires that the edge is not null, but will succeed whether we
 160     /// have built a graph node for the function yet or not.
 161     Function &getFunction() const;
 162
 163     /// Get the call graph node referenced by this edge if one exists.
 164     ///
 165     /// This requires that the edge is not null. If we have built a graph node
 166     /// for the function this edge points to, this will return that node,
 167     /// otherwise it will return null.
 168     Node *getNode() const;
 169
 170     /// Get the call graph node for this edge, building it if necessary.
 171     ///
 172     /// This requires that the edge is not null. If we have not yet built
 173     /// a graph node for the function this edge points to, this will first ask
 174     /// the graph to build that node, inserting it into all the relevant
 175     /// structures.
 176     Node &getNode(LazyCallGraph &G);
 177
 178   private:
 179     friend class LazyCallGraph::Node;
 180
 181     PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value;
 182
 183     void setKind(Kind K) { Value.setInt(K); }
 184   };
 185
 186   typedef SmallVector<Edge, 4> EdgeVectorT;
 187   typedef SmallVectorImpl<Edge> EdgeVectorImplT;
 188
 189   /// A node in the call graph.
 190   ///
 191   /// This represents a single node. It's primary roles are to cache the list of
 192   /// callees, de-duplicate and provide fast testing of whether a function is
 193   /// a callee, and facilitate iteration of child nodes in the graph.
 194   class Node {
 195     friend class LazyCallGraph;
 196     friend class LazyCallGraph::SCC;
 197
 198     LazyCallGraph *G;
 199     Function &F;
 200
 201     // We provide for the DFS numbering and Tarjan walk lowlink numbers to be
 202     // stored directly within the node. These are both '-1' when nodes are part
 203     // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
 204     int DFSNumber;
 205     int LowLink;
 206
 207     mutable EdgeVectorT Edges;
 208     DenseMap<Function *, int> EdgeIndexMap;
 209
 210     /// Basic constructor implements the scanning of F into Edges and
 211     /// EdgeIndexMap.
 212     Node(LazyCallGraph &G, Function &F);
 213
 214     /// Internal helper to insert an edge to a function.
 215     void insertEdgeInternal(Function &ChildF, Edge::Kind EK);
 216
 217     /// Internal helper to insert an edge to a node.
 218     void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
 219
 220     /// Internal helper to change an edge kind.
 221     void setEdgeKind(Function &ChildF, Edge::Kind EK);
 222
 223     /// Internal helper to remove the edge to the given function.
 224     void removeEdgeInternal(Function &ChildF);
 225
 226     /// Print the name of this node's function.
 227     friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
 228       return OS << N.F.getName();
 229     }
 230
 231     /// Dump the name of this node's function to stderr.
 232     void dump() const;
 233
 234   public:
 235     LazyCallGraph &getGraph() const { return *G; }
 236
 237     Function &getFunction() const { return F; }
 238
 239     edge_iterator begin() const {
 240       return edge_iterator(Edges.begin(), Edges.end());
 241     }
 242     edge_iterator end() const { return edge_iterator(Edges.end(), Edges.end()); }
 243
 244     const Edge &operator[](int i) const { return Edges[i]; }
 245     const Edge &operator[](Function &F) const {
 246       assert(EdgeIndexMap.find(&F) != EdgeIndexMap.end() && "No such edge!");
 247       return Edges[EdgeIndexMap.find(&F)->second];
 248     }
 249     const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; }
 250
 251     call_edge_iterator call_begin() const {
 252       return call_edge_iterator(Edges.begin(), Edges.end());
 253     }
 254     call_edge_iterator call_end() const {
 255       return call_edge_iterator(Edges.end(), Edges.end());
 256     }
 257
 258     iterator_range<call_edge_iterator> calls() const {
 259       return make_range(call_begin(), call_end());
 260     }
 261
 262     /// Equality is defined as address equality.
 263     bool operator==(const Node &N) const { return this == &N; }
 264     bool operator!=(const Node &N) const { return !operator==(N); }
 265   };
 266
 267   /// A lazy iterator used for both the entry nodes and child nodes.
 268   ///
 269   /// When this iterator is dereferenced, if not yet available, a function will
 270   /// be scanned for "calls" or uses of functions and its child information
 271   /// will be constructed. All of these results are accumulated and cached in
 272   /// the graph.
 273   class edge_iterator
 274       : public iterator_adaptor_base<edge_iterator, EdgeVectorImplT::iterator,
 275                                      std::forward_iterator_tag> {
 276     friend class LazyCallGraph;
 277     friend class LazyCallGraph::Node;
 278
 279     EdgeVectorImplT::iterator E;
 280
 281     // Build the iterator for a specific position in the edge list.
 282     edge_iterator(EdgeVectorImplT::iterator BaseI,
 283                   EdgeVectorImplT::iterator E)
 284         : iterator_adaptor_base(BaseI), E(E) {
 285       while (I != E && !*I)
 286         ++I;
 287     }
 288
 289   public:
 290     edge_iterator() {}
 291
 292     using iterator_adaptor_base::operator++;
 293     edge_iterator &operator++() {
 294       do {
 295         ++I;
 296       } while (I != E && !*I);
 297       return *this;
 298     }
 299   };
 300
 301   /// A lazy iterator over specifically call edges.
 302   ///
 303   /// This has the same iteration properties as the \c edge_iterator, but
 304   /// restricts itself to edges which represent actual calls.
 305   class call_edge_iterator
 306       : public iterator_adaptor_base<call_edge_iterator,
 307                                      EdgeVectorImplT::iterator,
 308                                      std::forward_iterator_tag> {
 309     friend class LazyCallGraph;
 310     friend class LazyCallGraph::Node;
 311
 312     EdgeVectorImplT::iterator E;
 313
 314     /// Advance the iterator to the next valid, call edge.
 315     void advanceToNextEdge() {
 316       while (I != E && (!*I || !I->isCall()))
 317         ++I;
 318     }
 319
 320     // Build the iterator for a specific position in the edge list.
 321     call_edge_iterator(EdgeVectorImplT::iterator BaseI,
 322                        EdgeVectorImplT::iterator E)
 323         : iterator_adaptor_base(BaseI), E(E) {
 324       advanceToNextEdge();
 325     }
 326
 327   public:
 328     call_edge_iterator() {}
 329
 330     using iterator_adaptor_base::operator++;
 331     call_edge_iterator &operator++() {
 332       ++I;
 333       advanceToNextEdge();
 334       return *this;
 335     }
 336   };
 337
 338   /// An SCC of the call graph.
 339   ///
 340   /// This represents a Strongly Connected Component of the direct call graph
 341   /// -- ignoring indirect calls and function references. It stores this as
 342   /// a collection of call graph nodes. While the order of nodes in the SCC is
 343   /// stable, it is not any particular order.
 344   ///
 345   /// The SCCs are nested within a \c RefSCC, see below for details about that
 346   /// outer structure. SCCs do not support mutation of the call graph, that
 347   /// must be done through the containing \c RefSCC in order to fully reason
 348   /// about the ordering and connections of the graph.
 349   class SCC {
 350     friend class LazyCallGraph;
 351     friend class LazyCallGraph::Node;
 352
 353     RefSCC *OuterRefSCC;
 354     SmallVector<Node *, 1> Nodes;
 355
 356     template <typename NodeRangeT>
 357     SCC(RefSCC &OuterRefSCC, NodeRangeT &&Nodes)
 358         : OuterRefSCC(&OuterRefSCC), Nodes(std::forward<NodeRangeT>(Nodes)) {}
 359
 360     void clear() {
 361       OuterRefSCC = nullptr;
 362       Nodes.clear();
 363     }
 364
 365     /// Print a short descrtiption useful for debugging or logging.
 366     ///
 367     /// We print the function names in the SCC wrapped in '()'s and skipping
 368     /// the middle functions if there are a large number.
 369     //
 370     // Note: this is defined inline to dodge issues with GCC's interpretation
 371     // of enclosing namespaces for friend function declarations.
 372     friend raw_ostream &operator<<(raw_ostream &OS, const SCC &C) {
 373       OS << '(';
 374       int i = 0;
 375       for (LazyCallGraph::Node &N : C) {
 376         if (i > 0)
 377           OS << ", ";
 378         // Elide the inner elements if there are too many.
 379         if (i > 8) {
 380           OS << "..., " << *C.Nodes.back();
 381           break;
 382         }
 383         OS << N;
 384         ++i;
 385       }
 386       OS << ')';
 387       return OS;
 388     }
 389
 390     /// Dump a short description of this SCC to stderr.
 391     void dump() const;
 392
 393 #ifndef NDEBUG
 394     /// Verify invariants about the SCC.
 395     ///
 396     /// This will attempt to validate all of the basic invariants within an
 397     /// SCC, but not that it is a strongly connected componet per-se. Primarily
 398     /// useful while building and updating the graph to check that basic
 399     /// properties are in place rather than having inexplicable crashes later.
 400     void verify();
 401 #endif
 402
 403   public:
 404     typedef pointee_iterator<SmallVectorImpl<Node *>::const_iterator> iterator;
 405
 406     iterator begin() const { return Nodes.begin(); }
 407     iterator end() const { return Nodes.end(); }
 408
 409     int size() const { return Nodes.size(); }
 410
 411     RefSCC &getOuterRefSCC() const { return *OuterRefSCC; }
 412
 413     /// Provide a short name by printing this SCC to a std::string.
 414     ///
 415     /// This copes with the fact that we don't have a name per-se for an SCC
 416     /// while still making the use of this in debugging and logging useful.
 417     std::string getName() const {
 418       std::string Name;
 419       raw_string_ostream OS(Name);
 420       OS << *this;
 421       OS.flush();
 422       return Name;
 423     }
 424   };
 425
 426   /// A RefSCC of the call graph.
 427   ///
 428   /// This models a Strongly Connected Component of function reference edges in
 429   /// the call graph. As opposed to actual SCCs, these can be used to scope
 430   /// subgraphs of the module which are independent from other subgraphs of the
 431   /// module because they do not reference it in any way. This is also the unit
 432   /// where we do mutation of the graph in order to restrict mutations to those
 433   /// which don't violate this independence.
 434   ///
 435   /// A RefSCC contains a DAG of actual SCCs. All the nodes within the RefSCC
 436   /// are necessarily within some actual SCC that nests within it. Since
 437   /// a direct call *is* a reference, there will always be at least one RefSCC
 438   /// around any SCC.
 439   class RefSCC {
 440     friend class LazyCallGraph;
 441     friend class LazyCallGraph::Node;
 442
 443     LazyCallGraph *G;
 444     SmallPtrSet<RefSCC *, 1> Parents;
 445
 446     /// A postorder list of the inner SCCs.
 447     SmallVector<SCC *, 4> SCCs;
 448
 449     /// A map from SCC to index in the postorder list.
 450     SmallDenseMap<SCC *, int, 4> SCCIndices;
 451
 452     /// Fast-path constructor. RefSCCs should instead be constructed by calling
 453     /// formRefSCCFast on the graph itself.
 454     RefSCC(LazyCallGraph &G);
 455
 456     /// Print a short description useful for debugging or logging.
 457     ///
 458     /// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if
 459     /// there are a large number.
 460     //
 461     // Note: this is defined inline to dodge issues with GCC's interpretation
 462     // of enclosing namespaces for friend function declarations.
 463     friend raw_ostream &operator<<(raw_ostream &OS, const RefSCC &RC) {
 464       OS << '[';
 465       int i = 0;
 466       for (LazyCallGraph::SCC &C : RC) {
 467         if (i > 0)
 468           OS << ", ";
 469         // Elide the inner elements if there are too many.
 470         if (i > 4) {
 471           OS << "..., " << *RC.SCCs.back();
 472           break;
 473         }
 474         OS << C;
 475         ++i;
 476       }
 477       OS << ']';
 478       return OS;
 479     }
 480
 481     /// Dump a short description of this RefSCC to stderr.
 482     void dump() const;
 483
 484 #ifndef NDEBUG
 485     /// Verify invariants about the RefSCC and all its SCCs.
 486     ///
 487     /// This will attempt to validate all of the invariants *within* the
 488     /// RefSCC, but not that it is a strongly connected component of the larger
 489     /// graph. This makes it useful even when partially through an update.
 490     ///
 491     /// Invariants checked:
 492     /// - SCCs and their indices match.
 493     /// - The SCCs list is in fact in post-order.
 494     void verify();
 495 #endif
 496
 497   public:
 498     typedef pointee_iterator<SmallVectorImpl<SCC *>::const_iterator> iterator;
 499     typedef iterator_range<iterator> range;
 500     typedef pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>
 501         parent_iterator;
 502
 503     iterator begin() const { return SCCs.begin(); }
 504     iterator end() const { return SCCs.end(); }
 505
 506     ssize_t size() const { return SCCs.size(); }
 507
 508     SCC &operator[](int Idx) { return *SCCs[Idx]; }
 509
 510     iterator find(SCC &C) const {
 511       return SCCs.begin() + SCCIndices.find(&C)->second;
 512     }
 513
 514     parent_iterator parent_begin() const { return Parents.begin(); }
 515     parent_iterator parent_end() const { return Parents.end(); }
 516
 517     iterator_range<parent_iterator> parents() const {
 518       return make_range(parent_begin(), parent_end());
 519     }
 520
 521     /// Test if this SCC is a parent of \a C.
 522     bool isParentOf(const RefSCC &C) const { return C.isChildOf(*this); }
 523
 524     /// Test if this RefSCC is an ancestor of \a C.
 525     bool isAncestorOf(const RefSCC &C) const { return C.isDescendantOf(*this); }
 526
 527     /// Test if this RefSCC is a child of \a C.
 528     bool isChildOf(const RefSCC &C) const {
 529       return Parents.count(const_cast<RefSCC *>(&C));
 530     }
 531
 532     /// Test if this RefSCC is a descendant of \a C.
 533     bool isDescendantOf(const RefSCC &C) const;
 534
 535     /// Provide a short name by printing this SCC to a std::string.
 536     ///
 537     /// This copes with the fact that we don't have a name per-se for an SCC
 538     /// while still making the use of this in debugging and logging useful.
 539     std::string getName() const {
 540       std::string Name;
 541       raw_string_ostream OS(Name);
 542       OS << *this;
 543       OS.flush();
 544       return Name;
 545     }
 546
 547     ///@{
 548     /// \name Mutation API
 549     ///
 550     /// These methods provide the core API for updating the call graph in the
 551     /// presence of a (potentially still in-flight) DFS-found SCCs.
 552     ///
 553     /// Note that these methods sometimes have complex runtimes, so be careful
 554     /// how you call them.
 555
 556     /// Make an existing internal ref edge into a call edge.
 557     ///
 558     /// This may form a larger cycle and thus collapse SCCs into TargetN's SCC.
 559     /// If that happens, the deleted SCC pointers are returned. These SCCs are
 560     /// not in a valid state any longer but the pointers will remain valid
 561     /// until destruction of the parent graph instance for the purpose of
 562     /// clearing cached information.
 563     ///
 564     /// After this operation, both SourceN's SCC and TargetN's SCC may move
 565     /// position within this RefSCC's postorder list. Any SCCs merged are
 566     /// merged into the TargetN's SCC in order to preserve reachability analyses
 567     /// which took place on that SCC.
 568     SmallVector<SCC *, 1> switchInternalEdgeToCall(Node &SourceN,
 569                                                    Node &TargetN);
 570
 571     /// Make an existing internal call edge into a ref edge.
 572     ///
 573     /// If SourceN and TargetN are part of a single SCC, it may be split up due
 574     /// to breaking a cycle in the call edges that formed it. If that happens,
 575     /// then this routine will insert new SCCs into the postorder list *before*
 576     /// the SCC of TargetN (previously the SCC of both). This preserves
 577     /// postorder as the TargetN can reach all of the other nodes by definition
 578     /// of previously being in a single SCC formed by the cycle from SourceN to
 579     /// TargetN. The newly added nodes are added *immediately* and contiguously
 580     /// prior to the TargetN SCC and so they may be iterated starting from
 581     /// there.
 582     void switchInternalEdgeToRef(Node &SourceN, Node &TargetN);
 583
 584     /// Make an existing outgoing ref edge into a call edge.
 585     ///
 586     /// Note that this is trivial as there are no cyclic impacts and there
 587     /// remains a reference edge.
 588     void switchOutgoingEdgeToCall(Node &SourceN, Node &TargetN);
 589
 590     /// Make an existing outgoing call edge into a ref edge.
 591     ///
 592     /// This is trivial as there are no cyclic impacts and there remains
 593     /// a reference edge.
 594     void switchOutgoingEdgeToRef(Node &SourceN, Node &TargetN);
 595
 596     /// Insert a ref edge from one node in this RefSCC to another in this
 597     /// RefSCC.
 598     ///
 599     /// This is always a trivial operation as it doesn't change any part of the
 600     /// graph structure besides connecting the two nodes.
 601     ///
 602     /// Note that we don't support directly inserting internal *call* edges
 603     /// because that could change the graph structure and requires returning
 604     /// information about what became invalid. As a consequence, the pattern
 605     /// should be to first insert the necessary ref edge, and then to switch it
 606     /// to a call edge if needed and handle any invalidation that results. See
 607     /// the \c switchInternalEdgeToCall routine for details.
 608     void insertInternalRefEdge(Node &SourceN, Node &TargetN);
 609
 610     /// Insert an edge whose parent is in this RefSCC and child is in some
 611     /// child RefSCC.
 612     ///
 613     /// There must be an existing path from the \p SourceN to the \p TargetN.
 614     /// This operation is inexpensive and does not change the set of SCCs and
 615     /// RefSCCs in the graph.
 616     void insertOutgoingEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
 617
 618     /// Insert an edge whose source is in a descendant RefSCC and target is in
 619     /// this RefSCC.
 620     ///
 621     /// There must be an existing path from the target to the source in this
 622     /// case.
 623     ///
 624     /// NB! This is has the potential to be a very expensive function. It
 625     /// inherently forms a cycle in the prior RefSCC DAG and we have to merge
 626     /// RefSCCs to resolve that cycle. But finding all of the RefSCCs which
 627     /// participate in the cycle can in the worst case require traversing every
 628     /// RefSCC in the graph. Every attempt is made to avoid that, but passes
 629     /// must still exercise caution calling this routine repeatedly.
 630     ///
 631     /// Also note that this can only insert ref edges. In order to insert
 632     /// a call edge, first insert a ref edge and then switch it to a call edge.
 633     /// These are intentionally kept as separate interfaces because each step
 634     /// of the operation invalidates a different set of data structures.
 635     ///
 636     /// This returns all the RefSCCs which were merged into the this RefSCC
 637     /// (the target's). This allows callers to invalidate any cached
 638     /// information.
 639     ///
 640     /// FIXME: We could possibly optimize this quite a bit for cases where the
 641     /// caller and callee are very nearby in the graph. See comments in the
 642     /// implementation for details, but that use case might impact users.
 643     SmallVector<RefSCC *, 1> insertIncomingRefEdge(Node &SourceN,
 644                                                    Node &TargetN);
 645
 646     /// Remove an edge whose source is in this RefSCC and target is *not*.
 647     ///
 648     /// This removes an inter-RefSCC edge. All inter-RefSCC edges originating
 649     /// from this SCC have been fully explored by any in-flight DFS graph
 650     /// formation, so this is always safe to call once you have the source
 651     /// RefSCC.
 652     ///
 653     /// This operation does not change the cyclic structure of the graph and so
 654     /// is very inexpensive. It may change the connectivity graph of the SCCs
 655     /// though, so be careful calling this while iterating over them.
 656     void removeOutgoingEdge(Node &SourceN, Node &TargetN);
 657
 658     /// Remove a ref edge which is entirely within this RefSCC.
 659     ///
 660     /// Both the \a SourceN and the \a TargetN must be within this RefSCC.
 661     /// Removing such an edge may break cycles that form this RefSCC and thus
 662     /// this operation may change the RefSCC graph significantly. In
 663     /// particular, this operation will re-form new RefSCCs based on the
 664     /// remaining connectivity of the graph. The following invariants are
 665     /// guaranteed to hold after calling this method:
 666     ///
 667     /// 1) This RefSCC is still a RefSCC in the graph.
 668     /// 2) This RefSCC will be the parent of any new RefSCCs. Thus, this RefSCC
 669     ///    is preserved as the root of any new RefSCC DAG formed.
 670     /// 3) No RefSCC other than this RefSCC has its member set changed (this is
 671     ///    inherent in the definition of removing such an edge).
 672     /// 4) All of the parent links of the RefSCC graph will be updated to
 673     ///    reflect the new RefSCC structure.
 674     /// 5) All RefSCCs formed out of this RefSCC, excluding this RefSCC, will
 675     ///    be returned in post-order.
 676     /// 6) The order of the RefSCCs in the vector will be a valid postorder
 677     ///    traversal of the new RefSCCs.
 678     ///
 679     /// These invariants are very important to ensure that we can build
 680     /// optimization pipelines on top of the CGSCC pass manager which
 681     /// intelligently update the RefSCC graph without invalidating other parts
 682     /// of the RefSCC graph.
 683     ///
 684     /// Note that we provide no routine to remove a *call* edge. Instead, you
 685     /// must first switch it to a ref edge using \c switchInternalEdgeToRef.
 686     /// This split API is intentional as each of these two steps can invalidate
 687     /// a different aspect of the graph structure and needs to have the
 688     /// invalidation handled independently.
 689     ///
 690     /// The runtime complexity of this method is, in the worst case, O(V+E)
 691     /// where V is the number of nodes in this RefSCC and E is the number of
 692     /// edges leaving the nodes in this RefSCC. Note that E includes both edges
 693     /// within this RefSCC and edges from this RefSCC to child RefSCCs. Some
 694     /// effort has been made to minimize the overhead of common cases such as
 695     /// self-edges and edge removals which result in a spanning tree with no
 696     /// more cycles. There are also detailed comments within the implementation
 697     /// on techniques which could substantially improve this routine's
 698     /// efficiency.
 699     SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN,
 700                                                    Node &TargetN);
 701
 702     ///@}
 703   };
 704
 705   /// A post-order depth-first SCC iterator over the call graph.
 706   ///
 707   /// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for
 708   /// the call graph, walking it lazily in depth-first post-order. That is, it
 709   /// always visits SCCs for a callee prior to visiting the SCC for a caller
 710   /// (when they are in different SCCs).
 711   class postorder_ref_scc_iterator
 712       : public iterator_facade_base<postorder_ref_scc_iterator,
 713                                     std::forward_iterator_tag, RefSCC> {
 714     friend class LazyCallGraph;
 715     friend class LazyCallGraph::Node;
 716
 717     /// Nonce type to select the constructor for the end iterator.
 718     struct IsAtEndT {};
 719
 720     LazyCallGraph *G;
 721     RefSCC *C;
 722
 723     // Build the begin iterator for a node.
 724     postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G) {
 725       C = G.getNextRefSCCInPostOrder();
 726     }
 727
 728     // Build the end iterator for a node. This is selected purely by overload.
 729     postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/)
 730         : G(&G), C(nullptr) {}
 731
 732   public:
 733     bool operator==(const postorder_ref_scc_iterator &Arg) const {
 734       return G == Arg.G && C == Arg.C;
 735     }
 736
 737     reference operator*() const { return *C; }
 738
 739     using iterator_facade_base::operator++;
 740     postorder_ref_scc_iterator &operator++() {
 741       C = G->getNextRefSCCInPostOrder();
 742       return *this;
 743     }
 744   };
 745
 746   /// Construct a graph for the given module.
 747   ///
 748   /// This sets up the graph and computes all of the entry points of the graph.
 749   /// No function definitions are scanned until their nodes in the graph are
 750   /// requested during traversal.
 751   LazyCallGraph(Module &M);
 752
 753   LazyCallGraph(LazyCallGraph &&G);
 754   LazyCallGraph &operator=(LazyCallGraph &&RHS);
 755
 756   edge_iterator begin() {
 757     return edge_iterator(EntryEdges.begin(), EntryEdges.end());
 758   }
 759   edge_iterator end() {
 760     return edge_iterator(EntryEdges.end(), EntryEdges.end());
 761   }
 762
 763   postorder_ref_scc_iterator postorder_ref_scc_begin() {
 764     return postorder_ref_scc_iterator(*this);
 765   }
 766   postorder_ref_scc_iterator postorder_ref_scc_end() {
 767     return postorder_ref_scc_iterator(*this,
 768                                       postorder_ref_scc_iterator::IsAtEndT());
 769   }
 770
 771   iterator_range<postorder_ref_scc_iterator> postorder_ref_sccs() {
 772     return make_range(postorder_ref_scc_begin(), postorder_ref_scc_end());
 773   }
 774
 775   /// Lookup a function in the graph which has already been scanned and added.
 776   Node *lookup(const Function &F) const { return NodeMap.lookup(&F); }
 777
 778   /// Lookup a function's SCC in the graph.
 779   ///
 780   /// \returns null if the function hasn't been assigned an SCC via the SCC
 781   /// iterator walk.
 782   SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); }
 783
 784   /// Lookup a function's RefSCC in the graph.
 785   ///
 786   /// \returns null if the function hasn't been assigned a RefSCC via the
 787   /// RefSCC iterator walk.
 788   RefSCC *lookupRefSCC(Node &N) const {
 789     if (SCC *C = lookupSCC(N))
 790       return &C->getOuterRefSCC();
 791
 792     return nullptr;
 793   }
 794
 795   /// Get a graph node for a given function, scanning it to populate the graph
 796   /// data as necessary.
 797   Node &get(Function &F) {
 798     Node *&N = NodeMap[&F];
 799     if (N)
 800       return *N;
 801
 802     return insertInto(F, N);
 803   }
 804
 805   ///@{
 806   /// \name Pre-SCC Mutation API
 807   ///
 808   /// These methods are only valid to call prior to forming any SCCs for this
 809   /// call graph. They can be used to update the core node-graph during
 810   /// a node-based inorder traversal that precedes any SCC-based traversal.
 811   ///
 812   /// Once you begin manipulating a call graph's SCCs, you must perform all
 813   /// mutation of the graph via the SCC methods.
 814
 815   /// Update the call graph after inserting a new edge.
 816   void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK);
 817
 818   /// Update the call graph after inserting a new edge.
 819   void insertEdge(Function &Caller, Function &Callee, Edge::Kind EK) {
 820     return insertEdge(get(Caller), Callee, EK);
 821   }
 822
 823   /// Update the call graph after deleting an edge.
 824   void removeEdge(Node &Caller, Function &Callee);
 825
 826   /// Update the call graph after deleting an edge.
 827   void removeEdge(Function &Caller, Function &Callee) {
 828     return removeEdge(get(Caller), Callee);
 829   }
 830
 831   ///@}
 832
 833 private:
 834   typedef SmallVectorImpl<Node *>::reverse_iterator node_stack_iterator;
 835   typedef iterator_range<node_stack_iterator> node_stack_range;
 836
 837   /// Allocator that holds all the call graph nodes.
 838   SpecificBumpPtrAllocator<Node> BPA;
 839
 840   /// Maps function->node for fast lookup.
 841   DenseMap<const Function *, Node *> NodeMap;
 842
 843   /// The entry nodes to the graph.
 844   ///
 845   /// These nodes are reachable through "external" means. Put another way, they
 846   /// escape at the module scope.
 847   EdgeVectorT EntryEdges;
 848
 849   /// Map of the entry nodes in the graph to their indices in \c EntryEdges.
 850   DenseMap<Function *, int> EntryIndexMap;
 851
 852   /// Allocator that holds all the call graph SCCs.
 853   SpecificBumpPtrAllocator<SCC> SCCBPA;
 854
 855   /// Maps Function -> SCC for fast lookup.
 856   DenseMap<Node *, SCC *> SCCMap;
 857
 858   /// Allocator that holds all the call graph RefSCCs.
 859   SpecificBumpPtrAllocator<RefSCC> RefSCCBPA;
 860
 861   /// The leaf RefSCCs of the graph.
 862   ///
 863   /// These are all of the RefSCCs which have no children.
 864   SmallVector<RefSCC *, 4> LeafRefSCCs;
 865
 866   /// Stack of nodes in the DFS walk.
 867   SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
 868
 869   /// Set of entry nodes not-yet-processed into RefSCCs.
 870   SmallVector<Function *, 4> RefSCCEntryNodes;
 871
 872   /// Stack of nodes the DFS has walked but not yet put into a SCC.
 873   SmallVector<Node *, 4> PendingRefSCCStack;
 874
 875   /// Counter for the next DFS number to assign.
 876   int NextDFSNumber;
 877
 878   /// Helper to insert a new function, with an already looked-up entry in
 879   /// the NodeMap.
 880   Node &insertInto(Function &F, Node *&MappedN);
 881
 882   /// Helper to update pointers back to the graph object during moves.
 883   void updateGraphPtrs();
 884
 885   /// Allocates an SCC and constructs it using the graph allocator.
 886   ///
 887   /// The arguments are forwarded to the constructor.
 888   template <typename... Ts> SCC *createSCC(Ts &&... Args) {
 889     return new (SCCBPA.Allocate()) SCC(std::forward<Ts>(Args)...);
 890   }
 891
 892   /// Allocates a RefSCC and constructs it using the graph allocator.
 893   ///
 894   /// The arguments are forwarded to the constructor.
 895   template <typename... Ts> RefSCC *createRefSCC(Ts &&... Args) {
 896     return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
 897   }
 898
 899   /// Build the SCCs for a RefSCC out of a list of nodes.
 900   void buildSCCs(RefSCC &RC, node_stack_range Nodes);
 901
 902   /// Connect a RefSCC into the larger graph.
 903   ///
 904   /// This walks the edges to connect the RefSCC to its children's parent set,
 905   /// and updates the root leaf list.
 906   void connectRefSCC(RefSCC &RC);
 907
 908   /// Retrieve the next node in the post-order RefSCC walk of the call graph.
 909   RefSCC *getNextRefSCCInPostOrder();
 910 };
 911
 912 inline LazyCallGraph::Edge::Edge() : Value() {}
 913 inline LazyCallGraph::Edge::Edge(Function &F, Kind K) : Value(&F, K) {}
 914 inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
 915
 916 inline LazyCallGraph::Edge::operator bool() const {
 917   return !Value.getPointer().isNull();
 918 }
 919
 920 inline bool LazyCallGraph::Edge::isCall() const {
 921   assert(*this && "Queried a null edge!");
 922   return Value.getInt() == Call;
 923 }
 924
 925 inline Function &LazyCallGraph::Edge::getFunction() const {
 926   assert(*this && "Queried a null edge!");
 927   auto P = Value.getPointer();
 928   if (auto *F = P.dyn_cast<Function *>())
 929     return *F;
 930
 931   return P.get<Node *>()->getFunction();
 932 }
 933
 934 inline LazyCallGraph::Node *LazyCallGraph::Edge::getNode() const {
 935   assert(*this && "Queried a null edge!");
 936   auto P = Value.getPointer();
 937   if (auto *N = P.dyn_cast<Node *>())
 938     return N;
 939
 940   return nullptr;
 941 }
 942
 943 inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) {
 944   assert(*this && "Queried a null edge!");
 945   auto P = Value.getPointer();
 946   if (auto *N = P.dyn_cast<Node *>())
 947     return *N;
 948
 949   Node &N = G.get(*P.get<Function *>());
 950   Value.setPointer(&N);
 951   return N;
 952 }
 953
 954 // Provide GraphTraits specializations for call graphs.
 955 template <> struct GraphTraits<LazyCallGraph::Node *> {
 956   typedef LazyCallGraph::Node NodeType;
 957   typedef LazyCallGraph::edge_iterator ChildIteratorType;
 958
 959   static NodeType *getEntryNode(NodeType *N) { return N; }
 960   static ChildIteratorType child_begin(NodeType *N) { return N->begin(); }
 961   static ChildIteratorType child_end(NodeType *N) { return N->end(); }
 962 };
 963 template <> struct GraphTraits<LazyCallGraph *> {
 964   typedef LazyCallGraph::Node NodeType;
 965   typedef LazyCallGraph::edge_iterator ChildIteratorType;
 966
 967   static NodeType *getEntryNode(NodeType *N) { return N; }
 968   static ChildIteratorType child_begin(NodeType *N) { return N->begin(); }
 969   static ChildIteratorType child_end(NodeType *N) { return N->end(); }
 970 };
 971
 972 /// An analysis pass which computes the call graph for a module.
 973 class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> {
 974   friend AnalysisInfoMixin<LazyCallGraphAnalysis>;
 975   static char PassID;
 976
 977 public:
 978   /// Inform generic clients of the result type.
 979   typedef LazyCallGraph Result;
 980
 981   /// Compute the \c LazyCallGraph for the module \c M.
 982   ///
 983   /// This just builds the set of entry points to the call graph. The rest is
 984   /// built lazily as it is walked.
 985   LazyCallGraph run(Module &M, ModuleAnalysisManager &) {
 986     return LazyCallGraph(M);
 987   }
 988 };
 989
 990 /// A pass which prints the call graph to a \c raw_ostream.
 991 ///
 992 /// This is primarily useful for testing the analysis.
 993 class LazyCallGraphPrinterPass
 994     : public PassInfoMixin<LazyCallGraphPrinterPass> {
 995   raw_ostream &OS;
 996
 997 public:
 998   explicit LazyCallGraphPrinterPass(raw_ostream &OS);
 999
1000   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
1001 };
1002
1003 /// A pass which prints the call graph as a DOT file to a \c raw_ostream.
1004 ///
1005 /// This is primarily useful for visualization purposes.
1006 class LazyCallGraphDOTPrinterPass
1007     : public PassInfoMixin<LazyCallGraphDOTPrinterPass> {
1008   raw_ostream &OS;
1009
1010 public:
1011   explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS);
1012
1013   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
1014 };
1015 }
1016
1017 #endif