contrib/compiler-rt/lib/xray/xray_function_call_trie.h

   1 //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file is a part of XRay, a dynamic runtime instrumentation system.
  11 //
  12 // This file defines the interface for a function call trie.
  13 //
  14 //===----------------------------------------------------------------------===//
  15 #ifndef XRAY_FUNCTION_CALL_TRIE_H
  16 #define XRAY_FUNCTION_CALL_TRIE_H
  17
  18 #include "sanitizer_common/sanitizer_allocator_internal.h"
  19 #include "xray_profiling_flags.h"
  20 #include "xray_segmented_array.h"
  21 #include <memory> // For placement new.
  22 #include <utility>
  23
  24 namespace __xray {
  25
  26 /// A FunctionCallTrie represents the stack traces of XRay instrumented
  27 /// functions that we've encountered, where a node corresponds to a function and
  28 /// the path from the root to the node its stack trace. Each node in the trie
  29 /// will contain some useful values, including:
  30 ///
  31 ///   * The cumulative amount of time spent in this particular node/stack.
  32 ///   * The number of times this stack has appeared.
  33 ///   * A histogram of latencies for that particular node.
  34 ///
  35 /// Each node in the trie will also contain a list of callees, represented using
  36 /// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
  37 /// ID of the callee, and a pointer to the node.
  38 ///
  39 /// If we visualise this data structure, we'll find the following potential
  40 /// representation:
  41 ///
  42 ///   [function id node] -> [callees] [cumulative time]
  43 ///                         [call counter] [latency histogram]
  44 ///
  45 /// As an example, when we have a function in this pseudocode:
  46 ///
  47 ///   func f(N) {
  48 ///     g()
  49 ///     h()
  50 ///     for i := 1..N { j() }
  51 ///   }
  52 ///
  53 /// We may end up with a trie of the following form:
  54 ///
  55 ///   f -> [ g, h, j ] [...] [1] [...]
  56 ///   g -> [ ... ] [...] [1] [...]
  57 ///   h -> [ ... ] [...] [1] [...]
  58 ///   j -> [ ... ] [...] [N] [...]
  59 ///
  60 /// If for instance the function g() called j() like so:
  61 ///
  62 ///   func g() {
  63 ///     for i := 1..10 { j() }
  64 ///   }
  65 ///
  66 /// We'll find the following updated trie:
  67 ///
  68 ///   f -> [ g, h, j ] [...] [1] [...]
  69 ///   g -> [ j' ] [...] [1] [...]
  70 ///   h -> [ ... ] [...] [1] [...]
  71 ///   j -> [ ... ] [...] [N] [...]
  72 ///   j' -> [ ... ] [...] [10] [...]
  73 ///
  74 /// Note that we'll have a new node representing the path `f -> g -> j'` with
  75 /// isolated data. This isolation gives us a means of representing the stack
  76 /// traces as a path, as opposed to a key in a table. The alternative
  77 /// implementation here would be to use a separate table for the path, and use
  78 /// hashes of the path as an identifier to accumulate the information. We've
  79 /// moved away from this approach as it takes a lot of time to compute the hash
  80 /// every time we need to update a function's call information as we're handling
  81 /// the entry and exit events.
  82 ///
  83 /// This approach allows us to maintain a shadow stack, which represents the
  84 /// currently executing path, and on function exits quickly compute the amount
  85 /// of time elapsed from the entry, then update the counters for the node
  86 /// already represented in the trie. This necessitates an efficient
  87 /// representation of the various data structures (the list of callees must be
  88 /// cache-aware and efficient to look up, and the histogram must be compact and
  89 /// quick to update) to enable us to keep the overheads of this implementation
  90 /// to the minimum.
  91 class FunctionCallTrie {
  92 public:
  93   struct Node;
  94
  95   // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  96   // standard library types in this header.
  97   struct NodeIdPair {
  98     Node *NodePtr;
  99     int32_t FId;
 100
 101     // Constructor for inplace-construction.
 102     NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
 103   };
 104
 105   using NodeIdPairArray = Array<NodeIdPair>;
 106   using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
 107
 108   // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
 109   // number of times this node actually appeared, the cumulative amount of time
 110   // for this particular node including its children call times, and just the
 111   // local time spent on this node. Each Node will have the ID of the XRay
 112   // instrumented function that it is associated to.
 113   struct Node {
 114     Node *Parent;
 115     NodeIdPairArray Callees;
 116     int64_t CallCount;
 117     int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
 118     int32_t FId;
 119
 120     // We add a constructor here to allow us to inplace-construct through
 121     // Array<...>'s AppendEmplace.
 122     Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
 123          int32_t F)
 124         : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
 125           FId(F) {}
 126
 127     // TODO: Include the compact histogram.
 128   };
 129
 130 private:
 131   struct ShadowStackEntry {
 132     uint64_t EntryTSC;
 133     Node *NodePtr;
 134
 135     // We add a constructor here to allow us to inplace-construct through
 136     // Array<...>'s AppendEmplace.
 137     ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
 138   };
 139
 140   using NodeArray = Array<Node>;
 141   using RootArray = Array<Node *>;
 142   using ShadowStackArray = Array<ShadowStackEntry>;
 143
 144 public:
 145   // We collate the allocators we need into a single struct, as a convenience to
 146   // allow us to initialize these as a group.
 147   struct Allocators {
 148     using NodeAllocatorType = NodeArray::AllocatorType;
 149     using RootAllocatorType = RootArray::AllocatorType;
 150     using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
 151
 152     NodeAllocatorType *NodeAllocator = nullptr;
 153     RootAllocatorType *RootAllocator = nullptr;
 154     ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
 155     NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
 156
 157     Allocators() {}
 158     Allocators(const Allocators &) = delete;
 159     Allocators &operator=(const Allocators &) = delete;
 160
 161     Allocators(Allocators &&O)
 162         : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
 163           ShadowStackAllocator(O.ShadowStackAllocator),
 164           NodeIdPairAllocator(O.NodeIdPairAllocator) {
 165       O.NodeAllocator = nullptr;
 166       O.RootAllocator = nullptr;
 167       O.ShadowStackAllocator = nullptr;
 168       O.NodeIdPairAllocator = nullptr;
 169     }
 170
 171     Allocators &operator=(Allocators &&O) {
 172       {
 173         auto Tmp = O.NodeAllocator;
 174         O.NodeAllocator = this->NodeAllocator;
 175         this->NodeAllocator = Tmp;
 176       }
 177       {
 178         auto Tmp = O.RootAllocator;
 179         O.RootAllocator = this->RootAllocator;
 180         this->RootAllocator = Tmp;
 181       }
 182       {
 183         auto Tmp = O.ShadowStackAllocator;
 184         O.ShadowStackAllocator = this->ShadowStackAllocator;
 185         this->ShadowStackAllocator = Tmp;
 186       }
 187       {
 188         auto Tmp = O.NodeIdPairAllocator;
 189         O.NodeIdPairAllocator = this->NodeIdPairAllocator;
 190         this->NodeIdPairAllocator = Tmp;
 191       }
 192       return *this;
 193     }
 194
 195     ~Allocators() {
 196       // Note that we cannot use delete on these pointers, as they need to be
 197       // returned to the sanitizer_common library's internal memory tracking
 198       // system.
 199       if (NodeAllocator != nullptr) {
 200         NodeAllocator->~NodeAllocatorType();
 201         InternalFree(NodeAllocator);
 202         NodeAllocator = nullptr;
 203       }
 204       if (RootAllocator != nullptr) {
 205         RootAllocator->~RootAllocatorType();
 206         InternalFree(RootAllocator);
 207         RootAllocator = nullptr;
 208       }
 209       if (ShadowStackAllocator != nullptr) {
 210         ShadowStackAllocator->~ShadowStackAllocatorType();
 211         InternalFree(ShadowStackAllocator);
 212         ShadowStackAllocator = nullptr;
 213       }
 214       if (NodeIdPairAllocator != nullptr) {
 215         NodeIdPairAllocator->~NodeIdPairAllocatorType();
 216         InternalFree(NodeIdPairAllocator);
 217         NodeIdPairAllocator = nullptr;
 218       }
 219     }
 220   };
 221
 222   // TODO: Support configuration of options through the arguments.
 223   static Allocators InitAllocators() {
 224     return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
 225   }
 226
 227   static Allocators InitAllocatorsCustom(uptr Max) {
 228     Allocators A;
 229     auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
 230         InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
 231     new (NodeAllocator) Allocators::NodeAllocatorType(Max);
 232     A.NodeAllocator = NodeAllocator;
 233
 234     auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
 235         InternalAlloc(sizeof(Allocators::RootAllocatorType)));
 236     new (RootAllocator) Allocators::RootAllocatorType(Max);
 237     A.RootAllocator = RootAllocator;
 238
 239     auto ShadowStackAllocator =
 240         reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
 241             InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
 242     new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
 243     A.ShadowStackAllocator = ShadowStackAllocator;
 244
 245     auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
 246         InternalAlloc(sizeof(NodeIdPairAllocatorType)));
 247     new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
 248     A.NodeIdPairAllocator = NodeIdPairAllocator;
 249     return A;
 250   }
 251
 252 private:
 253   NodeArray Nodes;
 254   RootArray Roots;
 255   ShadowStackArray ShadowStack;
 256   NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
 257
 258 public:
 259   explicit FunctionCallTrie(const Allocators &A)
 260       : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
 261         ShadowStack(*A.ShadowStackAllocator),
 262         NodeIdPairAllocator(A.NodeIdPairAllocator) {}
 263
 264   void enterFunction(const int32_t FId, uint64_t TSC) {
 265     DCHECK_NE(FId, 0);
 266     // This function primarily deals with ensuring that the ShadowStack is
 267     // consistent and ready for when an exit event is encountered.
 268     if (UNLIKELY(ShadowStack.empty())) {
 269       auto NewRoot =
 270           Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
 271       if (UNLIKELY(NewRoot == nullptr))
 272         return;
 273       Roots.Append(NewRoot);
 274       ShadowStack.AppendEmplace(TSC, NewRoot);
 275       return;
 276     }
 277
 278     auto &Top = ShadowStack.back();
 279     auto TopNode = Top.NodePtr;
 280     DCHECK_NE(TopNode, nullptr);
 281
 282     // If we've seen this callee before, then we just access that node and place
 283     // that on the top of the stack.
 284     auto Callee = TopNode->Callees.find_element(
 285         [FId](const NodeIdPair &NR) { return NR.FId == FId; });
 286     if (Callee != nullptr) {
 287       CHECK_NE(Callee->NodePtr, nullptr);
 288       ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
 289       return;
 290     }
 291
 292     // This means we've never seen this stack before, create a new node here.
 293     auto NewNode =
 294         Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
 295     if (UNLIKELY(NewNode == nullptr))
 296       return;
 297     DCHECK_NE(NewNode, nullptr);
 298     TopNode->Callees.AppendEmplace(NewNode, FId);
 299     ShadowStack.AppendEmplace(TSC, NewNode);
 300     DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
 301     return;
 302   }
 303
 304   void exitFunction(int32_t FId, uint64_t TSC) {
 305     // When we exit a function, we look up the ShadowStack to see whether we've
 306     // entered this function before. We do as little processing here as we can,
 307     // since most of the hard work would have already been done at function
 308     // entry.
 309     uint64_t CumulativeTreeTime = 0;
 310     while (!ShadowStack.empty()) {
 311       const auto &Top = ShadowStack.back();
 312       auto TopNode = Top.NodePtr;
 313       DCHECK_NE(TopNode, nullptr);
 314       auto LocalTime = TSC - Top.EntryTSC;
 315       TopNode->CallCount++;
 316       TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
 317       CumulativeTreeTime += LocalTime;
 318       ShadowStack.trim(1);
 319
 320       // TODO: Update the histogram for the node.
 321       if (TopNode->FId == FId)
 322         break;
 323     }
 324   }
 325
 326   const RootArray &getRoots() const { return Roots; }
 327
 328   // The deepCopyInto operation will update the provided FunctionCallTrie by
 329   // re-creating the contents of this particular FunctionCallTrie in the other
 330   // FunctionCallTrie. It will do this using a Depth First Traversal from the
 331   // roots, and while doing so recreating the traversal in the provided
 332   // FunctionCallTrie.
 333   //
 334   // This operation will *not* destroy the state in `O`, and thus may cause some
 335   // duplicate entries in `O` if it is not empty.
 336   //
 337   // This function is *not* thread-safe, and may require external
 338   // synchronisation of both "this" and |O|.
 339   //
 340   // This function must *not* be called with a non-empty FunctionCallTrie |O|.
 341   void deepCopyInto(FunctionCallTrie &O) const {
 342     DCHECK(O.getRoots().empty());
 343
 344     // We then push the root into a stack, to use as the parent marker for new
 345     // nodes we push in as we're traversing depth-first down the call tree.
 346     struct NodeAndParent {
 347       FunctionCallTrie::Node *Node;
 348       FunctionCallTrie::Node *NewNode;
 349     };
 350     using Stack = Array<NodeAndParent>;
 351
 352     typename Stack::AllocatorType StackAllocator(
 353         profilingFlags()->stack_allocator_max);
 354     Stack DFSStack(StackAllocator);
 355
 356     for (const auto Root : getRoots()) {
 357       // Add a node in O for this root.
 358       auto NewRoot = O.Nodes.AppendEmplace(
 359           nullptr, *O.NodeIdPairAllocator, Root->CallCount,
 360           Root->CumulativeLocalTime, Root->FId);
 361
 362       // Because we cannot allocate more memory we should bail out right away.
 363       if (UNLIKELY(NewRoot == nullptr))
 364         return;
 365
 366       O.Roots.Append(NewRoot);
 367
 368       // TODO: Figure out what to do if we fail to allocate any more stack
 369       // space. Maybe warn or report once?
 370       DFSStack.AppendEmplace(Root, NewRoot);
 371       while (!DFSStack.empty()) {
 372         NodeAndParent NP = DFSStack.back();
 373         DCHECK_NE(NP.Node, nullptr);
 374         DCHECK_NE(NP.NewNode, nullptr);
 375         DFSStack.trim(1);
 376         for (const auto Callee : NP.Node->Callees) {
 377           auto NewNode = O.Nodes.AppendEmplace(
 378               NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
 379               Callee.NodePtr->CumulativeLocalTime, Callee.FId);
 380           if (UNLIKELY(NewNode == nullptr))
 381             return;
 382           NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
 383           DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
 384         }
 385       }
 386     }
 387   }
 388
 389   // The mergeInto operation will update the provided FunctionCallTrie by
 390   // traversing the current trie's roots and updating (i.e. merging) the data in
 391   // the nodes with the data in the target's nodes. If the node doesn't exist in
 392   // the provided trie, we add a new one in the right position, and inherit the
 393   // data from the original (current) trie, along with all its callees.
 394   //
 395   // This function is *not* thread-safe, and may require external
 396   // synchronisation of both "this" and |O|.
 397   void mergeInto(FunctionCallTrie &O) const {
 398     struct NodeAndTarget {
 399       FunctionCallTrie::Node *OrigNode;
 400       FunctionCallTrie::Node *TargetNode;
 401     };
 402     using Stack = Array<NodeAndTarget>;
 403     typename Stack::AllocatorType StackAllocator(
 404         profilingFlags()->stack_allocator_max);
 405     Stack DFSStack(StackAllocator);
 406
 407     for (const auto Root : getRoots()) {
 408       Node *TargetRoot = nullptr;
 409       auto R = O.Roots.find_element(
 410           [&](const Node *Node) { return Node->FId == Root->FId; });
 411       if (R == nullptr) {
 412         TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
 413                                            0, Root->FId);
 414         if (UNLIKELY(TargetRoot == nullptr))
 415           return;
 416
 417         O.Roots.Append(TargetRoot);
 418       } else {
 419         TargetRoot = *R;
 420       }
 421
 422       DFSStack.Append(NodeAndTarget{Root, TargetRoot});
 423       while (!DFSStack.empty()) {
 424         NodeAndTarget NT = DFSStack.back();
 425         DCHECK_NE(NT.OrigNode, nullptr);
 426         DCHECK_NE(NT.TargetNode, nullptr);
 427         DFSStack.trim(1);
 428         // TODO: Update the histogram as well when we have it ready.
 429         NT.TargetNode->CallCount += NT.OrigNode->CallCount;
 430         NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
 431         for (const auto Callee : NT.OrigNode->Callees) {
 432           auto TargetCallee = NT.TargetNode->Callees.find_element(
 433               [&](const FunctionCallTrie::NodeIdPair &C) {
 434                 return C.FId == Callee.FId;
 435               });
 436           if (TargetCallee == nullptr) {
 437             auto NewTargetNode = O.Nodes.AppendEmplace(
 438                 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
 439
 440             if (UNLIKELY(NewTargetNode == nullptr))
 441               return;
 442
 443             TargetCallee =
 444                 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
 445           }
 446           DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
 447         }
 448       }
 449     }
 450   }
 451 };
 452
 453 } // namespace __xray
 454
 455 #endif // XRAY_FUNCTION_CALL_TRIE_H