1 //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a part of XRay, a dynamic runtime instrumentation system.
12 // This file defines the interface for a function call trie.
14 //===----------------------------------------------------------------------===//
15 #ifndef XRAY_FUNCTION_CALL_TRIE_H
16 #define XRAY_FUNCTION_CALL_TRIE_H
18 #include "sanitizer_common/sanitizer_allocator_internal.h"
19 #include "xray_profiling_flags.h"
20 #include "xray_segmented_array.h"
21 #include <memory> // For placement new.
26 /// A FunctionCallTrie represents the stack traces of XRay instrumented
27 /// functions that we've encountered, where a node corresponds to a function and
28 /// the path from the root to the node is its stack trace. Each node in the trie
29 /// will contain some useful values, including:
31 /// * The cumulative amount of time spent in this particular node/stack.
32 /// * The number of times this stack has appeared.
33 /// * A histogram of latencies for that particular node.
35 /// Each node in the trie will also contain a list of callees, represented using
36 /// an Array<NodeIdPair> -- each NodeIdPair instance will contain the function
37 /// ID of the callee, and a pointer to the node.
39 /// If we visualise this data structure, we'll find the following potential
/// representation:
42 /// [function id node] -> [callees] [cumulative time]
43 /// [call counter] [latency histogram]
45 /// As an example, when we have a function in this pseudocode:
50 /// for i := 1..N { j() }
53 /// We may end up with a trie of the following form:
55 /// f -> [ g, h, j ] [...] [1] [...]
56 /// g -> [ ... ] [...] [1] [...]
57 /// h -> [ ... ] [...] [1] [...]
58 /// j -> [ ... ] [...] [N] [...]
60 /// If for instance the function g() called j() like so:
63 /// for i := 1..10 { j() }
66 /// We'll find the following updated trie:
68 /// f -> [ g, h, j ] [...] [1] [...]
69 /// g -> [ j' ] [...] [1] [...]
70 /// h -> [ ... ] [...] [1] [...]
71 /// j -> [ ... ] [...] [N] [...]
72 /// j' -> [ ... ] [...] [10] [...]
74 /// Note that we'll have a new node representing the path `f -> g -> j'` with
75 /// isolated data. This isolation gives us a means of representing the stack
76 /// traces as a path, as opposed to a key in a table. The alternative
77 /// implementation here would be to use a separate table for the path, and use
78 /// hashes of the path as an identifier to accumulate the information. We've
79 /// moved away from this approach as it takes a lot of time to compute the hash
80 /// every time we need to update a function's call information as we're handling
81 /// the entry and exit events.
83 /// This approach allows us to maintain a shadow stack, which represents the
84 /// currently executing path, and on function exits quickly compute the amount
85 /// of time elapsed from the entry, then update the counters for the node
86 /// already represented in the trie. This necessitates an efficient
87 /// representation of the various data structures (the list of callees must be
88 /// cache-aware and efficient to look up, and the histogram must be compact and
89 /// quick to update) to enable us to keep the overheads of this implementation
/// to a minimum.
91 class FunctionCallTrie {
95 // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
96 // standard library types in this header.
// A NodeIdPair links a callee's function ID (FId) to the Node that holds the
// data for that callee (NodePtr).
101 // Constructor for inplace-construction.
// Stores N in NodePtr and F in FId; neither argument is validated here.
102 NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
// Array of callee links, and the allocator type that such an array is
// constructed from (see the Callees member of Node).
105 using NodeIdPairArray = Array<NodeIdPair>;
106 using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
108 // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
109 // number of times this node actually appeared, the cumulative amount of time
110 // for this particular node including its children call times, and just the
111 // local time spent on this node. Each Node will have the ID of the XRay
112 // instrumented function that it is associated to.
// Links to the child nodes. enterFunction() looks a callee up by function ID
// before appending, so it adds at most one entry per callee function ID.
115 NodeIdPairArray Callees;
117 int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
120 // We add a constructor here to allow us to inplace-construct through
121 // Array<...>'s AppendEmplace.
// P becomes Parent (callers pass nullptr when creating a root), A is the
// allocator the Callees array is constructed from, and CC / CLT seed CallCount
// and CumulativeLocalTime respectively.
122 Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
124 : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
127 // TODO: Include the compact histogram.
// One frame of the shadow stack: the timestamp passed to enterFunction() for
// the call (EntryTSC) and the trie node that represents the call path
// (NodePtr). exitFunction() uses EntryTSC to compute the elapsed time.
131 struct ShadowStackEntry {
135 // We add a constructor here to allow us to inplace-construct through
136 // Array<...>'s AppendEmplace.
// Stores T in EntryTSC and N in NodePtr.
137 ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
// Container types used by the trie: storage for the nodes themselves, the
// list of pointers to root nodes, and the shadow stack of active calls.
140 using NodeArray = Array<Node>;
141 using RootArray = Array<Node *>;
142 using ShadowStackArray = Array<ShadowStackEntry>;
145 // We collate the allocators we need into a single struct, as a convenience to
146 // allow us to initialize these as a group.
// Allocator types matching each of the trie's array types.
148 using NodeAllocatorType = NodeArray::AllocatorType;
149 using RootAllocatorType = RootArray::AllocatorType;
150 using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
// All four pointers default to nullptr. The cleanup code further down only
// releases the pointers that are non-null.
152 NodeAllocatorType *NodeAllocator = nullptr;
153 RootAllocatorType *RootAllocator = nullptr;
154 ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
155 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
// Copying is disabled; the move constructor and move assignment operator
// transfer or swap the pointers instead.
158 Allocators(const Allocators &) = delete;
159 Allocators &operator=(const Allocators &) = delete;
// Move constructor: copies the four allocator pointers out of O and then
// resets O's pointers to nullptr, leaving O holding no allocators.
161 Allocators(Allocators &&O)
162 : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
163 ShadowStackAllocator(O.ShadowStackAllocator),
164 NodeIdPairAllocator(O.NodeIdPairAllocator) {
165 O.NodeAllocator = nullptr;
166 O.RootAllocator = nullptr;
167 O.ShadowStackAllocator = nullptr;
168 O.NodeIdPairAllocator = nullptr;
// Move assignment: exchanges each of the four allocator pointers with the
// corresponding pointer in O via a temporary. Nothing is released here; the
// pointers previously held by this object end up in O.
// NOTE(review): presumably O's destructor then releases them -- confirm.
171 Allocators &operator=(Allocators &&O) {
// Swap NodeAllocator.
173 auto Tmp = O.NodeAllocator;
174 O.NodeAllocator = this->NodeAllocator;
175 this->NodeAllocator = Tmp;
// Swap RootAllocator.
178 auto Tmp = O.RootAllocator;
179 O.RootAllocator = this->RootAllocator;
180 this->RootAllocator = Tmp;
// Swap ShadowStackAllocator.
183 auto Tmp = O.ShadowStackAllocator;
184 O.ShadowStackAllocator = this->ShadowStackAllocator;
185 this->ShadowStackAllocator = Tmp;
// Swap NodeIdPairAllocator.
188 auto Tmp = O.NodeIdPairAllocator;
189 O.NodeIdPairAllocator = this->NodeIdPairAllocator;
190 this->NodeIdPairAllocator = Tmp;
196 // Note that we cannot use delete on these pointers, as they need to be
197 // returned to the sanitizer_common library's internal memory tracking
// system.
// For each non-null allocator pointer: run the allocator's destructor
// explicitly, hand the storage back with InternalFree, then reset the pointer
// to nullptr.
199 if (NodeAllocator != nullptr) {
200 NodeAllocator->~NodeAllocatorType();
201 InternalFree(NodeAllocator);
202 NodeAllocator = nullptr;
204 if (RootAllocator != nullptr) {
205 RootAllocator->~RootAllocatorType();
206 InternalFree(RootAllocator);
207 RootAllocator = nullptr;
209 if (ShadowStackAllocator != nullptr) {
210 ShadowStackAllocator->~ShadowStackAllocatorType();
211 InternalFree(ShadowStackAllocator);
212 ShadowStackAllocator = nullptr;
214 if (NodeIdPairAllocator != nullptr) {
215 NodeIdPairAllocator->~NodeIdPairAllocatorType();
216 InternalFree(NodeIdPairAllocator);
217 NodeIdPairAllocator = nullptr;
222 // TODO: Support configuration of options through the arguments.
// Builds an Allocators instance by forwarding the per_thread_allocator_max
// profiling flag to InitAllocatorsCustom() as the Max argument.
223 static Allocators InitAllocators() {
224 return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
// Creates the four allocators and stores them in the Allocators object A.
// Each allocator is built the same way: raw storage is obtained from
// InternalAlloc, the allocator is constructed in that storage with placement
// new, and Max is passed as the only constructor argument.
// NOTE(review): the InternalAlloc results are used without a null check --
// confirm that InternalAlloc cannot return nullptr in this configuration.
227 static Allocators InitAllocatorsCustom(uptr Max) {
// Allocator for the trie nodes.
229 auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
230 InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
231 new (NodeAllocator) Allocators::NodeAllocatorType(Max);
232 A.NodeAllocator = NodeAllocator;
// Allocator for the array of root pointers.
234 auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
235 InternalAlloc(sizeof(Allocators::RootAllocatorType)));
236 new (RootAllocator) Allocators::RootAllocatorType(Max);
237 A.RootAllocator = RootAllocator;
// Allocator for the shadow stack.
239 auto ShadowStackAllocator =
240 reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
241 InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
242 new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
243 A.ShadowStackAllocator = ShadowStackAllocator;
// Allocator for the per-node callee arrays.
245 auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
246 InternalAlloc(sizeof(NodeIdPairAllocatorType)));
247 new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
248 A.NodeIdPairAllocator = NodeIdPairAllocator;
// Stack of the calls currently in progress; enterFunction() pushes onto it and
// exitFunction() reads from its top.
255 ShadowStackArray ShadowStack;
// Allocator handed to every Node this trie creates, for the node's Callees
// array. Set from the Allocators object passed to the constructor.
256 NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
// Constructs an empty trie on top of the allocators in A. NodeAllocator,
// RootAllocator and ShadowStackAllocator are dereferenced unconditionally here
// and must be non-null; NodeIdPairAllocator is stored as a pointer and
// dereferenced later when nodes are created.
// NOTE(review): the trie appears to borrow the allocators rather than own
// them, so A presumably has to outlive this object -- confirm.
259 explicit FunctionCallTrie(const Allocators &A)
260 : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
261 ShadowStack(*A.ShadowStackAllocator),
262 NodeIdPairAllocator(A.NodeIdPairAllocator) {}
// Records entry into the function with ID FId at timestamp TSC.
//
// With an empty shadow stack, a new parentless node is created for FId,
// registered in Roots and pushed onto the shadow stack. Otherwise FId is
// looked up among the callees of the node on top of the shadow stack: an
// existing callee node is reused, and if there is none a new node parented at
// the top node is created and linked into its Callees. In every case the
// pushed entry pairs TSC with the selected node. New nodes start with a call
// count and cumulative local time of 0.
//
// NOTE(review): the results of Roots.Append and ShadowStack.AppendEmplace are
// not inspected in the visible code -- confirm how exhaustion of those
// allocators is meant to be handled.
264 void enterFunction(const int32_t FId, uint64_t TSC) {
266 // This function primarily deals with ensuring that the ShadowStack is
267 // consistent and ready for when an exit event is encountered.
268 if (UNLIKELY(ShadowStack.empty())) {
270 Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId);
271 if (UNLIKELY(NewRoot == nullptr))
273 Roots.Append(NewRoot);
274 ShadowStack.AppendEmplace(TSC, NewRoot);
// Non-empty shadow stack: the node on top is the caller of FId.
278 auto &Top = ShadowStack.back();
279 auto TopNode = Top.NodePtr;
280 DCHECK_NE(TopNode, nullptr);
282 // If we've seen this callee before, then we just access that node and place
283 // that on the top of the stack.
284 auto Callee = TopNode->Callees.find_element(
285 [FId](const NodeIdPair &NR) { return NR.FId == FId; });
286 if (Callee != nullptr) {
287 CHECK_NE(Callee->NodePtr, nullptr);
288 ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
292 // This means we've never seen this stack before, create a new node here.
294 Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
295 if (UNLIKELY(NewNode == nullptr))
297 DCHECK_NE(NewNode, nullptr);
// Link the new node into the caller's callee list, then make it the new top.
298 TopNode->Callees.AppendEmplace(NewNode, FId);
299 ShadowStack.AppendEmplace(TSC, NewNode);
300 DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
// Records an exit from the function with ID FId at timestamp TSC.
//
// For each shadow stack entry visited, the node's CallCount is incremented
// and its CumulativeLocalTime grows by the entry's elapsed time
// (TSC - EntryTSC) minus CumulativeTreeTime, the running sum of the elapsed
// times of the entries already visited during this call. The comparison
// against FId identifies the entry that belongs to the exiting function.
//
// NOTE(review): TSC is unsigned, so TSC - Top.EntryTSC wraps around if TSC is
// smaller than the recorded entry value -- confirm callers guarantee a
// non-decreasing TSC.
304 void exitFunction(int32_t FId, uint64_t TSC) {
305 // When we exit a function, we look up the ShadowStack to see whether we've
306 // entered this function before. We do as little processing here as we can,
307 // since most of the hard work would have already been done at function
// entry.
309 uint64_t CumulativeTreeTime = 0;
310 while (!ShadowStack.empty()) {
311 const auto &Top = ShadowStack.back();
312 auto TopNode = Top.NodePtr;
313 DCHECK_NE(TopNode, nullptr);
314 auto LocalTime = TSC - Top.EntryTSC;
315 TopNode->CallCount++;
// Subtract the time already attributed to entries visited earlier in this
// loop, so that only the remainder is added to this node's local time.
316 TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
317 CumulativeTreeTime += LocalTime;
320 // TODO: Update the histogram for the node.
321 if (TopNode->FId == FId)
// Read-only access to the array of root node pointers. Returns a reference
// to the member itself, not a copy.
326 const RootArray &getRoots() const { return Roots; }
328 // The deepCopyInto operation will update the provided FunctionCallTrie by
329 // re-creating the contents of this particular FunctionCallTrie in the other
330 // FunctionCallTrie. It will do this using a Depth First Traversal from the
331 // roots, and while doing so recreating the traversal in the provided
// FunctionCallTrie.
334 // This operation will *not* destroy the state in `O`, and thus may cause some
335 // duplicate entries in `O` if it is not empty.
337 // This function is *not* thread-safe, and may require external
338 // synchronisation of both "this" and |O|.
340 // This function must *not* be called with a non-empty FunctionCallTrie |O|.
//
// Every node created in O is allocated from O's own Nodes array and
// NodeIdPairAllocator, and is initialised with the CallCount,
// CumulativeLocalTime and FId of the node it is copied from.
341 void deepCopyInto(FunctionCallTrie &O) const {
// The emptiness precondition is only asserted via DCHECK.
342 DCHECK(O.getRoots().empty());
344 // We then push the root into a stack, to use as the parent marker for new
345 // nodes we push in as we're traversing depth-first down the call tree.
// Node is a node of this trie; NewNode is its counterpart created in O.
346 struct NodeAndParent {
347 FunctionCallTrie::Node *Node;
348 FunctionCallTrie::Node *NewNode;
350 using Stack = Array<NodeAndParent>;
// The traversal stack has its own local allocator, sized from the
// stack_allocator_max profiling flag.
352 typename Stack::AllocatorType StackAllocator(
353 profilingFlags()->stack_allocator_max);
354 Stack DFSStack(StackAllocator);
356 for (const auto Root : getRoots()) {
357 // Add a node in O for this root.
358 auto NewRoot = O.Nodes.AppendEmplace(
359 nullptr, *O.NodeIdPairAllocator, Root->CallCount,
360 Root->CumulativeLocalTime, Root->FId);
362 // Because we cannot allocate more memory we should bail out right away.
363 if (UNLIKELY(NewRoot == nullptr))
366 O.Roots.Append(NewRoot);
368 // TODO: Figure out what to do if we fail to allocate any more stack
369 // space. Maybe warn or report once?
370 DFSStack.AppendEmplace(Root, NewRoot);
371 while (!DFSStack.empty()) {
// NP is taken by value, so it stays usable regardless of later changes to
// DFSStack.
372 NodeAndParent NP = DFSStack.back();
373 DCHECK_NE(NP.Node, nullptr);
374 DCHECK_NE(NP.NewNode, nullptr);
// Copy each callee into O under NP.NewNode, link it into the new parent's
// Callees, and schedule the callee's own subtree for copying.
376 for (const auto Callee : NP.Node->Callees) {
377 auto NewNode = O.Nodes.AppendEmplace(
378 NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
379 Callee.NodePtr->CumulativeLocalTime, Callee.FId);
380 if (UNLIKELY(NewNode == nullptr))
382 NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
383 DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
389 // The mergeInto operation will update the provided FunctionCallTrie by
390 // traversing the current trie's roots and updating (i.e. merging) the data in
391 // the nodes with the data in the target's nodes. If the node doesn't exist in
392 // the provided trie, we add a new one in the right position, and inherit the
393 // data from the original (current) trie, along with all its callees.
395 // This function is *not* thread-safe, and may require external
396 // synchronisation of both "this" and |O|.
//
// Nodes are matched between the two tries by function ID: roots against
// O.Roots, and callees against the Callees of the corresponding target node.
// Nodes added to O are created with zeroed counters; the source node's
// CallCount and CumulativeLocalTime are then added when the pair is visited.
397 void mergeInto(FunctionCallTrie &O) const {
// OrigNode is a node of this trie; TargetNode is the node in O that receives
// its data.
398 struct NodeAndTarget {
399 FunctionCallTrie::Node *OrigNode;
400 FunctionCallTrie::Node *TargetNode;
402 using Stack = Array<NodeAndTarget>;
// The traversal stack has its own local allocator, sized from the
// stack_allocator_max profiling flag.
403 typename Stack::AllocatorType StackAllocator(
404 profilingFlags()->stack_allocator_max);
405 Stack DFSStack(StackAllocator);
407 for (const auto Root : getRoots()) {
408 Node *TargetRoot = nullptr;
// Look for a root in O with the same function ID as this root.
409 auto R = O.Roots.find_element(
410 [&](const Node *Node) { return Node->FId == Root->FId; });
412 TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
414 if (UNLIKELY(TargetRoot == nullptr))
417 O.Roots.Append(TargetRoot);
422 DFSStack.Append(NodeAndTarget{Root, TargetRoot});
423 while (!DFSStack.empty()) {
424 NodeAndTarget NT = DFSStack.back();
425 DCHECK_NE(NT.OrigNode, nullptr);
426 DCHECK_NE(NT.TargetNode, nullptr);
428 // TODO: Update the histogram as well when we have it ready.
// Accumulate this node's counters into the matching node in O.
429 NT.TargetNode->CallCount += NT.OrigNode->CallCount;
430 NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
431 for (const auto Callee : NT.OrigNode->Callees) {
432 auto TargetCallee = NT.TargetNode->Callees.find_element(
433 [&](const FunctionCallTrie::NodeIdPair &C) {
434 return C.FId == Callee.FId;
436 if (TargetCallee == nullptr) {
// No matching callee in O yet: create a zeroed node under the target.
437 auto NewTargetNode = O.Nodes.AppendEmplace(
438 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
440 if (UNLIKELY(NewTargetNode == nullptr))
// NOTE(review): TargetCallee is dereferenced below even when it was nullptr
// above, so it must be re-pointed at the entry appended here -- confirm the
// result of this AppendEmplace is assigned to TargetCallee.
444 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
446 DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
453 } // namespace __xray
455 #endif // XRAY_FUNCTION_CALL_TRIE_H