1 //===-- xray_profiling.cc ---------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a part of XRay, a dynamic runtime instrumentation system.
12 // This is the implementation of a profiling handler.
14 //===----------------------------------------------------------------------===//
18 #include "sanitizer_common/sanitizer_atomic.h"
19 #include "sanitizer_common/sanitizer_flags.h"
20 #include "xray/xray_interface.h"
21 #include "xray/xray_log_interface.h"
22 #include "xray_buffer_queue.h"
23 #include "xray_flags.h"
24 #include "xray_profile_collector.h"
25 #include "xray_profiling_flags.h"
26 #include "xray_recursion_guard.h"
28 #include "xray_utils.h"
// Process-wide state for the profiling-mode implementation.

// Serializes concurrent flush attempts; holds the latest flush status.
35 static atomic_sint32_t ProfilerLogFlushStatus = {
36 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
// Lifecycle status of the profiling implementation (uninitialized ->
// initializing -> initialized -> finalizing -> finalized).
38 static atomic_sint32_t ProfilerLogStatus = {
39 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
// Protects flag parsing/updates in init paths.
41 static SpinMutex ProfilerOptionsMutex;
// Per-thread pointers, published as uintptr_t so they can be claimed with a
// CAS (the value 1 is used as an "initialization in progress" sentinel).
// NOTE(review): later code also reads/writes TLD.FCT, so this struct should
// have an `atomic_uintptr_t FCT;` member and a closing brace — those lines
// appear to be elided from this excerpt; confirm against upstream.
43 struct ProfilingData {
44 atomic_uintptr_t Allocators;
// pthread key whose destructor posts a thread's data at thread exit.
48 static pthread_key_t ProfilingKey;
50 // We use a global buffer queue, which gets initialized once at initialisation
51 // time, and gets reset when profiling is "done".
// Raw storage for the BufferQueue (the variable name on the following
// original line is elided in this excerpt); BQ points into it once built.
52 static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
54 static BufferQueue *BQ = nullptr;
// Per-thread state: the buffers handed out by BQ, plus raw aligned storage
// for the FunctionCallTrie and its Allocators (constructed lazily via
// placement-new in getThreadLocalData()).
56 thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
// NOTE(review): the declarator name (AllocatorsStorage) for this aligned
// storage appears to be on an elided line — confirm against upstream.
57 thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
58 alignof(FunctionCallTrie::Allocators)>::type
60 thread_local std::aligned_storage<sizeof(FunctionCallTrie),
61 alignof(FunctionCallTrie)>::type
62 FunctionCallTrieStorage;
// This thread's published pointers; {0, 0} means "not yet initialized".
63 thread_local ProfilingData TLD{{0}, {0}};
// Guards the event handlers against re-entry (e.g. from signal handlers).
64 thread_local atomic_uint8_t ReentranceGuard{0};
66 // We use a separate guard for ensuring that for this thread, if we're already
67 // cleaning up, that any signal handlers don't attempt to cleanup nor
69 thread_local atomic_uint8_t TLDInitGuard{0};
71 // We also use a separate latch to signal that the thread is exiting, and
72 // non-essential work should be ignored (things like recording events, etc.).
73 thread_local atomic_uint8_t ThreadExitingLatch{0};
// Lazily initializes and returns this thread's ProfilingData: registers the
// pthread-key destructor once, claims the Allocators/FCT slots with a
// 0 -> 1 CAS sentinel, acquires buffers from the global BufferQueue, and
// placement-news the Allocators and FunctionCallTrie into thread-local
// storage. Several lines (early returns, closing braces) are elided in this
// excerpt; the structure below is annotated from what is visible.
75 static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
// Once per thread: make the key destructor fire for this thread's TLD.
76 thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
77 pthread_setspecific(ProfilingKey, &TLD);
// Prevent re-entrant initialization (e.g. from a signal handler).
82 RecursionGuard TLDInit(TLDInitGuard);
// Skip all setup work if this thread is already tearing down.
86 if (atomic_load_relaxed(&ThreadExitingLatch))
// Claim the Allocators slot: CAS expected 0 -> sentinel 1 means we are the
// initializer; losing the race means another path is/was initializing.
90 if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
91 memory_order_acq_rel)) {
// Undo handler: clear the sentinel if any buffer acquisition below fails
// (the elided lines presumably arm/disarm these scope-exit cleanups).
93 auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
95 atomic_store(&TLD.Allocators, 0, memory_order_release);
98 // Acquire a set of buffers for this thread.
102 if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
104 auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
106 BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
109 if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
111 auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
113 BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
116 if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
117 BufferQueue::ErrorCode::Ok)
119 auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
121 BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
124 if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
125 BufferQueue::ErrorCode::Ok)
// Construct the Allocators in-place from the acquired buffers, then publish
// the real pointer (replacing the sentinel) with release ordering.
129 new (&AllocatorsStorage) FunctionCallTrie::Allocators(
130 FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
131 Allocators = reinterpret_cast<uptr>(
132 reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
133 atomic_store(&TLD.Allocators, Allocators, memory_order_release);
// Same claim-then-publish protocol for the FunctionCallTrie itself, built
// on top of the just-published Allocators.
140 if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
141 new (&FunctionCallTrieStorage)
142 FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
143 atomic_load_relaxed(&TLD.Allocators)));
144 FCT = reinterpret_cast<uptr>(
145 reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
146 atomic_store(&TLD.FCT, FCT, memory_order_release);
// Destroys this thread's FunctionCallTrie and Allocators in place, but only
// when the published pointer actually refers to this thread's own aligned
// storage (a sentinel value of 1, or 0, is left alone). atomic_exchange to 0
// ensures each object is destroyed at most once.
155 static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
156 auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
157 if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
158 &FunctionCallTrieStorage)))
159 reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
// Same pattern for the Allocators. NOTE(review): the `if (Allocators ==`
// line appears elided from this excerpt — the visible lines are the tail of
// that comparison; confirm against upstream.
161 auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
163 reinterpret_cast<uptr>(
164 reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
165 reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
// Hands this thread's collected data (trie, allocators, buffers) off to the
// profile collector service, then resets the thread-local buffer handles.
// Takes the slots via atomic_exchange so the data is posted at most once;
// the pointer-vs-storage comparisons (with elided early returns) skip
// posting when the slots hold 0/sentinel rather than real objects.
168 static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
// Don't post while TLD initialization/cleanup is in progress on this thread.
169 RecursionGuard TLDInit(TLDInitGuard);
173 uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
// Presumably returns early here when P doesn't point at this thread's
// storage (the branch body is elided in this excerpt) — confirm upstream.
174 if (P != reinterpret_cast<uptr>(
175 reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
178 auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
179 DCHECK_NE(FCT, nullptr);
181 uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
183 reinterpret_cast<uptr>(
184 reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage))
187 auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
188 DCHECK_NE(Allocators, nullptr);
190 // Always move the data into the profile collector.
191 profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
192 std::move(ThreadBuffers), GetTid());
194 // Re-initialize the ThreadBuffers object to a known "default" state.
195 ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
// Returns the compiler-provided default options string for the profiler
// (stringified XRAY_PROFILER_DEFAULT_OPTIONS). The #else branch returning an
// empty string, and the #endif, are elided from this excerpt.
200 const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
201 #ifdef XRAY_PROFILER_DEFAULT_OPTIONS
202 return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
// Writes the serialized profile data to a file (unless no_flush is set),
// then resets the collector and both status atomics. Only legal after
// profilingFinalize(); concurrent flushes are excluded via an exchange on
// ProfilerLogFlushStatus. Several early-return bodies and closing braces
// are elided in this excerpt.
208 XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
// Refuse to flush unless the implementation has been finalized first.
209 if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
210 XRayLogInitStatus::XRAY_LOG_FINALIZED) {
212 Report("Not flushing profiles, profiling not been finalized.\n");
213 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
// If the guard fails to acquire we are re-entering (e.g. from a signal
// handler) — bail out rather than flush from an unsafe context.
216 RecursionGuard SignalGuard(ReentranceGuard);
219 Report("Cannot finalize properly inside a signal handler!\n");
220 atomic_store(&ProfilerLogFlushStatus,
221 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
222 memory_order_release);
223 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
// Claim the flush: whoever swaps in FLUSHING first proceeds; others bail.
226 s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
227 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
228 memory_order_acq_rel);
229 if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
231 Report("Not flushing profiles, implementation still flushing.\n");
232 return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
235 // At this point, we'll create the file that will contain the profile, but
236 // only if the options say so.
237 if (!profilingFlags()->no_flush) {
238 // First check whether we have data in the profile collector service
239 // before we try and write anything down.
240 XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
241 if (B.Data == nullptr) {
243 Report("profiling: No data to flush.\n");
// Open the output; on failure (branch body elided) the data is dropped.
245 LogWriter *LW = LogWriter::Open();
248 Report("profiling: Failed to flush to file, dropping data.\n");
250 // Now for each of the buffers, write out the profile data as we would
251 // see it in memory, verbatim.
252 while (B.Data != nullptr && B.Size != 0) {
253 LW->WriteAll(reinterpret_cast<const char *>(B.Data),
254 reinterpret_cast<const char *>(B.Data) + B.Size);
255 B = profileCollectorService::nextBuffer(B);
258 LogWriter::Close(LW);
// Drop all collected data and return both status words to their idle
// states so the implementation can be re-initialized later.
262 profileCollectorService::reset();
264 atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
265 memory_order_release);
266 atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
267 memory_order_release);
269 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
// The instrumentation hook for sled entry/exit events: records a function
// enter or exit into this thread's FunctionCallTrie. Reads the TSC before
// taking the recursion guard so the timestamp is as close to the event as
// possible. Early-return bodies and closing braces are elided here.
272 void profilingHandleArg0(int32_t FuncId,
273 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
// NOTE(review): the declaration of CPU (e.g. `unsigned char CPU;`) appears
// to be on an elided line — confirm against upstream.
275 auto TSC = readTSC(CPU);
// Ignore re-entrant invocations (signal handlers firing inside the hook).
276 RecursionGuard G(ReentranceGuard);
// Do nothing before initialization completes...
280 auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
281 if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
282 Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
// ...and once finalization starts, post what we have instead of recording.
285 if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
286 Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
287 postCurrentThreadFCT(TLD);
291 auto T = getThreadLocalData();
295 auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
// Dispatch on the sled type (the `switch (Entry)` line is elided here).
297 case XRayEntryType::ENTRY:
298 case XRayEntryType::LOG_ARGS_ENTRY:
299 FCT->enterFunction(FuncId, TSC, CPU);
301 case XRayEntryType::EXIT:
302 case XRayEntryType::TAIL:
303 FCT->exitFunction(FuncId, TSC, CPU);
306 // FIXME: Handle bugs.
311 void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
312 uint64_t) XRAY_NEVER_INSTRUMENT {
313 return profilingHandleArg0(FuncId, Entry);
// Transitions the implementation INITIALIZED -> FINALIZING -> FINALIZED:
// finalizes the buffer queue generation, waits a grace period, posts the
// calling thread's data, and serializes everything in the collector. Some
// statements (e.g. the BQ finalization call after the DCHECK) are elided in
// this excerpt.
316 XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
// Only an INITIALIZED implementation may be finalized; the CAS also
// prevents two finalizers from racing.
317 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
318 if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
319 XRayLogInitStatus::XRAY_LOG_FINALIZING,
320 memory_order_release)) {
322 Report("Cannot finalize profile, the profiling is not initialized.\n");
323 return static_cast<XRayLogInitStatus>(CurrentStatus);
326 // Mark then finalize the current generation of buffers. This allows us to let
327 // the threads currently holding onto new buffers still use them, but let the
328 // last reference do the memory cleanup.
329 DCHECK_NE(BQ, nullptr);
332 // Wait a grace period to allow threads to see that we're finalizing.
333 SleepForMillis(profilingFlags()->grace_period_ms);
335 // If we for some reason are entering this function from an instrumented
336 // handler, we bail out.
337 RecursionGuard G(ReentranceGuard);
339 return static_cast<XRayLogInitStatus>(CurrentStatus);
341 // Post the current thread's data if we have any.
342 postCurrentThreadFCT(TLD);
344 // Then we force serialize the log data.
345 profileCollectorService::serialize();
347 atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
348 memory_order_release);
349 return XRayLogInitStatus::XRAY_LOG_FINALIZED;
// Initializes the profiling implementation: parses flags (compiler defaults,
// then the XRAY_PROFILING_OPTIONS environment variable, then the caller's
// Options string), resets the collector, constructs or re-initializes the
// global BufferQueue, registers the pthread-key/atexit teardown hooks once,
// and installs the XRay handlers. Returns INITIALIZED on success. The return
// type (XRayLogInitStatus) is on a line elided from this excerpt, as are
// various branch bodies and closing braces.
353 profilingLoggingInit(size_t, size_t, void *Options,
354 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
// Bail if we are re-entering (e.g. called from an instrumented context).
355 RecursionGuard G(ReentranceGuard);
357 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
// Claim the UNINITIALIZED -> INITIALIZING transition; losing the CAS means
// somebody else already (or still) owns the implementation.
359 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
360 if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
361 XRayLogInitStatus::XRAY_LOG_INITIALIZING,
362 memory_order_acq_rel)) {
364 Report("Cannot initialize already initialised profiling "
365 "implementation.\n");
366 return static_cast<XRayLogInitStatus>(CurrentStatus);
// Flag parsing, lowest to highest precedence: compiler defaults, then the
// environment, then the explicit Options string.
370 SpinMutexLock Lock(&ProfilerOptionsMutex);
371 FlagParser ConfigParser;
374 registerProfilerFlags(&ConfigParser, &Flags);
375 ConfigParser.ParseString(profilingCompilerDefinedFlags());
376 const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
379 ConfigParser.ParseString(Env);
381 // Then parse the configuration string provided.
382 ConfigParser.ParseString(static_cast<const char *>(Options));
384 ReportUnrecognizedFlags();
385 *profilingFlags() = Flags;
388 // We need to reset the profile data collection implementation now.
389 profileCollectorService::reset();
391 // Then also reset the buffer queue implementation.
// First-time path (guard elided): placement-new the BufferQueue into the
// static storage; Success reports whether construction worked.
393 bool Success = false;
394 new (&BufferQueueStorage)
395 BufferQueue(profilingFlags()->per_thread_allocator_max,
396 profilingFlags()->buffers_max, Success);
399 Report("Failed to initialize preallocated memory buffers!");
400 atomic_store(&ProfilerLogStatus,
401 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
402 memory_order_release);
403 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
406 // If we've succeeded, set the global pointer to the initialised storage.
407 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
// Re-initialization path (the enclosing else, elided here): reuse the
// already-constructed BufferQueue for a new generation.
410 auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
411 profilingFlags()->buffers_max);
413 if (InitStatus != BufferQueue::ErrorCode::Ok) {
415 Report("Failed to initialize preallocated memory buffers; error: %s",
416 BufferQueue::getErrorString(InitStatus));
417 atomic_store(&ProfilerLogStatus,
418 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
419 memory_order_release);
420 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
423 DCHECK(!BQ->finalizing());
426 // We need to set up the exit handlers.
427 static pthread_once_t Once = PTHREAD_ONCE_INIT;
// Key destructor: runs at thread exit with the thread's &TLD; latches
// ThreadExitingLatch so event recording stops, then posts the data.
431 &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
432 if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
438 auto T = reinterpret_cast<ProfilingData *>(P);
439 if (atomic_load_relaxed(&T->Allocators) == 0)
443 // If we're somehow executing this while inside a
444 // non-reentrant-friendly context, we skip attempting to post
445 // the current thread's data.
446 RecursionGuard G(ReentranceGuard);
450 postCurrentThreadFCT(*T);
454 // We also need to set up an exit handler, so that we can get the
455 // profile information at exit time. We use the C API to do this, to not
456 // rely on C++ ABI functions for registering exit handlers.
457 Atexit(+[]() XRAY_NEVER_INSTRUMENT {
458 if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
// Ensure the main thread's TLD is destroyed after finalize+flush below.
462 at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
464 // Finalize and flush.
465 if (profilingFinalize() != XRAY_LOG_FINALIZED ||
466 profilingFlush() != XRAY_LOG_FLUSHED)
470 Report("XRay Profile flushed at exit.");
// Install the buffer iterator and both instrumentation handlers, then
// publish the INITIALIZED state.
474 __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
475 __xray_set_handler(profilingHandleArg0);
476 __xray_set_handler_arg1(profilingHandleArg1);
478 atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
479 memory_order_release);
481 Report("XRay Profiling init successful.\n");
483 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
486 bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
487 // Set up the flag defaults from the static defaults and the
488 // compiler-provided defaults.
490 SpinMutexLock Lock(&ProfilerOptionsMutex);
491 auto *F = profilingFlags();
493 FlagParser ProfilingParser;
494 registerProfilerFlags(&ProfilingParser, F);
495 ProfilingParser.ParseString(profilingCompilerDefinedFlags());
499 profilingLoggingInit,
504 auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
505 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
507 Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
513 if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
514 __xray_log_select_mode("xray_profiling");
518 } // namespace __xray
520 static auto UNUSED Unused = __xray::profilingDynamicInitializer();