1 //===-- sanitizer_symbolizer_posix_libcdep.cc -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is shared between AddressSanitizer and ThreadSanitizer
11 // run-time libraries.
12 // POSIX-specific implementation of symbolizer parts.
13 //===----------------------------------------------------------------------===//
15 #include "sanitizer_platform.h"
17 #include "sanitizer_allocator_internal.h"
18 #include "sanitizer_common.h"
19 #include "sanitizer_flags.h"
20 #include "sanitizer_internal_defs.h"
21 #include "sanitizer_linux.h"
22 #include "sanitizer_placement_new.h"
23 #include "sanitizer_procmaps.h"
24 #include "sanitizer_symbolizer.h"
25 #include "sanitizer_symbolizer_libbacktrace.h"
32 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
33 // because we do not require a C++ ABI library to be linked to a program
34 // using sanitizers; if it's not present, we'll just use the mangled name.
// DemangleCXXABI tests the address of this function before calling it,
// since the weak symbol may be absent at run time.
35 namespace __cxxabiv1 {
36 extern "C" SANITIZER_WEAK_ATTRIBUTE
37 char *__cxa_demangle(const char *mangled, char *buffer,
38 size_t *length, int *status);
41 namespace __sanitizer {
43 // Attempts to demangle the name via __cxa_demangle from __cxxabiv1.
// |name| is the symbol name to translate. When the weak __cxa_demangle
// symbol is present and the call yields a non-null string, that string is
// returned to the caller.
// NOTE(review): the path taken when demangling is unavailable or fails is
// not visible in this excerpt - confirm it hands back |name| unchanged.
44 static const char *DemangleCXXABI(const char *name) {
45 // FIXME: __cxa_demangle aggressively insists on allocating memory.
46 // There's not much we can do about that, short of providing our
47 // own demangler (libc++abi's implementation could be adapted so that
48 // it does not allocate). For now, we just call it anyway, and we leak
49 // the returned value.
// Guard on the function address: the symbol is declared weak.
50 if (__cxxabiv1::__cxa_demangle)
// The buffer, length and status arguments are all passed as 0.
51 if (const char *demangled_name =
52 __cxxabiv1::__cxa_demangle(name, 0, 0, 0))
53 return demangled_name;
58 // Extracts the prefix of "str" that consists of any characters not
59 // present in "delims" string, and copies this prefix to "result", allocating
61 // Returns a pointer to "str" after skipping extracted prefix and first
// The copy is stored in a buffer of prefix_len + 1 bytes obtained from
// InternalAlloc and is NUL-terminated. Callers in this file release such
// buffers with InternalFree (e.g. file_line_info in SymbolizePC).
63 static const char *ExtractToken(const char *str, const char *delims,
// Length of the leading run of characters that are not in |delims|.
65 uptr prefix_len = internal_strcspn(str, delims);
66 *result = (char*)InternalAlloc(prefix_len + 1);
67 internal_memcpy(*result, str, prefix_len);
68 (*result)[prefix_len] = '\0';
69 const char *prefix_end = str + prefix_len;
// Step over one delimiter character, unless the prefix ran up to the
// terminating NUL of |str|.
70 if (*prefix_end != '\0') prefix_end++;
74 // Same as ExtractToken, but converts extracted token to integer.
// The token text is parsed with internal_atoll and the value is narrowed to
// int before being stored through |result|.
75 static const char *ExtractInt(const char *str, const char *delims,
// |ret| holds the position reported by ExtractToken, i.e. where parsing of
// the next token should continue.
78 const char *ret = ExtractToken(str, delims, &buff);
80 *result = (int)internal_atoll(buff);
// Counterpart of ExtractInt for uptr values: the token produced by
// ExtractToken is parsed with internal_atoll and cast to uptr before being
// stored through |result|.
86 static const char *ExtractUptr(const char *str, const char *delims,
// |ret| holds the position reported by ExtractToken, i.e. where parsing of
// the next token should continue.
89 const char *ret = ExtractToken(str, delims, &buff);
91 *result = (uptr)internal_atoll(buff);
// Common interface of the out-of-process symbolizer backends in this file;
// SymbolizerProcess and Addr2LinePool derive from it and POSIXSymbolizer
// talks to them through an ExternalSymbolizerInterface pointer.
97 class ExternalSymbolizerInterface {
99 // Can't declare pure virtual functions in sanitizer runtimes:
100 // __cxa_pure_virtual might be unavailable.
// For that reason SendCommand has a body here instead of being declared
// pure; derived classes provide the real implementation.
// |is_data| selects a data request rather than a code request, and
// |module_name| / |module_offset| identify the address to symbolize.
101 virtual char *SendCommand(bool is_data, const char *module_name,
102 uptr module_offset) {
107 // SymbolizerProcess encapsulates communication between the tool and
108 // external symbolizer program, running in a different subprocess.
109 // SymbolizerProcess may not be used from two threads simultaneously.
// The request format, the end-of-output test and the exec arguments come
// from the virtual hooks RenderInputCommand, ReachedEndOfOutput and
// ExecuteWithDefaultArgs, which LLVMSymbolizerProcess and Addr2LineProcess
// override.
110 class SymbolizerProcess : public ExternalSymbolizerInterface {
// Starts with both descriptors set to kInvalidFd and both warning flags
// cleared. An empty path is rejected by the CHECK_NE.
112 explicit SymbolizerProcess(const char *path)
114 input_fd_(kInvalidFd),
115 output_fd_(kInvalidFd),
117 failed_to_start_(false),
118 reported_invalid_path_(false) {
120 CHECK_NE(path_[0], '\0');
// Entry point for one symbolization request. Attempts are bounded by
// kMaxTimesRestarted; the counter is a member, so the budget is shared by
// all calls on this object rather than reset for every request.
123 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
124 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
125 // Start or restart symbolizer if we failed to send command to it.
126 if (char *res = SendCommandImpl(is_data, module_name, module_offset))
// Emit the failure warning only once per object.
130 if (!failed_to_start_) {
131 Report("WARNING: Failed to use and restart external symbolizer!\n");
132 failed_to_start_ = true;
// Release descriptors left over from a previous subprocess before starting
// a new one.
139 if (input_fd_ != kInvalidFd)
140 internal_close(input_fd_);
141 if (output_fd_ != kInvalidFd)
142 internal_close(output_fd_);
143 return StartSymbolizerSubprocess();
// Performs one request/response exchange using buffer_ for both directions:
// the command is rendered into it, written to the subprocess, and the reply
// is read back into the same storage. Both descriptors are checked first.
146 char *SendCommandImpl(bool is_data, const char *module_name,
147 uptr module_offset) {
148 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
151 if (!RenderInputCommand(buffer_, kBufferSize, is_data, module_name,
154 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
156 if (!readFromSymbolizer(buffer_, kBufferSize))
// Reads the reply from input_fd_ into |buffer|. read_len tracks the bytes
// received so far, each read asks for at most max_length - read_len - 1
// bytes (keeping one byte for the terminating NUL), and ReachedEndOfOutput
// is consulted after every read.
161 bool readFromSymbolizer(char *buffer, uptr max_length) {
166 uptr just_read = internal_read(input_fd_, buffer + read_len,
167 max_length - read_len - 1);
168 // We can't read 0 bytes, as we don't expect external symbolizer to close
// A result of 0 or (uptr)-1 is reported as a read failure.
170 if (just_read == 0 || just_read == (uptr)-1) {
171 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
174 read_len += just_read;
175 if (ReachedEndOfOutput(buffer, read_len))
// NUL-terminate the accumulated reply.
178 buffer[read_len] = '\0';
// Writes |length| bytes of |buffer| to output_fd_. A result of 0 or
// (uptr)-1 from internal_write is reported as a write failure.
182 bool writeToSymbolizer(const char *buffer, uptr length) {
185 uptr write_len = internal_write(output_fd_, buffer, length);
186 if (write_len == 0 || write_len == (uptr)-1) {
187 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
// Launches the binary at path_ in a child process connected to this process
// by two pipes. A missing binary, a pipe() failure and a fork failure are
// each reported with a WARNING.
193 bool StartSymbolizerSubprocess() {
194 if (!FileExists(path_)) {
// Warn about a bad path only once per object.
195 if (!reported_invalid_path_) {
196 Report("WARNING: invalid path to external symbolizer!\n");
197 reported_invalid_path_ = true;
204 // The client program may close its stdin and/or stdout and/or stderr
205 // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
206 // In this case the communication between the forked processes may be
207 // broken if either the parent or the child tries to close or duplicate
208 // these descriptors. The loop below produces two pairs of file
209 // descriptors, each greater than 2 (stderr).
// Up to five pipes are created while looking for two usable pairs.
211 for (int i = 0; i < 5; i++) {
212 if (pipe(sock_pair[i]) == -1) {
// pipe() failed: close every pair created so far before reporting.
213 for (int j = 0; j < i; j++) {
214 internal_close(sock_pair[j][0]);
215 internal_close(sock_pair[j][1]);
217 Report("WARNING: Can't create a socket pair to start "
218 "external symbolizer (errno: %d)\n", errno);
// A pair is usable only when both of its descriptors are above 2.
220 } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
224 outfd = sock_pair[i];
// Close the earlier pairs that were not selected, skipping the one held in
// infd.
225 for (int j = 0; j < i; j++) {
226 if (sock_pair[j] == infd) continue;
227 internal_close(sock_pair[j][0]);
228 internal_close(sock_pair[j][1]);
237 // Real fork() may call user callbacks registered with pthread_atfork().
238 int pid = internal_fork();
// Fork-failure path: all four pipe ends are closed before the warning.
241 internal_close(infd[0]);
242 internal_close(infd[1]);
243 internal_close(outfd[0]);
244 internal_close(outfd[1]);
245 Report("WARNING: failed to fork external symbolizer "
246 " (errno: %d)\n", errno);
248 } else if (pid == 0) {
// Child: stdin is redirected to the read end of outfd and stdout to the
// write end of infd, then the original pipe descriptors are closed.
250 internal_close(STDOUT_FILENO);
251 internal_close(STDIN_FILENO);
252 internal_dup2(outfd[0], STDIN_FILENO);
253 internal_dup2(infd[1], STDOUT_FILENO);
254 internal_close(outfd[0]);
255 internal_close(outfd[1]);
256 internal_close(infd[0]);
257 internal_close(infd[1]);
// Walks every descriptor number from the _SC_OPEN_MAX limit down to 3.
258 for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--)
// The exec itself is delegated to the subclass hook.
260 ExecuteWithDefaultArgs(path_);
264 // Continue execution in parent process.
// The parent closes the pipe ends used by the child and keeps outfd[1] as
// the descriptor it writes requests to.
265 internal_close(outfd[0]);
266 internal_close(infd[1]);
268 output_fd_ = outfd[1];
270 // Check that symbolizer subprocess started successfully.
// Wait kSymbolizerStartupTimeMillis, then poll the child with a
// non-blocking waitpid (WNOHANG).
272 SleepForMillis(kSymbolizerStartupTimeMillis);
273 int exited_pid = waitpid(pid, &pid_status, WNOHANG);
274 if (exited_pid != 0) {
275 // Either waitpid failed, or child has already exited.
276 Report("WARNING: external symbolizer didn't start up correctly!\n");
// Customization points for concrete subprocess types; LLVMSymbolizerProcess
// and Addr2LineProcess each override all three.
283 virtual bool RenderInputCommand(char *buffer, uptr max_length, bool is_data,
284 const char *module_name,
285 uptr module_offset) const {
289 virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
293 virtual void ExecuteWithDefaultArgs(const char *path_to_binary) const {
// Scratch storage shared by the outgoing command and the incoming reply.
301 static const uptr kBufferSize = 16 * 1024;
302 char buffer_[kBufferSize];
// Upper bound for the retry loop in SendCommand.
304 static const uptr kMaxTimesRestarted = 5;
// Delay before StartSymbolizerSubprocess polls the child with waitpid.
305 static const int kSymbolizerStartupTimeMillis = 10;
// Loop counter of SendCommand; persists across calls.
306 uptr times_restarted_;
// Set once the restart-failure warning has been printed.
307 bool failed_to_start_;
// Set once the invalid-path warning has been printed.
308 bool reported_invalid_path_;
311 // For now we assume the following protocol:
312 // For each request of the form
313 // <module_name> <module_offset>
314 // passed to STDIN, external symbolizer prints to STDOUT response:
316 // <file_name>:<line_number>:<column_number>
318 // <file_name>:<line_number>:<column_number>
// SymbolizerProcess specialization that speaks the protocol described
// above: it defines the request syntax, the end-of-reply marker and the
// command-line flags passed to the binary.
321 class LLVMSymbolizerProcess : public SymbolizerProcess {
323 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}
// Formats one request line: an optional DATA prefix (for data requests),
// the module name in double quotes, the offset in hex, and a newline.
326 bool RenderInputCommand(char *buffer, uptr max_length, bool is_data,
327 const char *module_name, uptr module_offset) const {
328 internal_snprintf(buffer, max_length, "%s\"%s\" 0x%zx\n",
329 is_data ? "DATA " : "", module_name, module_offset);
// The reply is complete once it ends with two consecutive newlines.
333 bool ReachedEndOfOutput(const char *buffer, uptr length) const {
334 // Empty line marks the end of llvm-symbolizer output.
335 return length >= 2 && buffer[length - 1] == '\n' &&
336 buffer[length - 2] == '\n';
// Replaces the current process image with the symbolizer binary via execl.
// The --default-arch flag is chosen at compile time from the target macros
// below, and the inlining flag follows the symbolize_inline_frames common
// flag.
339 void ExecuteWithDefaultArgs(const char *path_to_binary) const {
340 #if defined(__x86_64__)
341 const char* const kSymbolizerArch = "--default-arch=x86_64";
342 #elif defined(__i386__)
343 const char* const kSymbolizerArch = "--default-arch=i386";
344 #elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
345 const char* const kSymbolizerArch = "--default-arch=powerpc64";
346 #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
347 const char* const kSymbolizerArch = "--default-arch=powerpc64le";
349 const char* const kSymbolizerArch = "--default-arch=unknown";
352 const char *const inline_flag = common_flags()->symbolize_inline_frames
354 : "--inlining=false";
// path_to_binary is passed twice: as the file to execute and as argv[0].
355 execl(path_to_binary, path_to_binary, inline_flag, kSymbolizerArch,
// SymbolizerProcess specialization bound to a single module: the module
// name is copied at construction (internal_strdup) and handed to the binary
// on its command line, so each request carries only an offset.
360 class Addr2LineProcess : public SymbolizerProcess {
362 Addr2LineProcess(const char *path, const char *module_name)
363 : SymbolizerProcess(path), module_name_(internal_strdup(module_name)) {}
// Name of the module this process was created for; Addr2LinePool uses it to
// pick the matching process.
365 const char *module_name() const { return module_name_; }
// Writes the offset in hex followed by a newline. The request must be for
// the module this process was created for (enforced by CHECK_EQ).
368 bool RenderInputCommand(char *buffer, uptr max_length, bool is_data,
369 const char *module_name, uptr module_offset) const {
372 CHECK_EQ(0, internal_strcmp(module_name, module_name_));
373 internal_snprintf(buffer, max_length, "0x%zx\n", module_offset);
// Scans the bytes received so far for newline characters to decide whether
// the reply is complete.
377 bool ReachedEndOfOutput(const char *buffer, uptr length) const {
378 // Output should consist of two lines.
380 for (uptr i = 0; i < length; ++i) {
381 if (buffer[i] == '\n')
// Executes the binary with the -Cfe option string followed by the module
// name; path_to_binary doubles as argv[0].
389 void ExecuteWithDefaultArgs(const char *path_to_binary) const {
390 execl(path_to_binary, path_to_binary, "-Cfe", module_name_, (char *)0);
393 const char *module_name_; // Owned, leaked.
// Keeps a collection of Addr2LineProcess objects and routes each request to
// the one associated with the requested module, creating a new process
// object when needed.
396 class Addr2LinePool : public ExternalSymbolizerInterface {
// |addr2line_path| is stored as given (not copied); |allocator| backs the
// placement-new of pooled processes. The vector is constructed with the
// argument 16.
398 explicit Addr2LinePool(const char *addr2line_path,
399 LowLevelAllocator *allocator)
400 : addr2line_path_(addr2line_path), allocator_(allocator),
401 addr2line_pool_(16) {}
// Looks through the pool with a linear scan, comparing |module_name| with
// each entry's module_name() via internal_strcmp, then forwards the request
// to the selected process.
403 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
406 Addr2LineProcess *addr2line = 0;
407 for (uptr i = 0; i < addr2line_pool_.size(); ++i) {
409 internal_strcmp(module_name, addr2line_pool_[i]->module_name())) {
410 addr2line = addr2line_pool_[i];
// New processes are placement-constructed in allocator_ storage and
// appended to the pool.
416 new(*allocator_) Addr2LineProcess(addr2line_path_, module_name);
417 addr2line_pool_.push_back(addr2line);
419 return addr2line->SendCommand(is_data, module_name, module_offset);
423 const char *addr2line_path_;
424 LowLevelAllocator *allocator_;
425 InternalMmapVector<Addr2LineProcess*> addr2line_pool_;
428 #if SANITIZER_SUPPORTS_WEAK_HOOKS
// Weak declarations of the hooks consumed by InternalSymbolizer.
// InternalSymbolizer::get() compares the code and data hooks against 0
// before creating an instance; the flush and demangle hooks are tested at
// their call sites.
// Symbolizes a code location into |Buffer| (capacity |MaxLength|).
430 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
431 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
432 char *Buffer, int MaxLength);
// Same signature as the code hook, used for data requests.
433 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
434 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
435 char *Buffer, int MaxLength);
// Invoked from InternalSymbolizer::Flush when present.
436 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
437 void __sanitizer_symbolize_flush();
// Invoked from InternalSymbolizer::Demangle, which compares the returned
// value with the size of the buffer it supplied.
438 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
439 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
// In-process symbolizer built on the weak __sanitizer_symbolize_* hooks.
// Replies are written into the fixed-size member buffer_.
443 class InternalSymbolizer {
// Signature shared by the code and data hooks.
445 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
// Creates an instance in |alloc| storage only when both the code and the
// data hook are present.
447 static InternalSymbolizer *get(LowLevelAllocator *alloc) {
448 if (__sanitizer_symbolize_code != 0 &&
449 __sanitizer_symbolize_data != 0) {
450 return new(*alloc) InternalSymbolizer();
// Dispatches to the data or code hook depending on |is_data|, letting the
// hook fill buffer_ (capacity kBufferSize).
455 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
456 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
457 : __sanitizer_symbolize_code;
458 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
// The flush hook is optional, so its address is tested before the call.
464 if (__sanitizer_symbolize_flush)
465 __sanitizer_symbolize_flush();
// Demangles |name| through the optional demangle hook. The result buffer
// starts at 1024 bytes; when the hook reports a required length larger than
// the buffer, the buffer is freed and the attempt is repeated with
// req_length + 1 bytes, as long as the size stays within
// InternalSizeClassMap::kMaxSize.
468 const char *Demangle(const char *name) {
469 if (__sanitizer_symbolize_demangle) {
470 for (uptr res_length = 1024;
471 res_length <= InternalSizeClassMap::kMaxSize;) {
472 char *res_buff = static_cast<char*>(InternalAlloc(res_length));
474 __sanitizer_symbolize_demangle(name, res_buff, res_length);
475 if (req_length > res_length) {
476 res_length = req_length + 1;
477 InternalFree(res_buff);
// Instances are created by get(), which is the only constructor call
// visible in this class.
487 InternalSymbolizer() { }
489 static const int kBufferSize = 16 * 1024;
// NOTE(review): no use of kMaxDemangledNameSize is visible in this class;
// Demangle spells out 1024 directly - confirm whether they should be tied.
490 static const int kMaxDemangledNameSize = 1024;
491 char buffer_[kBufferSize];
493 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
// Placeholder used when SANITIZER_SUPPORTS_WEAK_HOOKS is false: get() never
// produces an instance and Demangle() hands its argument back unchanged.
495 class InternalSymbolizer {
497 static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; }
// Same signature as the hook-based variant so callers compile either way.
498 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
502 const char *Demangle(const char *name) { return name; }
505 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
// Symbolizer implementation that combines up to three backends supplied at
// construction: an external symbolizer, an internal symbolizer and a
// libbacktrace symbolizer. Any of them may be 0. Most public entry points
// hold mu_ for their whole duration.
507 class POSIXSymbolizer : public Symbolizer {
509 POSIXSymbolizer(ExternalSymbolizerInterface *external_symbolizer,
510 InternalSymbolizer *internal_symbolizer,
511 LibbacktraceSymbolizer *libbacktrace_symbolizer)
513 external_symbolizer_(external_symbolizer),
514 internal_symbolizer_(internal_symbolizer),
515 libbacktrace_symbolizer_(libbacktrace_symbolizer) {}
// Symbolizes the code address |addr|. When no loaded module covers the
// address, a bare SymbolizedStack for it is returned. Otherwise
// libbacktrace is tried first; failing that, the reply obtained through
// SendCommand is parsed as repeated pairs of lines: a function name, then
// <file>:<line>:<column>.
517 SymbolizedStack *SymbolizePC(uptr addr) override {
518 BlockingMutexLock l(&mu_);
519 const char *module_name;
521 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
522 return SymbolizedStack::New(addr);
523 // First, try to use libbacktrace symbolizer (if it's available).
524 if (libbacktrace_symbolizer_ != 0) {
526 if (SymbolizedStack *res = libbacktrace_symbolizer_->SymbolizeCode(
527 addr, module_name, module_offset))
530 // Always fill data about module name and offset.
531 SymbolizedStack *res = SymbolizedStack::New(addr);
532 res->info.FillAddressAndModuleInfo(addr, module_name, module_offset);
// false selects a code (not data) request.
534 const char *str = SendCommand(false, module_name, module_offset);
536 // Symbolizer was not initialized or failed.
540 bool top_frame = true;
541 SymbolizedStack *last = res;
// Each frame starts with a line holding the function name.
543 char *function_name = 0;
544 str = ExtractToken(str, "\n", &function_name);
545 CHECK(function_name);
546 if (function_name[0] == '\0') {
547 // There are no more frames.
550 SymbolizedStack *cur;
// Newly created frames carry the same address and module info as |res|.
555 cur = SymbolizedStack::New(addr);
556 cur->info.FillAddressAndModuleInfo(addr, module_name, module_offset);
561 AddressInfo *info = &cur->info;
// The buffer allocated by ExtractToken is handed over to the frame info.
562 info->function = function_name;
563 // Parse <file>:<line>:<column> buffer.
564 char *file_line_info = 0;
565 str = ExtractToken(str, "\n", &file_line_info);
566 CHECK(file_line_info);
567 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
568 line_info = ExtractInt(line_info, ":", &info->line);
// An empty delimiter set makes the column token run to the end of the
// string.
569 line_info = ExtractInt(line_info, "", &info->column);
// The temporary line copy is no longer needed once its fields are parsed.
570 InternalFree(file_line_info);
572 // Functions and filenames can be "??", in which case we write 0
573 // to address info to mark that names are unknown.
574 if (0 == internal_strcmp(info->function, "??")) {
575 InternalFree(info->function);
578 if (0 == internal_strcmp(info->file, "??")) {
579 InternalFree(info->file);
// Symbolizes the data address |addr| into |info|. The module name (copied
// with internal_strdup) and offset are filled in first; libbacktrace is
// tried next; otherwise the reply from SendCommand is parsed into name,
// start and size.
586 bool SymbolizeData(uptr addr, DataInfo *info) override {
587 BlockingMutexLock l(&mu_);
588 LoadedModule *module = FindModuleForAddress(addr);
591 const char *module_name = module->full_name();
592 uptr module_offset = addr - module->base_address();
594 info->module = internal_strdup(module_name);
595 info->module_offset = module_offset;
596 // First, try to use libbacktrace symbolizer (if it's available).
597 if (libbacktrace_symbolizer_ != 0) {
599 if (libbacktrace_symbolizer_->SymbolizeData(addr, info))
// true selects a data request.
602 const char *str = SendCommand(true, module_name, module_offset);
// Reply layout as parsed here: the name on one line, then start and size
// separated by a space on the next line.
605 str = ExtractToken(str, "\n", &info->name);
606 str = ExtractUptr(str, " ", &info->start);
607 str = ExtractUptr(str, "\n", &info->size);
// The parsed start is module-relative; add the module base address to it.
608 info->start += module->base_address();
// Thread-safe wrapper around FindModuleNameAndOffsetForAddress.
612 bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
613 uptr *module_address) override {
614 BlockingMutexLock l(&mu_);
615 return FindModuleNameAndOffsetForAddress(pc, module_name, module_address);
// True when at least one of the three backends is available. Does not take
// mu_.
618 bool CanReturnFileLineInfo() override {
619 return internal_symbolizer_ != 0 || external_symbolizer_ != 0 ||
620 libbacktrace_symbolizer_ != 0;
// Forwards the flush request to the internal symbolizer, if there is one.
623 void Flush() override {
624 BlockingMutexLock l(&mu_);
625 if (internal_symbolizer_ != 0) {
626 SymbolizerScope sym_scope(this);
627 internal_symbolizer_->Flush();
// Demangles |name|, trying libbacktrace first, then the internal
// symbolizer, and finally DemangleCXXABI.
631 const char *Demangle(const char *name) override {
632 BlockingMutexLock l(&mu_);
633 // Run hooks even if we don't use internal symbolizer, as cxxabi
634 // demangle may call system functions.
635 SymbolizerScope sym_scope(this);
636 // Try to use libbacktrace demangler (if available).
637 if (libbacktrace_symbolizer_ != 0) {
638 if (const char *demangled = libbacktrace_symbolizer_->Demangle(name))
641 if (internal_symbolizer_ != 0)
642 return internal_symbolizer_->Demangle(name);
643 return DemangleCXXABI(name);
// Linux-only (non-Android) preparation step performed under mu_.
646 void PrepareForSandboxing() override {
647 #if SANITIZER_LINUX && !SANITIZER_ANDROID
648 BlockingMutexLock l(&mu_);
649 // Cache /proc/self/exe on Linux.
// Sends a request to the first available backend: the internal symbolizer
// takes precedence over the external one. Each call runs inside a
// SymbolizerScope.
655 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
657 // First, try to use internal symbolizer.
658 if (internal_symbolizer_) {
659 SymbolizerScope sym_scope(this);
660 return internal_symbolizer_->SendCommand(is_data, module_name,
663 // Otherwise, fall back to external symbolizer.
664 if (external_symbolizer_) {
665 SymbolizerScope sym_scope(this);
666 return external_symbolizer_->SendCommand(is_data, module_name,
// Finds the loaded module containing |address|. The module list is
// (re)loaded when it is missing or marked stale; if the lookup fails on a
// list that was not just reloaded, the list is marked stale and the lookup
// is repeated once through a recursive call.
672 LoadedModule *FindModuleForAddress(uptr address) {
674 bool modules_were_reloaded = false;
// NOTE(review): a new array is allocated on every reload, not only when
// modules_ is 0, and the previous array is not released here - confirm this
// is intended.
675 if (modules_ == 0 || !modules_fresh_) {
676 modules_ = (LoadedModule*)(symbolizer_allocator_.Allocate(
677 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
679 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
// The list must be non-empty and strictly below the capacity limit.
681 CHECK_GT(n_modules_, 0);
682 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
683 modules_fresh_ = true;
684 modules_were_reloaded = true;
// Linear scan over the known modules.
686 for (uptr i = 0; i < n_modules_; i++) {
687 if (modules_[i].containsAddress(address)) {
691 // Reload the modules and look up again, if we haven't tried it yet.
692 if (!modules_were_reloaded) {
693 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
694 // It's too aggressive to reload the list of modules each time we fail
695 // to find a module for a given address.
696 modules_fresh_ = false;
697 return FindModuleForAddress(address);
// Convenience wrapper over FindModuleForAddress that reports the module's
// full name and the offset of |address| from the module base address.
702 bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
703 uptr *module_offset) {
705 LoadedModule *module = FindModuleForAddress(address);
708 *module_name = module->full_name();
709 *module_offset = address - module->base_address();
713 // 16K loaded modules should be enough for everyone.
714 static const uptr kMaxNumberOfModuleContexts = 1 << 14;
715 LoadedModule *modules_; // Array of module descriptions is leaked.
717 // If stale, need to reload the modules before looking up addresses.
721 ExternalSymbolizerInterface *external_symbolizer_; // Leaked.
722 InternalSymbolizer *const internal_symbolizer_; // Leaked.
723 LibbacktraceSymbolizer *libbacktrace_symbolizer_; // Leaked.
// Builds the platform symbolizer. With the symbolize flag off, a
// POSIXSymbolizer without any backend is returned. Otherwise backends are
// probed in order: internal symbolizer, libbacktrace, llvm-symbolizer
// (from external_symbolizer_path or found by name), and finally addr2line
// when allow_addr2line is set. All objects are placement-constructed in
// symbolizer_allocator_.
726 Symbolizer *Symbolizer::PlatformInit() {
727 if (!common_flags()->symbolize) {
728 return new(symbolizer_allocator_) POSIXSymbolizer(0, 0, 0);
730 InternalSymbolizer* internal_symbolizer =
731 InternalSymbolizer::get(&symbolizer_allocator_);
732 ExternalSymbolizerInterface *external_symbolizer = 0;
733 LibbacktraceSymbolizer *libbacktrace_symbolizer = 0;
// The remaining backends are considered only when the internal symbolizer
// is unavailable.
735 if (!internal_symbolizer) {
736 libbacktrace_symbolizer =
737 LibbacktraceSymbolizer::get(&symbolizer_allocator_);
738 if (!libbacktrace_symbolizer) {
739 const char *path_to_external = common_flags()->external_symbolizer_path;
// A path that is set but empty disables the external symbolizer; an unset
// path triggers the search below.
740 if (path_to_external && path_to_external[0] == '\0') {
741 // External symbolizer is explicitly disabled. Do nothing.
743 // Find path to llvm-symbolizer if it's not provided.
744 if (!path_to_external)
745 path_to_external = FindPathToBinary("llvm-symbolizer");
746 if (path_to_external) {
747 external_symbolizer = new(symbolizer_allocator_)
748 LLVMSymbolizerProcess(path_to_external);
749 } else if (common_flags()->allow_addr2line) {
750 // If llvm-symbolizer is not found, try to use addr2line.
751 if (const char *addr2line_path = FindPathToBinary("addr2line")) {
752 external_symbolizer = new(symbolizer_allocator_)
753 Addr2LinePool(addr2line_path, &symbolizer_allocator_);
// Any of the three backend pointers may still be 0 at this point.
760 return new(symbolizer_allocator_) POSIXSymbolizer(
761 external_symbolizer, internal_symbolizer, libbacktrace_symbolizer);
764 } // namespace __sanitizer
766 #endif // SANITIZER_POSIX