2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
# Prefix glued in front of binutils tool names (the -c option); None leaves
# the tool names unprefixed.
binutils_prefix = None
# Optional callable applied to every binary path taken from the log before
# it is symbolized; None disables the remapping.
binary_name_filter = None
# Optional list of regular-expression fragments that fix_filename() cuts
# from the front of reported file paths.
fix_filename_patterns = None
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Clean up a "file:line" location string reported by a symbolizer.

    Every entry of the module-level ``fix_filename_patterns`` (when set) is
    used as a regular expression; everything up to and including its match
    is cut from ``file_name``.  Afterwards a location inside an
    ``asan_*.cc`` source file collapses to ``_asan_rtl_`` and
    ``crtstuff.c:0`` becomes ``???:0``.

    Args:
        file_name: location string to clean up.

    Returns:
        The cleaned-up location string.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    # Callers format and inspect the result, so it has to be handed back.
    return file_name
def sysroot_path_filter(binary_name):
    """Return ``binary_name`` prefixed with the module-level ``sysroot_path``.

    Args:
        binary_name: path of a binary as it appears in the log.

    Returns:
        ``sysroot_path + binary_name``, or ``binary_name`` unchanged when no
        sysroot is configured (plain concatenation would raise on None).
    """
    if not sysroot_path:
        return binary_name
    return sysroot_path + binary_name
42 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
class Symbolizer(object):
    """Common interface of all symbolizer back ends in this file."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.

        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).  The base implementation
            knows nothing and returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-running llvm-symbolizer child."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Remember the configuration and start the child process.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as --default-arch.
            system: platform name; 'Darwin' enables the --dsym-hint flags.
            dsym_hints: iterable of hints, one --dsym-hint flag each.  None
                means no hints (a fresh list, not a shared default object).
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        # open_llvm_symbolizer() reads self.system, so set it before the call.
        self.system = system
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer; return the Popen object or None on failure."""
        # NOTE(review): the reviewed text has a gap between the --demangle
        # and --default-arch entries; confirm no flags were lost there.
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            # The executable could not be started; symbolize() then reports
            # failure so that another symbolizer can be tried.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            # NOTE(review): the guard around this echo is not visible in the
            # reviewed text.  It is gated on an optional module-level DEBUG
            # flag so it cannot leak into the symbolized output on stdout.
            if globals().get('DEBUG'):
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            self.pipe.stdin.flush()
            # The reply is read as (function, file) line pairs until an
            # empty line (or EOF) is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except (IOError, OSError):
            # A dead child counts as "could not symbolize".
            result = []
        return result or None
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    LLVM_SYMBOLIZER_PATH is consulted first, then ASAN_SYMBOLIZER_PATH; if
    neither is set (or both are empty) 'llvm-symbolizer' is assumed to be
    in PATH.

    Args:
        system: platform name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: hints forwarded to LLVMSymbolizer; None means none.  A
            fresh list is created per call instead of sharing one default.

    Returns:
        The new LLVMSymbolizer instance.
    """
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       # Assume llvm-symbolizer is in PATH.
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that feeds offsets to an addr2line child for one binary."""

    def __init__(self, binary):
        """Start addr2line for ``binary``."""
        super(Addr2LineSymbolizer, self).__init__()
        # Read by open_addr2line() and symbolize(); set before launching.
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Build the addr2line command line and return the running Popen."""
        addr2line_tool = 'addr2line'
        # binutils_prefix defaults to None, so only prepend it when set.
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write(offset + '\n')
            self.pipe.stdin.flush()
            # With -f the reply is read as two lines: function, then file.
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except (IOError, OSError):
            # NOTE(review): the original error handling is not visible here;
            # a broken pipe degrades to empty names so a frame is still
            # produced for this address.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork ``args`` on a pseudo-terminal and open line-buffered streams.

        Args:
            args: command line of the child; args[0] is the program.
            close_stderr: when true the child opens /dev/null before exec.
        """
        # NOTE(review): the fork call itself is missing from the reviewed
        # text.  pty.fork() is restored from the class description and the
        # child/parent split below -- confirm.  Imported locally so this
        # block does not depend on an import that is not visible here.
        import pty
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                # NOTE(review): presumably /dev/null replaces fd 2 here; the
                # statement between the open and the exec is not visible.
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Turn off terminal echo so written lines are not read back.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send ``line`` to the child and return its next output line."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the child's next output line without trailing whitespace."""
        return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an `atos` child process for one binary."""

    def __init__(self, addr, binary):
        """Start atos for ``binary``; the arch is guessed from ``addr``."""
        super(DarwinSymbolizer, self).__init__()
        # Read by open_atos() and symbolize(); set before launching.
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos (mirrors the open_* helpers of the sibling classes)."""
        # NOTE(review): the guard around this echo is not visible in the
        # reviewed text.  It is gated on an optional module-level DEBUG flag
        # so it cannot leak into the symbolized output on stdout.
        if globals().get('DEBUG'):
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip lines containing "got symbolicator for" until the answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if globals().get('DEBUG'):
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        # Anything else is passed through verbatim.
        return ['%s in %s' % (addr, atos_line)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order and return the first result."""

    def __init__(self, symbolizer_list):
        """Store ``symbolizer_list``; entries may be None and are skipped."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            # Factories in this file may hand back None instead of a
            # symbolizer, so absent entries are tolerated.
            if symbolizer is None:
                continue
            result = symbolizer.symbolize(addr, binary, offset)
            if result:
                return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add ``symbolizer`` as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for ``binary`` if a symbol file exists.

    The symbol file name is ``binary`` plus the value of the
    BREAKPAD_SUFFIX environment variable.

    Returns:
        A BreakpadSymbolizer, or None when the variable is unset/empty or
        the file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    # os.getenv() yields None when unset; concatenating that would raise.
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None
def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform's native symbolizer for ``binary``.

    Args:
        system: platform name.
        addr: an address from the log, forwarded to DarwinSymbolizer.
        binary: path of the binary to symbolize.

    Returns:
        A DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux',
        and None for every other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    # No native symbolizer is wired up for other systems.
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file ``filename``."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # file number -> file name
        self.symbols = {}        # symbol address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol, size, line, file)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the first line of the symbol file."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already stored for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                # NOTE(review): the last two elements are cut off in the
                # reviewed text; they are restored as decimal ints because
                # they are later used with %d and as a list index -- confirm.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) covering ``addr``, or None."""
        # bisect_right lands just past the last record starting at or before
        # addr, which covers both an exact hit and "inside a record".
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        return ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
class SymbolizationLoop(object):
    """Read a log line by line and print it with stack frames symbolized."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Set up per-run state.

        Args:
            binary_name_filter: optional callable mapping a binary path from
                the log to the path that should be symbolized.
            dsym_hint_producer: optional callable returning an iterable of
                hints for a binary; only consulted on Darwin.

        Raises:
            Exception: when os.uname() reports an unsupported system.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.dsym_hint_producer = dsym_hint_producer
        self.system = os.uname()[0]
        if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
            raise Exception('Unknown system')
        self.llvm_symbolizers = {}
        self.last_llvm_symbolizer = None
        self.dsym_hints = set([])
        # Number given to the next emitted frame; get_symbolized_lines()
        # reads it, so it must exist before the first line is processed.
        self.frame_no = 0

    def symbolize_address(self, addr, binary, offset):
        """Return the symbolized frames for one address, or None."""
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     hints to it.
        if binary not in self.llvm_symbolizers:
            use_last_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                # Reuse only when every hint is already known (case 2); the
                # previous test was inverted and reused the old symbolizer
                # exactly when new hints had appeared.
                use_last_symbolizer = dsym_hints_for_binary <= self.dsym_hints
                self.dsym_hints |= dsym_hints_for_binary
            if not (self.last_llvm_symbolizer and use_last_symbolizer):
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
            self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer is expected to produce some result.
        # NOTE(review): whatever enforced that is not visible in the reviewed
        # text, so the result is returned as-is; callers handle None.
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format frames for output, or echo the current line if none."""
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            # One address may yield several frames; each gets its own number.
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Symbolize the module-level ``logfile`` and print the result."""
        self.frame_no = 0
        # readline() until EOF keeps the processing line-by-line.
        for line in iter(logfile.readline, ''):
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line(self, line):
        """Return the output lines for one log line.

        Lines that are not stack frames are returned unchanged (minus
        trailing whitespace).
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line:
            if original_binary != binary:
                # Retry with the unfiltered path; repeating the call with
                # the filtered one could never give a different answer.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
if __name__ == '__main__':
    # Command-line entry point: parse the options, publish them through the
    # module-level settings the symbolizers read, then symbolize the log.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='''Example of use:
asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log''')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # An empty list is falsy, so fix_filename() skips the cutting step.
    fix_filename_patterns = args.path_to_cut
    # The symbolizer command lines read the module-level `demangle`, so the
    # parsed flag has to be stored there to have any effect.
    demangle = args.demangle
    # Install the sysroot filter only when a sysroot was actually given.
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    # None (option absent) leaves the binutils tool names unprefixed.
    binutils_prefix = args.c
    # Defaults to sys.stdin via the argument definition above.
    logfile = args.logfile

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()