]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/compiler-rt/lib/asan/scripts/asan_symbolize.py
Update nvi to 2.1.3 which fixes the data corruption when locale conversion
[FreeBSD/FreeBSD.git] / contrib / compiler-rt / lib / asan / scripts / asan_symbolize.py
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3 #
4 #                     The LLVM Compiler Infrastructure
5 #
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
8 #
9 #===------------------------------------------------------------------------===#
10 import argparse
11 import bisect
12 import getopt
13 import os
14 import re
15 import subprocess
16 import sys
17
# Module-wide state.  The command-line driver at the bottom of the file
# overrides most of these before the symbolization loop starts.

# Symbolizer chains created so far, keyed by binary path.
symbolizers = {}
# Stream the report is read from.
logfile = sys.stdin
# When true, helper command lines and raw tool replies are echoed.
DEBUG = False
# Whether helper tools are asked to demangle function names.
demangle = False
# Patterns cut (together with everything before them) from file names.
fix_filename_patterns = None
# Prefix put in front of the addr2line tool name.
binutils_prefix = None
# Prefix that sysroot_path_filter() puts in front of binary paths.
sysroot_path = None
# Optional callable used to rewrite binary paths before symbolization.
binary_name_filter = None
26
27 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location reported by a symbolizer.

  Each pattern in the module-level fix_filename_patterns is removed together
  with whatever precedes it.  Afterwards locations matching asan_*.cc files
  are replaced by '_asan_rtl_' and 'crtstuff.c:0' becomes '???:0'.

  Args:
      file_name: location text as printed by a symbolizer.
  Returns:
      the rewritten location string.
  """
  # (regex, replacement) pairs, applied strictly in this order.
  rewrites = [('.*' + path_to_cut, '')
              for path_to_cut in (fix_filename_patterns or ())]
  rewrites.append(('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'))
  rewrites.append(('.*crtstuff.c:0', '???:0'))
  for pattern, replacement in rewrites:
    file_name = re.sub(pattern, replacement, file_name)
  return file_name
35
def sysroot_path_filter(binary_name):
  """Return binary_name with the module-level sysroot_path put in front."""
  prefixed_name = sysroot_path + binary_name
  return prefixed_name
38
def guess_arch(addr):
  """Guess the architecture from the width of a hexadecimal address string.

  Args:
      addr: address text such as '0x7f6e35cf2e45'.
  Returns:
      'x86_64' if addr is longer than '0x' plus 8 hex digits, else 'i386'.
  """
  widest_32bit_addr = len('0x') + 8
  return 'x86_64' if len(addr) > widest_32bit_addr else 'i386'
45
class Symbolizer(object):
  """Interface shared by all symbolizer backends in this script."""

  def __init__(self):
    pass

  def symbolize(self, addr, binary, offset):
    """Describe the code location of one instruction.

    This base implementation always answers None; subclasses override it.

    Args:
        addr: virtual address of an instruction.
        binary: path to the executable/shared object holding the instruction.
        offset: offset of the instruction inside @binary.
    Returns:
        list of strings, one per inlined frame, each giving the function
        name, file name, line and column numbers; or None if unknown.
    """
    return None
64
65
class LLVMSymbolizer(Symbolizer):
  """Symbolizer backed by a long-running llvm-symbolizer child process."""

  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
    """Start the llvm-symbolizer child.

    Args:
        symbolizer_path: program name or path of llvm-symbolizer.
        default_arch: value passed as --default-arch.
        system: OS name; .dSYM hints are only passed when it is 'Darwin'.
        dsym_hints: optional iterable of --dsym-hint values.
    """
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = default_arch
    self.system = system
    # A None default avoids sharing one mutable list between all instances.
    self.dsym_hints = [] if dsym_hints is None else dsym_hints
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Spawn llvm-symbolizer; return the Popen object, or None on OSError."""
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=%s' % demangle,
           '--functions=short',
           '--inlining=true',
           '--default-arch=%s' % self.default_arch]
    if self.system == 'Darwin':
      for hint in self.dsym_hints:
        cmd.append('--dsym-hint=%s' % hint)
    if DEBUG:
      print(' '.join(cmd))
    try:
      result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    except OSError:
      # The tool could not be started; symbolize() then always returns None.
      result = None
    return result

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Sends one '"<binary>" <offset>' request line and reads (function, file)
    line pairs until an empty line.  Frames where both the function and the
    file start with '??' are dropped.

    Returns:
        list of '<addr> in <function> <file>' strings, or None if the child
        is unavailable, the exchange fails, or no useful frame came back.
    """
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '"%s" %s' % (binary, offset)
      if DEBUG:
        print(symbolizer_input)
      self.pipe.stdin.write(symbolizer_input + '\n')
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith('??') or
            not file_name.startswith('??')):
          # Append only non-trivial frames.
          result.append('%s in %s %s' % (addr, function_name,
                                         file_name))
    except Exception:
      # Best effort: any failure talking to the child means "no answer",
      # which lets the caller fall back to another symbolizer.
      result = []
    return result or None
120
121
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer, locating the tool through the environment.

  The tool path is taken from LLVM_SYMBOLIZER_PATH, then from
  ASAN_SYMBOLIZER_PATH; if neither is set (or both are empty),
  'llvm-symbolizer' is assumed to be in PATH.

  Args:
      system: OS name forwarded to LLVMSymbolizer.
      default_arch: architecture forwarded to LLVMSymbolizer.
      dsym_hints: optional iterable of .dSYM hints; None means no hints.
  Returns:
      a new LLVMSymbolizer instance.
  """
  # A None default avoids sharing one mutable list between calls.
  if dsym_hints is None:
    dsym_hints = []
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
130
131
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that queries an addr2line child process for one binary."""

  def __init__(self, binary):
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Start addr2line on self.binary and return the Popen object."""
    tool = (binutils_prefix or '') + 'addr2line'
    demangle_flag = ['--demangle'] if demangle else []
    cmd = [tool, '-f'] + demangle_flag + ['-e', self.binary]
    if DEBUG:
      print(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for any binary other than the one given at construction.
    Otherwise returns a one-element list; if talking to addr2line fails,
    the function and file names in it are empty.
    """
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      reply = self.pipe.stdout
      function_name = reply.readline().rstrip()
      file_name = reply.readline().rstrip()
    except Exception:
      function_name = file_name = ''
    location = fix_filename(file_name)
    return ['%s in %s %s' % (addr, function_name, location)]
164
165
class UnbufferedLineConverter(object):
  """
  Talk to a child process that answers every line of input with one line of
  output.  The child is run on a pty so that it does not buffer its output.
  """
  def __init__(self, args, close_stderr=False):
    # Imported here rather than at module level so that the script can
    # still start on Windows.
    import pty
    import termios
    child_pid, master_fd = pty.fork()
    if child_pid == 0:
      # Child side: optionally point stderr at /dev/null, then exec args.
      if close_stderr:
        null_fd = os.open('/dev/null', 0)
        os.dup2(null_fd, 2)
      os.execvp(args[0], args)
    else:
      # Parent side: turn off echo so our own input is not read back.
      local_flags = 3  # index of the local-mode flags in the attribute list
      tty_attr = termios.tcgetattr(master_fd)
      tty_attr[local_flags] &= ~termios.ECHO
      termios.tcsetattr(master_fd, termios.TCSANOW, tty_attr)
      # Line-buffered file objects for reading from and writing to the child.
      self.r = os.fdopen(master_fd, "r", 1)
      self.w = os.fdopen(os.dup(master_fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its one-line reply."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read the next line from the child with trailing whitespace removed."""
    reply = self.r.readline()
    return reply.rstrip()
197
198
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that queries an atos child process for one binary."""

  def __init__(self, addr, binary):
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # The architecture is guessed from the width of the first address seen.
    self.arch = guess_arch(addr)
    self.open_atos()

  def open_atos(self):
    """Start atos for self.binary/self.arch and keep it in self.atos."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for any binary other than the one given at construction,
    otherwise a one-element list built from the atos reply.
    """
    if binary != self.binary:
      return None
    atos_line = self.atos.convert('0x%x' % int(offset, 16))
    # Skip "got symbolicator for" lines until the actual reply shows up.
    while "got symbolicator for" in atos_line:
      atos_line = self.atos.readline()
    if DEBUG:
      print(' '.join(['atos_line: ', atos_line]))
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if not match:
      # Unrecognized reply: pass it through verbatim after the address.
      return ['%s in %s' % (addr, atos_line)]
    # Drop the parenthesized argument list from the function name.
    function_name = re.sub(r'\(.*?\)', '', match.group(1))
    file_name = fix_filename(match.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
231
232
233 # Chain several symbolizers so that if one symbolizer fails, we fall back
234 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first answer."""

  def __init__(self, symbolizer_list):
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Falsy entries in the list are skipped.  Returns the first non-empty
    result, or None if every symbolizer came up empty.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      answer = candidate.symbolize(addr, binary, offset)
      if answer:
        return answer
    return None

  def append_symbolizer(self, symbolizer):
    """Add one more symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
251
252
def BreakpadSymbolizerFactory(binary):
  """Return a BreakpadSymbolizer for binary, or None if unavailable.

  The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
  environment variable.  None is returned when the variable is unset or
  empty, or when that file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  filename = binary + suffix
  if not os.access(filename, os.F_OK):
    return None
  return BreakpadSymbolizer(filename)
260
261
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's fallback symbolizer for binary.

  Args:
      system: OS name as returned by os.uname()[0].
      addr: an address from the report; DarwinSymbolizer uses it to guess
          the architecture.
      binary: path of the binary to symbolize.
  Returns:
      a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux' and
      'FreeBSD', and None for any other system.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  # SymbolizationLoop accepts 'FreeBSD' as a known system, so it needs a
  # fallback here too; without one the loop's final assert fails.
  if system in ('Linux', 'FreeBSD'):
    return Addr2LineSymbolizer(binary)
  return None
267
268
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers from a Breakpad text symbol file."""

  def __init__(self, filename):
    """Read and index the symbol file at filename.

    The first line is expected to be the MODULE record; its third, fourth
    and remaining fields become self.arch, self.debug_id and self.binary.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the symbol file as soon as its lines have been read.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []         # file names, indexed by FILE record number
    self.symbols = {}       # symbol start address -> symbol name
    self.address_list = []  # sorted start addresses of all line records
    self.addresses = {}     # line record address -> (symbol, size, line, file)
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill the lookup tables from the records that follow the MODULE line."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to parse.
        continue
      if fragments[0] == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif fragments[0] == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif fragments[0] in ['CFI', 'STACK']:
        pass
      elif fragments[0] == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # Direct dict membership test; .keys() would build a list per record.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering addr.

    Returns:
        (symbol name, file name, line number), or None if no line record
        covers addr.
    """
    key = None
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record that starts below addr, if there is one.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for any binary other than the one named in the MODULE
    record, or when the offset is not covered by a line record.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # NOTE(review): this unconditional print also echoes the raw list to
    # stdout; it looks like leftover debugging but is kept so the script's
    # output does not change.
    print(result)
    return result
341
342
class SymbolizationLoop(object):
  """Reads a report line by line and symbolizes its stack-trace frames."""

  def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
    """Pick the per-line handler for the current platform.

    Args:
        binary_name_filter: optional callable mapping the binary path found
            in a frame to the path that should be symbolized.
        dsym_hint_producer: optional callable returning .dSYM hints for a
            binary; only consulted on Darwin.
    Raises:
        Exception: on a non-Windows system other than Linux, Darwin, FreeBSD.
    """
    if sys.platform == 'win32':
      # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
      # even in sandboxed processes.  Nothing needs to be done here.
      self.process_line = self.process_line_echo
    else:
      # Used by clients who may want to supply a different binary name.
      # E.g. in Chrome several binaries may share a single .dSYM.
      self.binary_name_filter = binary_name_filter
      self.dsym_hint_producer = dsym_hint_producer
      self.system = os.uname()[0]
      if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
        raise Exception('Unknown system')
      self.llvm_symbolizers = {}
      self.last_llvm_symbolizer = None
      self.dsym_hints = set([])
      self.frame_no = 0
      self.process_line = self.process_line_posix

  def symbolize_address(self, addr, binary, offset):
    """Symbolize one frame using the symbolizer chain kept for binary.

    Returns:
        non-empty list of frame descriptions (one per inlined frame).
    """
    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
    # a single symbolizer binary.
    # On Darwin, if the dsym hint producer is present:
    #  1. check whether we've seen this binary already; if so,
    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
    #     info for this binary (might not be the case for
    #     |last_llvm_symbolizer|);
    #  2. otherwise check if we've seen all the hints for this binary already;
    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
    #  3. otherwise create a new symbolizer and pass all currently known
    #     .dSYM hints to it.
    if binary not in self.llvm_symbolizers:
      use_new_symbolizer = True
      if self.system == 'Darwin' and self.dsym_hint_producer:
        dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
        use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
        self.dsym_hints |= dsym_hints_for_binary
      if self.last_llvm_symbolizer and not use_new_symbolizer:
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
      else:
        self.last_llvm_symbolizer = LLVMSymbolizerFactory(
            self.system, guess_arch(addr), self.dsym_hints)
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def get_symbolized_lines(self, symbolized_lines):
    """Format symbolized frames for output.

    Each frame gets a '    #<n> ' prefix using the running frame counter.
    If there are no frames, the current input line is returned unchanged.
    """
    if not symbolized_lines:
      return [self.current_line]
    result = []
    for symbolized_frame in symbolized_lines:
      result.append('    #%s %s' % (str(self.frame_no),
                                    symbolized_frame.rstrip()))
      self.frame_no += 1
    return result

  def process_logfile(self):
    """Process every line of the module-level logfile and print the result."""
    self.frame_no = 0
    for line in logfile:
      processed = self.process_line(line)
      print('\n'.join(processed))

  def process_line_echo(self, line):
    """Return the line unchanged apart from trailing whitespace."""
    return [line.rstrip()]

  def process_line_posix(self, line):
    """Symbolize line if it is a stack-trace frame, else echo it.

    Returns:
        list of output lines replacing the input line.
    """
    self.current_line = line.rstrip()
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      return [self.current_line]
    if DEBUG:
      print(line)
    _, frameno_str, addr, binary, offset = match.groups()
    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.binary_name_filter:
      binary = self.binary_name_filter(binary)
    symbolized_line = self.symbolize_address(addr, binary, offset)
    if not symbolized_line and original_binary != binary:
      # The filtered name gave nothing: retry with the unfiltered name.
      # (Retrying with the filtered name again could never differ.)
      symbolized_line = self.symbolize_address(addr, original_binary, offset)
    return self.get_symbolized_lines(symbolized_line)
444
445
if __name__ == '__main__':
  # Command-line driver: parse the options, override the module-level
  # settings accordingly, then symbolize the whole log.
  parser = argparse.ArgumentParser(
      description='ASan symbolization script',
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog='Example of use:\n'
             'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
             '-s "$HOME/SymbolFiles" < asan.log')
  parser.add_argument('path_to_cut', nargs='*',
                      help='pattern to be cut from the result file path ')
  parser.add_argument('-d','--demangle', action='store_true',
                      help='demangle function names')
  parser.add_argument('-s', metavar='SYSROOT',
                      help='set path to sysroot for sanitized binaries')
  parser.add_argument('-c', metavar='CROSS_COMPILE',
                      help='set prefix for binutils')
  parser.add_argument('-l','--logfile', default=sys.stdin,
                      type=argparse.FileType('r'),
                      help='set log file name to parse, default is stdin')
  args = parser.parse_args()
  # Only options that were actually given replace the module defaults.
  if args.path_to_cut:
    fix_filename_patterns = args.path_to_cut
  if args.demangle:
    demangle = True
  if args.c:
    binutils_prefix = args.c
  if args.s:
    # A sysroot implies rewriting every binary path through the filter.
    sysroot_path = args.s
    binary_name_filter = sysroot_path_filter
  logfile = args.logfile or sys.stdin
  loop = SymbolizationLoop(binary_name_filter)
  loop.process_logfile()