#!/usr/bin/env python
#-
# Copyright (c) 2010 Gleb Kurtsou
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$

import os
import sys
import re
import optparse


class Config(object):
    """Process-wide settings, stored as class attributes."""

    version = '0.1'
    # controlled by user
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None
    symbol_filter = None
    alias_prefixes = []
    # misc opts
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'
    # debug
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Settings for one input file: its name and its output stream."""

        filename = None
        out = sys.stdout

        def init(self, outname):
            """Open *outname* for writing as ``out``.

            An empty name or '-' leaves ``out`` untouched (stdout by default).
            """
            if outname and outname != '-':
                self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    @classmethod
    def init(cls):
        """Install fresh, empty version and symbol filters."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()


class App(object):
    """Holder for the process result code."""

    result_code = 0


def warn(cond, msg):
    """Write ``"WARN: " + msg`` and a newline to stderr when *cond* is true."""
    if cond:
        # Equivalent to the former ``print >> sys.stderr`` statement, but
        # spelled so that it does not depend on the print statement syntax.
        sys.stderr.write("WARN: " + msg + "\n")


# {{{ misc

class StrFilter(object):
    """Include/exclude filter built from regular expression strings.

    Fill ``include`` / ``exclude`` with patterns, call compile(), then use
    match().
    """

    def __init__(self):
        self.exclude = []
        self.include = []

    def compile(self):
        """Compile the pattern strings into ``re_exclude`` / ``re_include``."""
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if *s* passes the filter.

        When include patterns exist, at least one must match; no exclude
        pattern may match.  Patterns are applied with ``re.match``, i.e.
        anchored at the start of *s*.
        """
        if self.re_include:
            if not any(r.match(s) for r in self.re_include):
                return False
        for r in self.re_exclude:
            if r.match(s):
                return False
        return True


class Cache(object):
    """Dictionary-backed cache with hit/miss accounting."""

    class CacheStats(object):
        """Hit and miss counters."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary of the counters labelled *name*."""
            total = self.hit + self.miss
            if total == 0:
                ratio = '(undef)'
            else:
                ratio = '%f' % (self.hit / float(total))
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % \
                    (name, self.hit, self.miss, ratio)

    def __init__(self, enabled=True, stats=None):
        """Create a cache.

        enabled -- when false, get() always misses and put()/replace() do
                   nothing.
        stats   -- CacheStats object to update; a new one is created when
                   omitted.
        """
        self.enabled = enabled
        self.items = {}
        if stats is None:
            self.stats = Cache.CacheStats()
        else:
            self.stats = stats

    def get(self, id):
        """Return the object cached under *id*, or None on a miss."""
        if self.enabled and id in self.items:
            self.stats.hit += 1
            return self.items[id]
        else:
            self.stats.miss += 1
            return None

    def put(self, id, obj):
        """Store *obj* under *id*.

        Overwriting an entry with a different object is reported through
        warn() (subject to Config.w_cached) and then performed anyway.
        """
        if self.enabled:
            if id in self.items and obj is not self.items[id]:
                #raise ValueError("Item is already cached: %d (%s, %s)" %
                #        (id, self.items[id], obj))
                warn(Config.w_cached, "Item is already cached: %d (%s, %s)" % \
                        (id, self.items[id], obj))
            self.items[id] = obj

    def replace(self, id, obj):
        """Overwrite the entry for *id*, which must already exist."""
        if self.enabled:
            assert id in self.items
            self.items[id] = obj


class ListDiff(object):
    """Set difference of two iterables: ``common``, ``added``, ``removed``."""

    def __init__(self, orig, new):
        self.orig = set(orig)
        self.new = set(new)
        self.common = self.orig & self.new
        self.added = self.new - self.common
        self.removed = self.orig - self.common


class PrettyPrinter(object):
    """Drives the ``_pp`` / ``_pp_ex`` methods of definition objects."""

    def __init__(self):
        # Extended renderings collected by run_nested().
        self.stack = []

    def run_nested(self, obj):
        """Record the extended rendering (``_pp_ex``) of *obj*."""
        ex = obj._pp_ex(self)
        self.stack.append(ex)

    def run(self, obj):
        """Render *obj* with ``_pp``, remember the result and return it."""
        self._result = obj._pp(self)
        return self._result

    def nested(self):
        """Return the recorded extended renderings, de-duplicated and sorted."""
        return sorted(set(self.stack))

    def result(self):
        """Return the result of the most recently completed run()."""
        return self._result

# }}}

#{{{ symbols and version maps

class Symbol(object):
    """A versioned symbol of a library."""

    def __init__(self, name, offset, version, lib):
        self.name = name
        self.offset = offset
        self.version = version
        self.lib = lib
        self.definition = None

    @property
    def name_ver(self):
        """The ``name@version`` form of the symbol."""
        return self.name + '@' + self.version

    def __repr__(self):
        return "Symbol(%s, 0x%x, %s)" % (self.name, self.offset, self.version)


class CommonSymbol(object):
    """A pair of symbols that share both name and version."""

    def __init__(self, origsym, newsym):
        """Pair *origsym* with *newsym*.

        Raises RuntimeError when the names or the versions differ.
        """
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # The symbols used to be passed as a second exception argument,
            # which left the '%s' placeholder unformatted in the message.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)


class SymbolAlias(object):
    """A symbol whose name is *prefix* followed by the aliased name."""

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        self.alias = alias
        self.name = alias[len(prefix):]
        self.offset = offset

    def __repr__(self):
        return "SymbolAlias(%s, 0x%x)" % (self.alias, self.offset)


class VersionMap(object):
    """The symbols of one version, keyed by symbol name."""

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add *symbol*; raise ValueError if its name is already present."""
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the names of the symbols in this map."""
        return self.symbols.keys()

    def __repr__(self):
        return repr(self.symbols.values())

# }}}

# {{{ types and definitions

class Def(object):
    """Base class of all definitions.

    A definition has an ``id``, a ``name`` and arbitrary keyword attributes,
    kept in ``attrs`` and readable as ordinary attributes.
    """

    # Subclasses that merely wrap another definition (found in ``type``)
    # set this to True; see _alias().
    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        # Only reached when normal lookup fails: fall back to ``attrs``.
        if attr not in self.attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return self.attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or *default* when the name is empty or None."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Return the definition reached by following alias wrappers."""
        if self._is_alias:
            return self.type._alias()
        return self

    def __cmp__(self, other):
        # NOTE(review): __cmp__, cmp() and long() exist on Python 2 only;
        # this comparison protocol is left as it was.
        # TODO assert 'self' and 'other' belong to different libraries
        #print 'cmp defs: %s, %s' % (self, other)
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            # 'other' is not a definition at all.
            return 1
        r = cmp(a.__class__, b.__class__)
        if r == 0:
            if a.id != 0 and b.id != 0:
                # Both ids combined into one key for the comparison cache.
                ind = (long(a.id) << 32) + b.id
                r = Dwarf.cmpcache.get(ind)
                if r is not None:
                    return r
            else:
                ind = 0
            r = cmp(a.attrs, b.attrs)
            if ind != 0:
                Dwarf.cmpcache.put(ind, r)
        else:
            # Definitions of different classes deliberately compare equal.
            r = 0
            #raise RuntimeError('Comparing different classes: %s, %s' %
            #        (a.__class__.__name__, b.__class__.__name__))
        return r

    def __repr__(self):
        p = []
        if hasattr(self, 'name'):
            p.append("name=%s" % self.name)
        for (k, v) in self.attrs.items():
            if isinstance(v, Def):
                # Do not descend into nested definitions.
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return ``vals[param]``; raise NotImplementedError if it is absent."""
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                    (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))


class AnonymousDef(Def):
    """A definition without a name."""

    def __init__(self, id, **kwargs):
        Def.__init__(self, id, None, **kwargs)


class Void(AnonymousDef):
    """The void type; a singleton with id 0."""

    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # object.__new__ accepts no extra arguments, so none are forwarded.
            cls._instance = super(Void, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"


class VarArgs(AnonymousDef):
    def _pp(self, pp):
        return "..."


class PointerDef(AnonymousDef):
    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s*" % (t,)


class BaseTypeDef(Def):
    """A base type described by its ``encoding`` and ``byte_size``."""

    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        if self.encoding in self.inttypes:
            sign = '' if self.encoding == 'DW_ATE_signed' else 'u'
            bits = int(self.byte_size) * 8
            return '%sint%s_t' % (sign, bits)
        elif self.encoding == 'DW_ATE_signed_char' and int(self.byte_size) == 1:
            return 'char'
        elif self.encoding == 'DW_ATE_float':
            return self._mapval(self.byte_size, {
                '16': 'long double',
                '8': 'double',
                '4': 'float',
            })
        raise NotImplementedError('Invalid encoding: %s' % self)


class TypeAliasDef(Def):
    _is_alias = True

    def _pp(self, pp):
        alias = self._alias()
        # push typedef name
        if self.name and not alias.name:
            alias.name = 'T(%s)' % self.name
        # return type with modifiers
        return self.type._pp(pp)


class EnumerationTypeDef(Def):
    def _pp(self, pp):
        return 'enum ' + self._name_opt('UNKNOWN')


class ConstTypeDef(AnonymousDef):
    _is_alias = True

    def _pp(self, pp):
        return 'const ' + self.type._pp(pp)


class VolatileTypeDef(AnonymousDef):
    _is_alias = True

    def _pp(self, pp):
        return 'volatile ' + self.type._pp(pp)


class ArrayDef(AnonymousDef):
    def _pp(self, pp):
        t = pp.run(self.type)
        assert len(self.subranges) == 1
        try:
            sz = int(self.subranges[0].upper_bound) + 1
        except ValueError:
            # Strip a parenthesised part from the bound before converting.
            s = re.sub(r'\(.+\)', '', self.subranges[0].upper_bound)
            sz = int(s) + 1
        return '%s[%s]' % (t, sz)


class ArraySubrangeDef(AnonymousDef):
    pass


class FunctionDef(Def):
    def _pp(self, pp):
        result = pp.run(self.result)
        if not self.params:
            params = "void"
        else:
            params = ', '.join([pp.run(x) for x in self.params])
        return "%s %s(%s);" % (result, self.name, params)


class FunctionTypeDef(Def):
    def _pp(self, pp):
        result = pp.run(self.result)
        if not self.params:
            params = "void"
        else:
            params = ', '.join([pp.run(x) for x in self.params])
        return "F(%s, %s, (%s))" % (self._name_opt(), result, params)


class ParameterDef(Def):
    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s %s" % (t, self._name_opt())


# TODO
class StructForwardDef(Def):
    pass


class IncompleteDef(Def):
    """Placeholder for a definition that is still being built."""

    def update(self, complete, cache=None):
        """Link this placeholder with its *complete* definition.

        If *cache* is given and still holds an IncompleteDef under this id,
        that entry is replaced by *complete*.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is not None:
            cached = cache.get(self.id)
            if cached is not None and isinstance(cached, IncompleteDef):
                cache.replace(self.id, complete)


class StructIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "struct %s" % (self.name,)


class UnionIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "union %s" % (self.name,)


class StructDef(Def):
    def _pp_ex(self, pp, suffix=';'):
        members = [pp.run(x) for x in self.members]
        return "struct %s { %s }%s" % \
                (self._name_opt(), ' '.join(members), suffix)

    def _pp(self, pp):
        if self.name:
            # Named: print a reference and record the body as nested output.
            pp.run_nested(self)
            return "struct %s" % (self.name,)
        else:
            return self._pp_ex(pp, suffix='')


class UnionDef(Def):
    def _pp_ex(self, pp, suffix=';'):
        members = [pp.run(x) for x in self.members]
        return "union %s { %s }%s" % \
                (self._name_opt(), ' '.join(members), suffix)

    def _pp(self, pp):
        if self.name:
            # Named: print a reference and record the body as nested output.
            pp.run_nested(self)
            return "union %s" % (self.name,)
        else:
            return self._pp_ex(pp, suffix='')


class MemberDef(Def):
    def _pp(self, pp):
        t = pp.run(self.type)
        if self.bit_size:
            bits = ":%s" % self.bit_size
        else:
            bits = ""
        return "%s %s%s;" % (t, self._name_opt(), bits)


class Dwarf(object):
    """Builds definition objects out of raw tags.

    build() dispatches on the tag name: 'DW_TAG_xxx' is handled by the
    method 'build_xxx'.
    """

    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the definition named by the optional 'type' argument.

        Returns Void() when the argument is absent.
        """
        ref = praw.optarg('type', Void())
        if ref != Void():
            ref = self.buildref(praw.unit, ref)
        return ref

    def build_subprogram(self, raw):
        if raw.optname is None:
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params,
                result=result)

    def build_formal_parameter(self, raw):
        ref = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=ref)

    def build_pointer_type(self, raw):
        ref = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=ref)

    def build_member(self, raw):
        ref = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=ref,
                bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is None:
            incomplete = StructIncompleteDef(raw.id, raw.optname)
            raw.unit.incomplete.put(raw.id, incomplete)
        else:
            # Already being built further up the call chain: hand back the
            # placeholder instead of recursing.
            return incomplete
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        if byte_size is None:
            # NOTE(review): this object is overwritten by the StructDef
            # below and never used; kept as found -- confirm the intent.
            obj = StructForwardDef(raw.id, raw.name, members=members,
                    forcename=raw.name)
        obj = StructDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_union_type(self, raw):
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is None:
            incomplete = UnionIncompleteDef(raw.id, raw.optname)
            raw.unit.incomplete.put(raw.id, incomplete)
        else:
            # Already being built further up the call chain.
            return incomplete
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        obj = UnionDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        obj.incomplete = incomplete
        incomplete.complete = obj
        return obj

    def build_typedef(self, raw):
        ref = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=ref)

    def build_const_type(self, raw):
        ref = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=ref)

    def build_volatile_type(self, raw):
        ref = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=ref)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        ref = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=ref, subranges=subranges)

    def build_subrange_type(self, raw):
        ref = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=ref,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert *id*, either 'N' or '<N>', to an integer.

        Raises ValueError for any other form.
        """
        try:
            return int(id)
        except ValueError:
            if (id.startswith('<') and id.endswith('>')):
                return int(id[1:-1])
            else:
                raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for *raw*, building and caching it if needed.

        Raises AttributeError when there is no builder for the tag.
        """
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that *id* refers to in *unit*."""
        id = self._get_id(id)
        raw = unit.tags[id]
        obj = self.build(raw)
        return obj

# }}}

class Shlib(object):
    """One library file: its version maps and symbol aliases."""

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}

    def parse_objdump(self):
        """Fill ``versions`` (and ``alias_syms``) from ObjdumpParser output."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is " \
                                    "already defined: %s: %s at %08x -- %s at %08x" % \
                                    (alias.alias, alias.name, alias.offset,
                                            prevalias.name, prevalias.offset))
                    # The most recent alias wins.
                    self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Attach a definition to every symbol that can be located."""
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            # Best effort: try the symbol's own offset first, then the
            # offsets of its aliases, longest local name first.  Any failure
            # (e.g. a missing key, or local_offsetmap never having been set)
            # means "not found".  Exception rather than a bare except, so
            # that KeyboardInterrupt and SystemExit still propagate.
            raw = None
            try:
                raw = dwarfdump.offsetmap[sym.offset]
            except Exception:
                try:
                    localnames = self.local_offsetmap[sym.offset]
                    localnames.sort(key=lambda x: -len(x))
                    for localname in localnames:
                        if localname not in self.alias_syms:
                            continue
                        alias = self.alias_syms[localname]
                        raw = dwarfdump.offsetmap[alias.offset]
                        break
                except Exception:
                    pass
            return raw

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if not raw:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    print("Parsing symbol %s (%s)" % (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exit with status 1 if the file does not exist."""
        if not os.path.isfile(self.libfile):
            sys.stderr.write("No such file: %s\n" % self.libfile)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()

# {{{ parsers

class Parser(object):
    """Runs a command and feeds its output, line by line, to ``self.parser``."""

    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run ``self.proc`` through os.popen and parse its output.

        Each line is stripped; non-empty ones are passed to ``self.parser``.
        Exits with status 2 when closing the pipe returns a true value.
        """
        fd = os.popen(self.proc, 'r')
        while True:
            line = fd.readline()
            if (not line):
                break
            line = line.strip()
            if (line):
                self.parser(line)
        err = fd.close()
        if err:
            sys.stderr.write("Execution failed: %s\n" % self.proc)
            sys.exit(2)

    def parse_begin(self, line):
        print(line)


# NOTE(review): SOURCE is cut off here, in the middle of the next definition.
# The visible fragment is kept verbatim below (as a comment, because it cannot
# be completed from this view) so that nothing is lost:
#
# class ObjdumpParser(Parser): re_header = re.compile('(?P