Home | History | Annotate | Download | only in find_runtime_symbols
      1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import bisect
      6 import re
      7 
      8 
      9 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]*\)(\s*const)?')
     10 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')
     11 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')
     12 _READELF_SECTION_HEADER_PATTER = re.compile(
     13     '^\s*\[\s*(Nr|\d+)\]\s+(|\S+)\s+([A-Z_]+)\s+([0-9a-f]+)\s+'
     14     '([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9]+)\s+([WAXMSILGxOop]*)\s+'
     15     '([0-9]+)\s+([0-9]+)\s+([0-9]+)')
     16 
     17 
     18 class ParsingException(Exception):
     19   def __str__(self):
     20     return repr(self.args[0])
     21 
     22 
     23 class AddressMapping(object):
     24   def __init__(self):
     25     self._symbol_map = {}
     26 
     27   def append(self, start, entry):
     28     self._symbol_map[start] = entry
     29 
     30   def find(self, address):
     31     return self._symbol_map.get(address)
     32 
     33 
     34 class RangeAddressMapping(AddressMapping):
     35   def __init__(self):
     36     super(RangeAddressMapping, self).__init__()
     37     self._sorted_start_list = []
     38     self._is_sorted = True
     39 
     40   def append(self, start, entry):
     41     if self._sorted_start_list:
     42       if self._sorted_start_list[-1] > start:
     43         self._is_sorted = False
     44       elif self._sorted_start_list[-1] == start:
     45         return
     46     self._sorted_start_list.append(start)
     47     self._symbol_map[start] = entry
     48 
     49   def find(self, address):
     50     if not self._sorted_start_list:
     51       return None
     52     if not self._is_sorted:
     53       self._sorted_start_list.sort()
     54       self._is_sorted = True
     55     found_index = bisect.bisect_left(self._sorted_start_list, address)
     56     found_start_address = self._sorted_start_list[found_index - 1]
     57     return self._symbol_map[found_start_address]
     58 
     59 
     60 class Procedure(object):
     61   """A class for a procedure symbol and an address range for the symbol."""
     62 
     63   def __init__(self, start, end, name):
     64     self.start = start
     65     self.end = end
     66     self.name = name
     67 
     68   def __eq__(self, other):
     69     return (self.start == other.start and
     70             self.end == other.end and
     71             self.name == other.name)
     72 
     73   def __ne__(self, other):
     74     return not self.__eq__(other)
     75 
     76   def __str__(self):
     77     return '%x-%x: %s' % (self.start, self.end, self.name)
     78 
     79 
     80 class ElfSection(object):
     81   """A class for an elf section header."""
     82 
     83   def __init__(
     84       self, number, name, stype, address, offset, size, es, flg, lk, inf, al):
     85     self.number = number
     86     self.name = name
     87     self.stype = stype
     88     self.address = address
     89     self.offset = offset
     90     self.size = size
     91     self.es = es
     92     self.flg = flg
     93     self.lk = lk
     94     self.inf = inf
     95     self.al = al
     96 
     97   def __eq__(self, other):
     98     return (self.number == other.number and
     99             self.name == other.name and
    100             self.stype == other.stype and
    101             self.address == other.address and
    102             self.offset == other.offset and
    103             self.size == other.size and
    104             self.es == other.es and
    105             self.flg == other.flg and
    106             self.lk == other.lk and
    107             self.inf == other.inf and
    108             self.al == other.al)
    109 
    110   def __ne__(self, other):
    111     return not self.__eq__(other)
    112 
    113   def __str__(self):
    114     return '%x+%x(%x) %s' % (self.address, self.size, self.offset, self.name)
    115 
    116 
    117 class StaticSymbolsInFile(object):
    118   """Represents static symbol information in a binary file."""
    119 
    120   def __init__(self, my_name):
    121     self.my_name = my_name
    122     self._elf_sections = []
    123     self._procedures = RangeAddressMapping()
    124     self._sourcefiles = RangeAddressMapping()
    125     self._typeinfos = AddressMapping()
    126 
    127   def _append_elf_section(self, elf_section):
    128     self._elf_sections.append(elf_section)
    129 
    130   def _append_procedure(self, start, procedure):
    131     self._procedures.append(start, procedure)
    132 
    133   def _append_sourcefile(self, start, sourcefile):
    134     self._sourcefiles.append(start, sourcefile)
    135 
    136   def _append_typeinfo(self, start, typeinfo):
    137     self._typeinfos.append(start, typeinfo)
    138 
    139   def _find_symbol_by_runtime_address(self, address, vma, target):
    140     if not (vma.begin <= address < vma.end):
    141       return None
    142 
    143     if vma.name != self.my_name:
    144       return None
    145 
    146     file_offset = address - (vma.begin - vma.offset)
    147     elf_address = None
    148     for section in self._elf_sections:
    149       if section.offset <= file_offset < (section.offset + section.size):
    150         elf_address = section.address + file_offset - section.offset
    151     if not elf_address:
    152       return None
    153 
    154     return target.find(elf_address)
    155 
    156   def find_procedure_by_runtime_address(self, address, vma):
    157     return self._find_symbol_by_runtime_address(address, vma, self._procedures)
    158 
    159   def find_sourcefile_by_runtime_address(self, address, vma):
    160     return self._find_symbol_by_runtime_address(address, vma, self._sourcefiles)
    161 
    162   def find_typeinfo_by_runtime_address(self, address, vma):
    163     return self._find_symbol_by_runtime_address(address, vma, self._typeinfos)
    164 
    165   def load_readelf_ew(self, f):
    166     found_header = False
    167     for line in f:
    168       if line.rstrip() == 'Section Headers:':
    169         found_header = True
    170         break
    171     if not found_header:
    172       return None
    173 
    174     for line in f:
    175       line = line.rstrip()
    176       matched = _READELF_SECTION_HEADER_PATTER.match(line)
    177       if matched:
    178         self._append_elf_section(ElfSection(
    179             int(matched.group(1), 10), # number
    180             matched.group(2), # name
    181             matched.group(3), # stype
    182             int(matched.group(4), 16), # address
    183             int(matched.group(5), 16), # offset
    184             int(matched.group(6), 16), # size
    185             matched.group(7), # es
    186             matched.group(8), # flg
    187             matched.group(9), # lk
    188             matched.group(10), # inf
    189             matched.group(11) # al
    190             ))
    191       else:
    192         if line in ('Key to Flags:', 'Program Headers:'):
    193           break
    194 
    195   def load_readelf_debug_decodedline_file(self, input_file):
    196     for line in input_file:
    197       splitted = line.rstrip().split(None, 2)
    198       self._append_sourcefile(int(splitted[0], 16), splitted[1])
    199 
    200   @staticmethod
    201   def _parse_nm_bsd_line(line):
    202     if line[8] == ' ':
    203       return line[0:8], line[9], line[11:]
    204     elif line[16] == ' ':
    205       return line[0:16], line[17], line[19:]
    206     raise ParsingException('Invalid nm output.')
    207 
    208   @staticmethod
    209   def _get_short_function_name(function):
    210     while True:
    211       function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)
    212       if not number:
    213         break
    214     while True:
    215       function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)
    216       if not number:
    217         break
    218     return _LEADING_TYPE_PATTERN.sub('\g<1>', function)
    219 
    220   def load_nm_bsd(self, f, mangled=False):
    221     last_start = 0
    222     routine = ''
    223 
    224     for line in f:
    225       line = line.rstrip()
    226       sym_value, sym_type, sym_name = self._parse_nm_bsd_line(line)
    227 
    228       if sym_value[0] == ' ':
    229         continue
    230 
    231       start_val = int(sym_value, 16)
    232 
    233       if (sym_type in ('r', 'R', 'D', 'U', 'd', 'V') and
    234           (not mangled and sym_name.startswith('typeinfo'))):
    235         self._append_typeinfo(start_val, sym_name)
    236 
    237       # It's possible for two symbols to share the same address, if
    238       # one is a zero-length variable (like __start_google_malloc) or
    239       # one symbol is a weak alias to another (like __libc_malloc).
    240       # In such cases, we want to ignore all values except for the
    241       # actual symbol, which in nm-speak has type "T".  The logic
    242       # below does this, though it's a bit tricky: what happens when
    243       # we have a series of lines with the same address, is the first
    244       # one gets queued up to be processed.  However, it won't
    245       # *actually* be processed until later, when we read a line with
    246       # a different address.  That means that as long as we're reading
    247       # lines with the same address, we have a chance to replace that
    248       # item in the queue, which we do whenever we see a 'T' entry --
    249       # that is, a line with type 'T'.  If we never see a 'T' entry,
    250       # we'll just go ahead and process the first entry (which never
    251       # got touched in the queue), and ignore the others.
    252       if start_val == last_start and (sym_type == 't' or sym_type == 'T'):
    253         # We are the 'T' symbol at this address, replace previous symbol.
    254         routine = sym_name
    255         continue
    256       elif start_val == last_start:
    257         # We're not the 'T' symbol at this address, so ignore us.
    258         continue
    259 
    260       # Tag this routine with the starting address in case the image
    261       # has multiple occurrences of this routine.  We use a syntax
    262       # that resembles template paramters that are automatically
    263       # stripped out by ShortFunctionName()
    264       sym_name += "<%016x>" % start_val
    265 
    266       if not mangled:
    267         routine = self._get_short_function_name(routine)
    268       self._append_procedure(
    269           last_start, Procedure(last_start, start_val, routine))
    270 
    271       last_start = start_val
    272       routine = sym_name
    273 
    274     if not mangled:
    275       routine = self._get_short_function_name(routine)
    276     self._append_procedure(
    277         last_start, Procedure(last_start, last_start, routine))
    278