Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
      3 #
      4 #                     The LLVM Compiler Infrastructure
      5 #
      6 # This file is distributed under the University of Illinois Open Source
      7 # License. See LICENSE.TXT for details.
      8 #
      9 #===------------------------------------------------------------------------===#
     10 import bisect
     11 import getopt
     12 import os
     13 import re
     14 import subprocess
     15 import sys
     16 
     17 llvm_symbolizer = None
     18 symbolizers = {}
     19 filetypes = {}
     20 vmaddrs = {}
     21 DEBUG = False
     22 demangle = False;
     23 
     24 
     25 # FIXME: merge the code that calls fix_filename().
     26 def fix_filename(file_name):
     27   for path_to_cut in sys.argv[1:]:
     28     file_name = re.sub('.*' + path_to_cut, '', file_name)
     29   file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
     30   file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
     31   return file_name
     32 
     33 
     34 class Symbolizer(object):
     35   def __init__(self):
     36     pass
     37 
     38   def symbolize(self, addr, binary, offset):
     39     """Symbolize the given address (pair of binary and offset).
     40 
     41     Overriden in subclasses.
     42     Args:
     43         addr: virtual address of an instruction.
     44         binary: path to executable/shared object containing this instruction.
     45         offset: instruction offset in the @binary.
     46     Returns:
     47         list of strings (one string for each inlined frame) describing
     48         the code locations for this instruction (that is, function name, file
     49         name, line and column numbers).
     50     """
     51     return None
     52 
     53 
     54 class LLVMSymbolizer(Symbolizer):
     55   def __init__(self, symbolizer_path):
     56     super(LLVMSymbolizer, self).__init__()
     57     self.symbolizer_path = symbolizer_path
     58     self.pipe = self.open_llvm_symbolizer()
     59 
     60   def open_llvm_symbolizer(self):
     61     if not os.path.exists(self.symbolizer_path):
     62       return None
     63     cmd = [self.symbolizer_path,
     64            '--use-symbol-table=true',
     65            '--demangle=%s' % demangle,
     66            '--functions=true',
     67            '--inlining=true']
     68     if DEBUG:
     69       print ' '.join(cmd)
     70     return subprocess.Popen(cmd, stdin=subprocess.PIPE,
     71                             stdout=subprocess.PIPE)
     72 
     73   def symbolize(self, addr, binary, offset):
     74     """Overrides Symbolizer.symbolize."""
     75     if not self.pipe:
     76       return None
     77     result = []
     78     try:
     79       symbolizer_input = '%s %s' % (binary, offset)
     80       if DEBUG:
     81         print symbolizer_input
     82       print >> self.pipe.stdin, symbolizer_input
     83       while True:
     84         function_name = self.pipe.stdout.readline().rstrip()
     85         if not function_name:
     86           break
     87         file_name = self.pipe.stdout.readline().rstrip()
     88         file_name = fix_filename(file_name)
     89         if (not function_name.startswith('??') and
     90             not file_name.startswith('??')):
     91           # Append only valid frames.
     92           result.append('%s in %s %s' % (addr, function_name,
     93                                          file_name))
     94     except Exception:
     95       result = []
     96     if not result:
     97       result = None
     98     return result
     99 
    100 
    101 def LLVMSymbolizerFactory(system):
    102   symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    103   if not symbolizer_path:
    104     # Assume llvm-symbolizer is in PATH.
    105     symbolizer_path = 'llvm-symbolizer'
    106   return LLVMSymbolizer(symbolizer_path)
    107 
    108 
    109 class Addr2LineSymbolizer(Symbolizer):
    110   def __init__(self, binary):
    111     super(Addr2LineSymbolizer, self).__init__()
    112     self.binary = binary
    113     self.pipe = self.open_addr2line()
    114 
    115   def open_addr2line(self):
    116     cmd = ['addr2line', '-f']
    117     if demangle:
    118       cmd += ['--demangle']
    119     cmd += ['-e', self.binary]
    120     if DEBUG:
    121       print ' '.join(cmd)
    122     return subprocess.Popen(cmd,
    123                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    124 
    125   def symbolize(self, addr, binary, offset):
    126     """Overrides Symbolizer.symbolize."""
    127     if self.binary != binary:
    128       return None
    129     try:
    130       print >> self.pipe.stdin, offset
    131       function_name = self.pipe.stdout.readline().rstrip()
    132       file_name = self.pipe.stdout.readline().rstrip()
    133     except Exception:
    134       function_name = ''
    135       file_name = ''
    136     file_name = fix_filename(file_name)
    137     return ['%s in %s %s' % (addr, function_name, file_name)]
    138 
    139 
    140 class DarwinSymbolizer(Symbolizer):
    141   def __init__(self, addr, binary):
    142     super(DarwinSymbolizer, self).__init__()
    143     self.binary = binary
    144     # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    145     if len(addr) > 10:
    146       self.arch = 'x86_64'
    147     else:
    148       self.arch = 'i386'
    149     self.vmaddr = None
    150     self.pipe = None
    151 
    152   def write_addr_to_pipe(self, offset):
    153     print >> self.pipe.stdin, '0x%x' % int(offset, 16)
    154 
    155   def open_atos(self):
    156     if DEBUG:
    157       print 'atos -o %s -arch %s' % (self.binary, self.arch)
    158     cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    159     self.pipe = subprocess.Popen(cmdline,
    160                                  stdin=subprocess.PIPE,
    161                                  stdout=subprocess.PIPE,
    162                                  stderr=subprocess.PIPE)
    163 
    164   def symbolize(self, addr, binary, offset):
    165     """Overrides Symbolizer.symbolize."""
    166     if self.binary != binary:
    167       return None
    168     self.open_atos()
    169     self.write_addr_to_pipe(offset)
    170     self.pipe.stdin.close()
    171     atos_line = self.pipe.stdout.readline().rstrip()
    172     # A well-formed atos response looks like this:
    173     #   foo(type1, type2) (in object.name) (filename.cc:80)
    174     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    175     if DEBUG:
    176       print 'atos_line: ', atos_line
    177     if match:
    178       function_name = match.group(1)
    179       function_name = re.sub('\(.*?\)', '', function_name)
    180       file_name = fix_filename(match.group(3))
    181       return ['%s in %s %s' % (addr, function_name, file_name)]
    182     else:
    183       return ['%s in %s' % (addr, atos_line)]
    184 
    185 
    186 # Chain several symbolizers so that if one symbolizer fails, we fall back
    187 # to the next symbolizer in chain.
    188 class ChainSymbolizer(Symbolizer):
    189   def __init__(self, symbolizer_list):
    190     super(ChainSymbolizer, self).__init__()
    191     self.symbolizer_list = symbolizer_list
    192 
    193   def symbolize(self, addr, binary, offset):
    194     """Overrides Symbolizer.symbolize."""
    195     for symbolizer in self.symbolizer_list:
    196       if symbolizer:
    197         result = symbolizer.symbolize(addr, binary, offset)
    198         if result:
    199           return result
    200     return None
    201 
    202   def append_symbolizer(self, symbolizer):
    203     self.symbolizer_list.append(symbolizer)
    204 
    205 
    206 def BreakpadSymbolizerFactory(binary):
    207   suffix = os.getenv('BREAKPAD_SUFFIX')
    208   if suffix:
    209     filename = binary + suffix
    210     if os.access(filename, os.F_OK):
    211       return BreakpadSymbolizer(filename)
    212   return None
    213 
    214 
    215 def SystemSymbolizerFactory(system, addr, binary):
    216   if system == 'Darwin':
    217     return DarwinSymbolizer(addr, binary)
    218   elif system == 'Linux':
    219     return Addr2LineSymbolizer(binary)
    220 
    221 
    222 class BreakpadSymbolizer(Symbolizer):
    223   def __init__(self, filename):
    224     super(BreakpadSymbolizer, self).__init__()
    225     self.filename = filename
    226     lines = file(filename).readlines()
    227     self.files = []
    228     self.symbols = {}
    229     self.address_list = []
    230     self.addresses = {}
    231     # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    232     fragments = lines[0].rstrip().split()
    233     self.arch = fragments[2]
    234     self.debug_id = fragments[3]
    235     self.binary = ' '.join(fragments[4:])
    236     self.parse_lines(lines[1:])
    237 
    238   def parse_lines(self, lines):
    239     cur_function_addr = ''
    240     for line in lines:
    241       fragments = line.split()
    242       if fragments[0] == 'FILE':
    243         assert int(fragments[1]) == len(self.files)
    244         self.files.append(' '.join(fragments[2:]))
    245       elif fragments[0] == 'PUBLIC':
    246         self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
    247       elif fragments[0] in ['CFI', 'STACK']:
    248         pass
    249       elif fragments[0] == 'FUNC':
    250         cur_function_addr = int(fragments[1], 16)
    251         if not cur_function_addr in self.symbols.keys():
    252           self.symbols[cur_function_addr] = ' '.join(fragments[4:])
    253       else:
    254         # Line starting with an address.
    255         addr = int(fragments[0], 16)
    256         self.address_list.append(addr)
    257         # Tuple of symbol address, size, line, file number.
    258         self.addresses[addr] = (cur_function_addr,
    259                                 int(fragments[1], 16),
    260                                 int(fragments[2]),
    261                                 int(fragments[3]))
    262     self.address_list.sort()
    263 
    264   def get_sym_file_line(self, addr):
    265     key = None
    266     if addr in self.addresses.keys():
    267       key = addr
    268     else:
    269       index = bisect.bisect_left(self.address_list, addr)
    270       if index == 0:
    271         return None
    272       else:
    273         key = self.address_list[index - 1]
    274     sym_id, size, line_no, file_no = self.addresses[key]
    275     symbol = self.symbols[sym_id]
    276     filename = self.files[file_no]
    277     if addr < key + size:
    278       return symbol, filename, line_no
    279     else:
    280       return None
    281 
    282   def symbolize(self, addr, binary, offset):
    283     if self.binary != binary:
    284       return None
    285     res = self.get_sym_file_line(int(offset, 16))
    286     if res:
    287       function_name, file_name, line_no = res
    288       result = ['%s in %s %s:%d' % (
    289           addr, function_name, file_name, line_no)]
    290       print result
    291       return result
    292     else:
    293       return None
    294 
    295 
    296 class SymbolizationLoop(object):
    297   def __init__(self, binary_name_filter=None):
    298     # Used by clients who may want to supply a different binary name.
    299     # E.g. in Chrome several binaries may share a single .dSYM.
    300     self.binary_name_filter = binary_name_filter
    301     self.system = os.uname()[0]
    302     if self.system in ['Linux', 'Darwin']:
    303       self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
    304     else:
    305       raise Exception('Unknown system')
    306 
    307   def symbolize_address(self, addr, binary, offset):
    308     # Use the chain of symbolizers:
    309     # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    310     # (fall back to next symbolizer if the previous one fails).
    311     if not binary in symbolizers:
    312       symbolizers[binary] = ChainSymbolizer(
    313           [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
    314     result = symbolizers[binary].symbolize(addr, binary, offset)
    315     if result is None:
    316       # Initialize system symbolizer only if other symbolizers failed.
    317       symbolizers[binary].append_symbolizer(
    318           SystemSymbolizerFactory(self.system, addr, binary))
    319       result = symbolizers[binary].symbolize(addr, binary, offset)
    320     # The system symbolizer must produce some result.
    321     assert result
    322     return result
    323 
    324   def print_symbolized_lines(self, symbolized_lines):
    325     if not symbolized_lines:
    326       print self.current_line
    327     else:
    328       for symbolized_frame in symbolized_lines:
    329         print '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
    330         self.frame_no += 1
    331 
    332   def process_stdin(self):
    333     self.frame_no = 0
    334     for line in sys.stdin:
    335       self.current_line = line.rstrip()
    336       #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    337       stack_trace_line_format = (
    338           '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    339       match = re.match(stack_trace_line_format, line)
    340       if not match:
    341         print self.current_line
    342         continue
    343       if DEBUG:
    344         print line
    345       _, frameno_str, addr, binary, offset = match.groups()
    346       if frameno_str == '0':
    347         # Assume that frame #0 is the first frame of new stack trace.
    348         self.frame_no = 0
    349       original_binary = binary
    350       if self.binary_name_filter:
    351         binary = self.binary_name_filter(binary)
    352       symbolized_line = self.symbolize_address(addr, binary, offset)
    353       if not symbolized_line:
    354         if original_binary != binary:
    355           symbolized_line = self.symbolize_address(addr, binary, offset)
    356       self.print_symbolized_lines(symbolized_line)
    357 
    358 
    359 if __name__ == '__main__':
    360   opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    361   for o, a in opts:
    362     if o in ("-d", "--demangle"):
    363       demangle = True;
    364   loop = SymbolizationLoop()
    365   loop.process_stdin()
    366