Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
      3 #
      4 #                     The LLVM Compiler Infrastructure
      5 #
      6 # This file is distributed under the University of Illinois Open Source
      7 # License. See LICENSE.TXT for details.
      8 #
      9 #===------------------------------------------------------------------------===#
     10 import bisect
     11 import getopt
     12 import os
     13 import pty
     14 import re
     15 import subprocess
     16 import sys
     17 import termios
     18 
     19 llvm_symbolizer = None
     20 symbolizers = {}
     21 DEBUG = False
     22 demangle = False;
     23 
     24 
     25 # FIXME: merge the code that calls fix_filename().
     26 def fix_filename(file_name):
     27   for path_to_cut in sys.argv[1:]:
     28     file_name = re.sub('.*' + path_to_cut, '', file_name)
     29   file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
     30   file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
     31   return file_name
     32 
     33 
     34 class Symbolizer(object):
     35   def __init__(self):
     36     pass
     37 
     38   def symbolize(self, addr, binary, offset):
     39     """Symbolize the given address (pair of binary and offset).
     40 
     41     Overriden in subclasses.
     42     Args:
     43         addr: virtual address of an instruction.
     44         binary: path to executable/shared object containing this instruction.
     45         offset: instruction offset in the @binary.
     46     Returns:
     47         list of strings (one string for each inlined frame) describing
     48         the code locations for this instruction (that is, function name, file
     49         name, line and column numbers).
     50     """
     51     return None
     52 
     53 
     54 class LLVMSymbolizer(Symbolizer):
     55   def __init__(self, symbolizer_path):
     56     super(LLVMSymbolizer, self).__init__()
     57     self.symbolizer_path = symbolizer_path
     58     self.pipe = self.open_llvm_symbolizer()
     59 
     60   def open_llvm_symbolizer(self):
     61     if not os.path.exists(self.symbolizer_path):
     62       return None
     63     cmd = [self.symbolizer_path,
     64            '--use-symbol-table=true',
     65            '--demangle=%s' % demangle,
     66            '--functions=true',
     67            '--inlining=true']
     68     if DEBUG:
     69       print ' '.join(cmd)
     70     return subprocess.Popen(cmd, stdin=subprocess.PIPE,
     71                             stdout=subprocess.PIPE)
     72 
     73   def symbolize(self, addr, binary, offset):
     74     """Overrides Symbolizer.symbolize."""
     75     if not self.pipe:
     76       return None
     77     result = []
     78     try:
     79       symbolizer_input = '%s %s' % (binary, offset)
     80       if DEBUG:
     81         print symbolizer_input
     82       print >> self.pipe.stdin, symbolizer_input
     83       while True:
     84         function_name = self.pipe.stdout.readline().rstrip()
     85         if not function_name:
     86           break
     87         file_name = self.pipe.stdout.readline().rstrip()
     88         file_name = fix_filename(file_name)
     89         if (not function_name.startswith('??') and
     90             not file_name.startswith('??')):
     91           # Append only valid frames.
     92           result.append('%s in %s %s' % (addr, function_name,
     93                                          file_name))
     94     except Exception:
     95       result = []
     96     if not result:
     97       result = None
     98     return result
     99 
    100 
    101 def LLVMSymbolizerFactory(system):
    102   symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    103   if not symbolizer_path:
    104     # Assume llvm-symbolizer is in PATH.
    105     symbolizer_path = 'llvm-symbolizer'
    106   return LLVMSymbolizer(symbolizer_path)
    107 
    108 
    109 class Addr2LineSymbolizer(Symbolizer):
    110   def __init__(self, binary):
    111     super(Addr2LineSymbolizer, self).__init__()
    112     self.binary = binary
    113     self.pipe = self.open_addr2line()
    114 
    115   def open_addr2line(self):
    116     cmd = ['addr2line', '-f']
    117     if demangle:
    118       cmd += ['--demangle']
    119     cmd += ['-e', self.binary]
    120     if DEBUG:
    121       print ' '.join(cmd)
    122     return subprocess.Popen(cmd,
    123                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    124 
    125   def symbolize(self, addr, binary, offset):
    126     """Overrides Symbolizer.symbolize."""
    127     if self.binary != binary:
    128       return None
    129     try:
    130       print >> self.pipe.stdin, offset
    131       function_name = self.pipe.stdout.readline().rstrip()
    132       file_name = self.pipe.stdout.readline().rstrip()
    133     except Exception:
    134       function_name = ''
    135       file_name = ''
    136     file_name = fix_filename(file_name)
    137     return ['%s in %s %s' % (addr, function_name, file_name)]
    138 
    139 
    140 class UnbufferedLineConverter(object):
    141   """
    142   Wrap a child process that responds to each line of input with one line of
    143   output.  Uses pty to trick the child into providing unbuffered output.
    144   """
    145   def __init__(self, args, close_stderr=False):
    146     pid, fd = pty.fork()
    147     if pid == 0:
    148       # We're the child. Transfer control to command.
    149       if close_stderr:
    150         dev_null = os.open('/dev/null', 0)
    151         os.dup2(dev_null, 2)
    152       os.execvp(args[0], args)
    153     else:
    154       # Disable echoing.
    155       attr = termios.tcgetattr(fd)
    156       attr[3] = attr[3] & ~termios.ECHO
    157       termios.tcsetattr(fd, termios.TCSANOW, attr)
    158       # Set up a file()-like interface to the child process
    159       self.r = os.fdopen(fd, "r", 1)
    160       self.w = os.fdopen(os.dup(fd), "w", 1)
    161 
    162   def convert(self, line):
    163     self.w.write(line + "\n")
    164     return self.readline()
    165 
    166   def readline(self):
    167     return self.r.readline().rstrip()
    168 
    169 
    170 class DarwinSymbolizer(Symbolizer):
    171   def __init__(self, addr, binary):
    172     super(DarwinSymbolizer, self).__init__()
    173     self.binary = binary
    174     # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    175     if len(addr) > 10:
    176       self.arch = 'x86_64'
    177     else:
    178       self.arch = 'i386'
    179     self.open_atos()
    180 
    181   def open_atos(self):
    182     if DEBUG:
    183       print 'atos -o %s -arch %s' % (self.binary, self.arch)
    184     cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    185     self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)
    186 
    187   def symbolize(self, addr, binary, offset):
    188     """Overrides Symbolizer.symbolize."""
    189     if self.binary != binary:
    190       return None
    191     atos_line = self.atos.convert('0x%x' % int(offset, 16))
    192     while "got symbolicator for" in atos_line:
    193       atos_line = self.atos.readline()
    194     # A well-formed atos response looks like this:
    195     #   foo(type1, type2) (in object.name) (filename.cc:80)
    196     match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    197     if DEBUG:
    198       print 'atos_line: ', atos_line
    199     if match:
    200       function_name = match.group(1)
    201       function_name = re.sub('\(.*?\)', '', function_name)
    202       file_name = fix_filename(match.group(3))
    203       return ['%s in %s %s' % (addr, function_name, file_name)]
    204     else:
    205       return ['%s in %s' % (addr, atos_line)]
    206 
    207 
    208 # Chain several symbolizers so that if one symbolizer fails, we fall back
    209 # to the next symbolizer in chain.
    210 class ChainSymbolizer(Symbolizer):
    211   def __init__(self, symbolizer_list):
    212     super(ChainSymbolizer, self).__init__()
    213     self.symbolizer_list = symbolizer_list
    214 
    215   def symbolize(self, addr, binary, offset):
    216     """Overrides Symbolizer.symbolize."""
    217     for symbolizer in self.symbolizer_list:
    218       if symbolizer:
    219         result = symbolizer.symbolize(addr, binary, offset)
    220         if result:
    221           return result
    222     return None
    223 
    224   def append_symbolizer(self, symbolizer):
    225     self.symbolizer_list.append(symbolizer)
    226 
    227 
    228 def BreakpadSymbolizerFactory(binary):
    229   suffix = os.getenv('BREAKPAD_SUFFIX')
    230   if suffix:
    231     filename = binary + suffix
    232     if os.access(filename, os.F_OK):
    233       return BreakpadSymbolizer(filename)
    234   return None
    235 
    236 
    237 def SystemSymbolizerFactory(system, addr, binary):
    238   if system == 'Darwin':
    239     return DarwinSymbolizer(addr, binary)
    240   elif system == 'Linux':
    241     return Addr2LineSymbolizer(binary)
    242 
    243 
    244 class BreakpadSymbolizer(Symbolizer):
    245   def __init__(self, filename):
    246     super(BreakpadSymbolizer, self).__init__()
    247     self.filename = filename
    248     lines = file(filename).readlines()
    249     self.files = []
    250     self.symbols = {}
    251     self.address_list = []
    252     self.addresses = {}
    253     # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    254     fragments = lines[0].rstrip().split()
    255     self.arch = fragments[2]
    256     self.debug_id = fragments[3]
    257     self.binary = ' '.join(fragments[4:])
    258     self.parse_lines(lines[1:])
    259 
    260   def parse_lines(self, lines):
    261     cur_function_addr = ''
    262     for line in lines:
    263       fragments = line.split()
    264       if fragments[0] == 'FILE':
    265         assert int(fragments[1]) == len(self.files)
    266         self.files.append(' '.join(fragments[2:]))
    267       elif fragments[0] == 'PUBLIC':
    268         self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
    269       elif fragments[0] in ['CFI', 'STACK']:
    270         pass
    271       elif fragments[0] == 'FUNC':
    272         cur_function_addr = int(fragments[1], 16)
    273         if not cur_function_addr in self.symbols.keys():
    274           self.symbols[cur_function_addr] = ' '.join(fragments[4:])
    275       else:
    276         # Line starting with an address.
    277         addr = int(fragments[0], 16)
    278         self.address_list.append(addr)
    279         # Tuple of symbol address, size, line, file number.
    280         self.addresses[addr] = (cur_function_addr,
    281                                 int(fragments[1], 16),
    282                                 int(fragments[2]),
    283                                 int(fragments[3]))
    284     self.address_list.sort()
    285 
    286   def get_sym_file_line(self, addr):
    287     key = None
    288     if addr in self.addresses.keys():
    289       key = addr
    290     else:
    291       index = bisect.bisect_left(self.address_list, addr)
    292       if index == 0:
    293         return None
    294       else:
    295         key = self.address_list[index - 1]
    296     sym_id, size, line_no, file_no = self.addresses[key]
    297     symbol = self.symbols[sym_id]
    298     filename = self.files[file_no]
    299     if addr < key + size:
    300       return symbol, filename, line_no
    301     else:
    302       return None
    303 
    304   def symbolize(self, addr, binary, offset):
    305     if self.binary != binary:
    306       return None
    307     res = self.get_sym_file_line(int(offset, 16))
    308     if res:
    309       function_name, file_name, line_no = res
    310       result = ['%s in %s %s:%d' % (
    311           addr, function_name, file_name, line_no)]
    312       print result
    313       return result
    314     else:
    315       return None
    316 
    317 
    318 class SymbolizationLoop(object):
    319   def __init__(self, binary_name_filter=None):
    320     # Used by clients who may want to supply a different binary name.
    321     # E.g. in Chrome several binaries may share a single .dSYM.
    322     self.binary_name_filter = binary_name_filter
    323     self.system = os.uname()[0]
    324     if self.system in ['Linux', 'Darwin']:
    325       self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
    326     else:
    327       raise Exception('Unknown system')
    328 
    329   def symbolize_address(self, addr, binary, offset):
    330     # Use the chain of symbolizers:
    331     # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    332     # (fall back to next symbolizer if the previous one fails).
    333     if not binary in symbolizers:
    334       symbolizers[binary] = ChainSymbolizer(
    335           [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
    336     result = symbolizers[binary].symbolize(addr, binary, offset)
    337     if result is None:
    338       # Initialize system symbolizer only if other symbolizers failed.
    339       symbolizers[binary].append_symbolizer(
    340           SystemSymbolizerFactory(self.system, addr, binary))
    341       result = symbolizers[binary].symbolize(addr, binary, offset)
    342     # The system symbolizer must produce some result.
    343     assert result
    344     return result
    345 
    346   def print_symbolized_lines(self, symbolized_lines):
    347     if not symbolized_lines:
    348       print self.current_line
    349     else:
    350       for symbolized_frame in symbolized_lines:
    351         print '    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
    352         self.frame_no += 1
    353 
    354   def process_stdin(self):
    355     self.frame_no = 0
    356     while True:
    357       line = sys.stdin.readline()
    358       if not line:
    359         break
    360       self.current_line = line.rstrip()
    361       #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    362       stack_trace_line_format = (
    363           '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    364       match = re.match(stack_trace_line_format, line)
    365       if not match:
    366         print self.current_line
    367         continue
    368       if DEBUG:
    369         print line
    370       _, frameno_str, addr, binary, offset = match.groups()
    371       if frameno_str == '0':
    372         # Assume that frame #0 is the first frame of new stack trace.
    373         self.frame_no = 0
    374       original_binary = binary
    375       if self.binary_name_filter:
    376         binary = self.binary_name_filter(binary)
    377       symbolized_line = self.symbolize_address(addr, binary, offset)
    378       if not symbolized_line:
    379         if original_binary != binary:
    380           symbolized_line = self.symbolize_address(addr, binary, offset)
    381       self.print_symbolized_lines(symbolized_line)
    382 
    383 
    384 if __name__ == '__main__':
    385   opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
    386   for o, a in opts:
    387     if o in ("-d", "--demangle"):
    388       demangle = True;
    389   loop = SymbolizationLoop()
    390   loop.process_stdin()
    391