Home | History | Annotate | Download | only in python
      1 #!/usr/bin/python
      2 
      3 #----------------------------------------------------------------------
      4 # Be sure to add the python path that points to the LLDB shared library.
      5 #
      6 # To use this in the embedded python interpreter using "lldb":
      7 #
      8 #   cd /path/containing/crashlog.py
      9 #   lldb
     10 #   (lldb) script import crashlog
     11 #   "crashlog" command installed, type "crashlog --help" for detailed help
     12 #   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
     13 #
     14 # The benefit of running the crashlog command inside lldb in the 
     15 # embedded python interpreter is when the command completes, there 
     16 # will be a target with all of the files loaded at the locations
     17 # described in the crash log. Only the files that have stack frames
     18 # in the backtrace will be loaded unless the "--load-all" option
     19 # has been specified. This allows users to explore the program in the
     20 # state it was in right at crash time. 
     21 #
     22 # On MacOSX csh, tcsh:
     23 #   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
     24 #
     25 # On MacOSX sh, bash:
     26 #   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
     27 #----------------------------------------------------------------------
     28 
     29 import lldb
     30 import commands
     31 import optparse
     32 import os
     33 import plistlib
     34 import re
     35 import shlex
     36 import sys
     37 import time
     38 import uuid
     39 
     40 class Address:
     41     """Class that represents an address that will be symbolicated"""
     42     def __init__(self, target, load_addr):
     43         self.target = target
     44         self.load_addr = load_addr # The load address that this object represents
     45         self.so_addr = None # the resolved lldb.SBAddress (if any), named so_addr for section/offset address
     46         self.sym_ctx = None # The cached symbol context for this address
     47         self.description = None # Any original textual description of this address to be used as a backup in case symbolication fails
     48         self.symbolication = None # The cached symbolicated string that describes this address
     49         self.inlined = False
     50     def __str__(self):
     51         s = "%#16.16x" % (self.load_addr)
     52         if self.symbolication:
     53             s += " %s" % (self.symbolication)
     54         elif self.description:
     55             s += " %s" % (self.description)
     56         elif self.so_addr:
     57             s += " %s" % (self.so_addr)
     58         return s
     59 
     60     def resolve_addr(self):
     61         if self.so_addr == None:
     62             self.so_addr = self.target.ResolveLoadAddress (self.load_addr)
     63         return self.so_addr
     64 
     65     def is_inlined(self):
     66         return self.inlined
     67     
     68     def get_symbol_context(self):
     69         if self.sym_ctx == None:
     70             sb_addr = self.resolve_addr()
     71             if sb_addr:
     72                 self.sym_ctx = self.target.ResolveSymbolContextForAddress (sb_addr, lldb.eSymbolContextEverything)
     73             else:
     74                 self.sym_ctx = lldb.SBSymbolContext()
     75         return self.sym_ctx
     76 
     77     def get_instructions(self):
     78         sym_ctx = self.get_symbol_context()
     79         if sym_ctx:
     80             function = sym_ctx.GetFunction()
     81             if function:
     82                 return function.GetInstructions(self.target)
     83             return sym_ctx.GetSymbol().GetInstructions(self.target)
     84         return None
     85     
     86     def symbolicate(self, verbose = False):
     87         if self.symbolication == None:
     88             self.symbolication = ''
     89             self.inlined = False
     90             sym_ctx = self.get_symbol_context()
     91             if sym_ctx:
     92                 module = sym_ctx.GetModule()
     93                 if module:
     94                     # Print full source file path in verbose mode
     95                     if verbose:
     96                         self.symbolication += str(module.GetFileSpec()) + '`'
     97                     else:
     98                         self.symbolication += module.GetFileSpec().GetFilename() + '`'
     99                     function_start_load_addr = -1
    100                     function = sym_ctx.GetFunction()
    101                     block = sym_ctx.GetBlock()
    102                     line_entry = sym_ctx.GetLineEntry()
    103                     symbol = sym_ctx.GetSymbol()
    104                     inlined_block = block.GetContainingInlinedBlock();
    105                     if function:
    106                         self.symbolication += function.GetName()
    107 
    108                         if inlined_block:
    109                             self.inlined = True
    110                             self.symbolication += ' [inlined] ' + inlined_block.GetInlinedName();
    111                             block_range_idx = inlined_block.GetRangeIndexForBlockAddress (self.so_addr)
    112                             if block_range_idx < lldb.UINT32_MAX:
    113                                 block_range_start_addr = inlined_block.GetRangeStartAddress (block_range_idx)
    114                                 function_start_load_addr = block_range_start_addr.GetLoadAddress (self.target)
    115                         if function_start_load_addr == -1:
    116                             function_start_load_addr = function.GetStartAddress().GetLoadAddress (self.target)
    117                     elif symbol:
    118                         self.symbolication += symbol.GetName()
    119                         function_start_load_addr = symbol.GetStartAddress().GetLoadAddress (self.target)
    120                     else:
    121                         self.symbolication = ''
    122                         return False
    123 
    124                     # Dump the offset from the current function or symbol if it is non zero
    125                     function_offset = self.load_addr - function_start_load_addr
    126                     if function_offset > 0:
    127                         self.symbolication += " + %u" % (function_offset)
    128                     elif function_offset < 0:
    129                         self.symbolication += " %i (invalid negative offset, file a bug) " % function_offset
    130 
    131                     # Print out any line information if any is available
    132                     if line_entry.GetFileSpec():
    133                         # Print full source file path in verbose mode
    134                         if verbose:
    135                             self.symbolication += ' at %s' % line_entry.GetFileSpec()
    136                         else:
    137                             self.symbolication += ' at %s' % line_entry.GetFileSpec().GetFilename()
    138                         self.symbolication += ':%u' % line_entry.GetLine ()
    139                         column = line_entry.GetColumn()
    140                         if column > 0:
    141                             self.symbolication += ':%u' % column
    142                     return True
    143         return False
    144 
    145 class Section:
    146     """Class that represents an load address range"""
    147     sect_info_regex = re.compile('(?P<name>[^=]+)=(?P<range>.*)')
    148     addr_regex = re.compile('^\s*(?P<start>0x[0-9A-Fa-f]+)\s*$')
    149     range_regex = re.compile('^\s*(?P<start>0x[0-9A-Fa-f]+)\s*(?P<op>[-+])\s*(?P<end>0x[0-9A-Fa-f]+)\s*$')
    150 
    151     def __init__(self, start_addr = None, end_addr = None, name = None):
    152         self.start_addr = start_addr
    153         self.end_addr = end_addr
    154         self.name = name
    155     
    156     def contains(self, addr):
    157         return self.start_addr <= addr and addr < self.end_addr;
    158     
    159     def set_from_string(self, s):
    160         match = self.sect_info_regex.match (s)
    161         if match:
    162             self.name = match.group('name')
    163             range_str = match.group('range')
    164             addr_match = self.addr_regex.match(range_str)
    165             if addr_match:
    166                 self.start_addr = int(addr_match.group('start'), 16)
    167                 self.end_addr = None
    168                 return True
    169 
    170             range_match = self.range_regex.match(range_str)
    171             if range_match:
    172                 self.start_addr = int(range_match.group('start'), 16)
    173                 self.end_addr = int(range_match.group('end'), 16)
    174                 op = range_match.group('op')
    175                 if op == '+':
    176                     self.end_addr += self.start_addr
    177                 return True
    178         print 'error: invalid section info string "%s"' % s
    179         print 'Valid section info formats are:'
    180         print 'Format                Example                    Description'
    181         print '--------------------- -----------------------------------------------'
    182         print '<name>=<base>        __TEXT=0x123000             Section from base address only'
    183         print '<name>=<base>-<end>  __TEXT=0x123000-0x124000    Section from base address and end address'
    184         print '<name>=<base>+<size> __TEXT=0x123000+0x1000      Section from base address and size'
    185         return False
    186         
    187     def __str__(self):
    188         if self.name:
    189             if self.end_addr != None:
    190                 if self.start_addr != None:
    191                     return "%s=[0x%16.16x - 0x%16.16x)" % (self.name, self.start_addr, self.end_addr)
    192             else:
    193                 if self.start_addr != None:
    194                     return "%s=0x%16.16x" % (self.name, self.start_addr)
    195             return self.name
    196         return "<invalid>"
    197             
    198 class Image:
    199     """A class that represents an executable image and any associated data"""
    200     
    201     def __init__(self, path, uuid = None):
    202         self.path = path
    203         self.resolved_path = None
    204         self.resolved = False
    205         self.unavailable = False
    206         self.uuid = uuid
    207         self.section_infos = list()
    208         self.identifier = None
    209         self.version = None
    210         self.arch = None
    211         self.module = None
    212         self.symfile = None
    213         self.slide = None
    214         
    215     
    216     def dump(self, prefix):
    217         print "%s%s" % (prefix, self)
    218 
    219     def debug_dump(self):
    220         print 'path = "%s"' % (self.path)
    221         print 'resolved_path = "%s"' % (self.resolved_path)
    222         print 'resolved = %i' % (self.resolved)
    223         print 'unavailable = %i' % (self.unavailable)
    224         print 'uuid = %s' % (self.uuid)
    225         print 'section_infos = %s' % (self.section_infos)
    226         print 'identifier = "%s"' % (self.identifier)
    227         print 'version = %s' % (self.version)
    228         print 'arch = %s' % (self.arch)
    229         print 'module = %s' % (self.module)
    230         print 'symfile = "%s"' % (self.symfile)
    231         print 'slide = %i (0x%x)' % (self.slide, self.slide)
    232         
    233     def __str__(self):
    234         s = "%s %s %s" % (self.get_uuid(), self.version, self.get_resolved_path())
    235         for section_info in self.section_infos:
    236             s += ", %s" % (section_info)
    237         if self.slide != None:
    238             s += ', slide = 0x%16.16x' % self.slide
    239         return s        
    240     
    241     def add_section(self, section):
    242         #print "added '%s' to '%s'" % (section, self.path)
    243         self.section_infos.append (section)
    244         
    245     def get_section_containing_load_addr (self, load_addr):
    246         for section_info in self.section_infos:
    247             if section_info.contains(load_addr):
    248                 return section_info
    249         return None
    250 
    251     def get_resolved_path(self):
    252         if self.resolved_path:
    253             return self.resolved_path
    254         elif self.path:
    255             return self.path
    256         return None
    257 
    258     def get_resolved_path_basename(self):
    259         path = self.get_resolved_path()
    260         if path:
    261             return os.path.basename(path)
    262         return None
    263 
    264     def symfile_basename(self):
    265         if self.symfile:
    266             return os.path.basename(self.symfile)
    267         return None
    268     
    269     def has_section_load_info(self):
    270         return self.section_infos or self.slide != None
    271     
    272     def load_module(self, target):
    273         if self.unavailable:
    274             return None # We already warned that we couldn't find this module, so don't return an error string
    275         # Load this module into "target" using the section infos to
    276         # set the section load addresses
    277         if self.has_section_load_info():
    278             if target:
    279                 if self.module:
    280                     if self.section_infos:
    281                         num_sections_loaded = 0
    282                         for section_info in self.section_infos:
    283                             if section_info.name:
    284                                 section = self.module.FindSection (section_info.name)
    285                                 if section:
    286                                     error = target.SetSectionLoadAddress (section, section_info.start_addr)
    287                                     if error.Success():
    288                                         num_sections_loaded += 1
    289                                     else:
    290                                         return 'error: %s' % error.GetCString()
    291                                 else:
    292                                     return 'error: unable to find the section named "%s"' % section_info.name
    293                             else:
    294                                 return 'error: unable to find "%s" section in "%s"' % (range.name, self.get_resolved_path())
    295                         if num_sections_loaded == 0:
    296                             return 'error: no sections were successfully loaded'
    297                     else:
    298                         err = target.SetModuleLoadAddress(self.module, self.slide)
    299                         if err.Fail():
    300                             return err.GetCString()
    301                     return None
    302                 else:
    303                     return 'error: invalid module'
    304             else:
    305                 return 'error: invalid target'
    306         else:
    307             return 'error: no section infos'
    308         
    309     def add_module(self, target):
    310         '''Add the Image described in this object to "target" and load the sections if "load" is True.'''
    311         if target:
    312             # Try and find using UUID only first so that paths need not match up
    313             uuid_str = self.get_normalized_uuid_string()
    314             if uuid_str:
    315                 self.module = target.AddModule (None, None, uuid_str)
    316             if not self.module:
    317                 self.locate_module_and_debug_symbols ()
    318                 if self.unavailable:
    319                     return None
    320                 resolved_path = self.get_resolved_path()
    321                 self.module = target.AddModule (resolved_path, self.arch, uuid_str, self.symfile)
    322             if not self.module:
    323                 return 'error: unable to get module for (%s) "%s"' % (self.arch, self.get_resolved_path())
    324             if self.has_section_load_info():
    325                 return self.load_module(target)
    326             else:
    327                 return None # No sections, the module was added to the target, so success
    328         else:
    329             return 'error: invalid target'
    330     
    331     def locate_module_and_debug_symbols (self):
    332         # By default, just use the paths that were supplied in:
    333         # self.path
    334         # self.resolved_path
    335         # self.module
    336         # self.symfile
    337         # Subclasses can inherit from this class and override this function
    338         self.resolved = True
    339         return True
    340     
    341     def get_uuid(self):
    342         if not self.uuid and self.module:
    343             self.uuid = uuid.UUID(self.module.GetUUIDString())
    344         return self.uuid
    345 
    346     def get_normalized_uuid_string(self):
    347         if self.uuid:
    348             return str(self.uuid).upper()
    349         return None
    350 
    351     def create_target(self):
    352         '''Create a target using the information in this Image object.'''
    353         if self.unavailable:
    354             return None
    355 
    356         if self.locate_module_and_debug_symbols ():
    357             resolved_path = self.get_resolved_path();
    358             path_spec = lldb.SBFileSpec (resolved_path)
    359             #result.PutCString ('plist[%s] = %s' % (uuid, self.plist))
    360             error = lldb.SBError()
    361             target = lldb.debugger.CreateTarget (resolved_path, self.arch, None, False, error);
    362             if target:
    363                 self.module = target.FindModule(path_spec)
    364                 if self.has_section_load_info():
    365                     err = self.load_module(target)
    366                     if err:
    367                         print 'ERROR: ', err
    368                 return target
    369             else:
    370                 print 'error: unable to create a valid target for (%s) "%s"' % (self.arch, self.path)
    371         else:
    372             print 'error: unable to locate main executable (%s) "%s"' % (self.arch, self.path)
    373         return None
    374     
    375 class Symbolicator:
    376 
    377     def __init__(self):
    378         """A class the represents the information needed to symbolicate addresses in a program"""
    379         self.target = None
    380         self.images = list() # a list of images to be used when symbolicating
    381         self.addr_mask = 0xffffffffffffffff
    382     
    383     def __str__(self):
    384         s = "Symbolicator:\n"
    385         if self.target:
    386             s += "Target = '%s'\n" % (self.target)
    387             s += "Target modules:'\n"
    388             for m in self.target.modules:
    389                 print m
    390         s += "Images:\n"
    391         for image in self.images:
    392             s += '    %s\n' % (image)
    393         return s
    394     
    395     def find_images_with_identifier(self, identifier):
    396         images = list()
    397         for image in self.images:
    398             if image.identifier == identifier:
    399                 images.append(image)
    400         return images
    401         
    402     def find_image_containing_load_addr(self, load_addr):
    403         for image in self.images:
    404             if image.get_section_containing_load_addr (load_addr):
    405                 return image
    406         return None
    407     
    408     def create_target(self):
    409         if self.target:
    410             return self.target
    411 
    412         if self.images:
    413             for image in self.images:
    414                 self.target = image.create_target ()
    415                 if self.target:
    416                     if self.target.GetAddressByteSize() == 4:
    417                         triple = self.target.triple
    418                         if triple:
    419                             arch = triple.split('-')[0]
    420                             if "arm" in arch:
    421                                 self.addr_mask = 0xfffffffffffffffe
    422                     return self.target
    423         return None
    424     
    425     def symbolicate(self, load_addr, verbose = False):
    426         if not self.target:
    427             self.create_target()
    428         if self.target:
    429             live_process = False
    430             process = self.target.process
    431             if process:
    432                 state = process.state
    433                 if state > lldb.eStateUnloaded and state < lldb.eStateDetached:
    434                     live_process = True
    435             # If we don't have a live process, we can attempt to find the image
    436             # that a load address belongs to and lazily load its module in the
    437             # target, but we shouldn't do any of this if we have a live process
    438             if not live_process:
    439                 image = self.find_image_containing_load_addr (load_addr)
    440                 if image:
    441                     image.add_module (self.target)
    442             symbolicated_address = Address(self.target, load_addr)
    443             if symbolicated_address.symbolicate (verbose):
    444                 if symbolicated_address.so_addr:
    445                     symbolicated_addresses = list()
    446                     symbolicated_addresses.append(symbolicated_address)
    447                     # See if we were able to reconstruct anything?
    448                     while 1:
    449                         inlined_parent_so_addr = lldb.SBAddress()
    450                         inlined_parent_sym_ctx = symbolicated_address.sym_ctx.GetParentOfInlinedScope (symbolicated_address.so_addr, inlined_parent_so_addr)
    451                         if not inlined_parent_sym_ctx:
    452                             break
    453                         if not inlined_parent_so_addr:
    454                             break
    455 
    456                         symbolicated_address = Address(self.target, inlined_parent_so_addr.GetLoadAddress(self.target))
    457                         symbolicated_address.sym_ctx = inlined_parent_sym_ctx
    458                         symbolicated_address.so_addr = inlined_parent_so_addr
    459                         symbolicated_address.symbolicate (verbose)
    460             
    461                         # push the new frame onto the new frame stack
    462                         symbolicated_addresses.append (symbolicated_address)
    463         
    464                     if symbolicated_addresses:
    465                         return symbolicated_addresses
    466         else:
    467             print 'error: no target in Symbolicator'
    468         return None
    469             
    470         
    471 def disassemble_instructions (target, instructions, pc, insts_before_pc, insts_after_pc, non_zeroeth_frame):
    472     lines = list()
    473     pc_index = -1
    474     comment_column = 50
    475     for inst_idx, inst in enumerate(instructions):
    476         inst_pc = inst.GetAddress().GetLoadAddress(target);
    477         if pc == inst_pc:
    478             pc_index = inst_idx
    479         mnemonic = inst.GetMnemonic (target)
    480         operands =  inst.GetOperands (target)
    481         comment =  inst.GetComment (target)
    482         #data = inst.GetData (target)
    483         lines.append ("%#16.16x: %8s %s" % (inst_pc, mnemonic, operands))
    484         if comment:
    485             line_len = len(lines[-1])
    486             if line_len < comment_column:
    487                 lines[-1] += ' ' * (comment_column - line_len)
    488                 lines[-1] += "; %s" % comment
    489 
    490     if pc_index >= 0:
    491         # If we are disassembling the non-zeroeth frame, we need to backup the PC by 1
    492         if non_zeroeth_frame and pc_index > 0:
    493             pc_index = pc_index - 1
    494         if insts_before_pc == -1:
    495             start_idx = 0
    496         else:
    497             start_idx = pc_index - insts_before_pc
    498         if start_idx < 0:
    499             start_idx = 0
    500         if insts_before_pc == -1:
    501             end_idx = inst_idx
    502         else:
    503             end_idx = pc_index + insts_after_pc
    504         if end_idx > inst_idx:
    505             end_idx = inst_idx
    506         for i in range(start_idx, end_idx+1):
    507             if i == pc_index:
    508                 print ' -> ', lines[i]
    509             else:
    510                 print '    ', lines[i]
    511 
    512 def print_module_section_data (section):
    513     print section
    514     section_data = section.GetSectionData()
    515     if section_data:
    516         ostream = lldb.SBStream()
    517         section_data.GetDescription (ostream, section.GetFileAddress())
    518         print ostream.GetData()
    519 
    520 def print_module_section (section, depth):
    521     print section
    522     if depth > 0:
    523         num_sub_sections = section.GetNumSubSections()
    524         for sect_idx in range(num_sub_sections):
    525             print_module_section (section.GetSubSectionAtIndex(sect_idx), depth - 1)
    526 
    527 def print_module_sections (module, depth):
    528     for sect in module.section_iter():
    529         print_module_section (sect, depth)
    530 
    531 def print_module_symbols (module):
    532     for sym in module:
    533         print sym
    534 
    535 def Symbolicate(command_args):
    536     
    537     usage = "usage: %prog [options] <addr1> [addr2 ...]"
    538     description='''Symbolicate one or more addresses using LLDB's python scripting API..'''
    539     parser = optparse.OptionParser(description=description, prog='crashlog.py',usage=usage)
    540     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
    541     parser.add_option('-p', '--platform', type='string', metavar='platform', dest='platform', help='Specify the platform to use when creating the debug target. Valid values include "localhost", "darwin-kernel", "ios-simulator", "remote-freebsd", "remote-macosx", "remote-ios", "remote-linux".')
    542     parser.add_option('-f', '--file', type='string', metavar='file', dest='file', help='Specify a file to use when symbolicating')
    543     parser.add_option('-a', '--arch', type='string', metavar='arch', dest='arch', help='Specify a architecture to use when symbolicating')
    544     parser.add_option('-s', '--slide', type='int', metavar='slide', dest='slide', help='Specify the slide to use on the file specified with the --file option', default=None)
    545     parser.add_option('--section', type='string', action='append', dest='section_strings', help='specify <sect-name>=<start-addr> or <sect-name>=<start-addr>-<end-addr>')
    546     try:
    547         (options, args) = parser.parse_args(command_args)
    548     except:
    549         return
    550     symbolicator = Symbolicator()
    551     images = list();
    552     if options.file:
    553         image = Image(options.file);
    554         image.arch = options.arch
    555         # Add any sections that were specified with one or more --section options
    556         if options.section_strings:
    557             for section_str in options.section_strings:
    558                 section = Section()
    559                 if section.set_from_string (section_str):
    560                     image.add_section (section)
    561                 else:
    562                     sys.exit(1)
    563         if options.slide != None:
    564             image.slide = options.slide
    565         symbolicator.images.append(image)
    566     
    567     target = symbolicator.create_target()
    568     if options.verbose:
    569         print symbolicator
    570     if target:
    571         for addr_str in args:
    572             addr = int(addr_str, 0)
    573             symbolicated_addrs = symbolicator.symbolicate(addr, options.verbose)
    574             for symbolicated_addr in symbolicated_addrs:
    575                 print symbolicated_addr
    576             print
    577     else:
    578         print 'error: no target for %s' % (symbolicator)
    579         
    580 if __name__ == '__main__':
    581     # Create a new debugger instance
    582     lldb.debugger = lldb.SBDebugger.Create()
    583     Symbolicate (sys.argv[1:])
    584