Home | History | Annotate | Download | only in bin
      1 #!/usr/bin/env python
      2 # Copyright 2016 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 import argparse
      7 import bisect
      8 import collections
      9 import gzip
     10 import json
     11 import os
     12 import re
     13 import subprocess
     14 import sys
     15 
# Directory ../third_party/symbols, resolved relative to the real location of
# this script (symlinks to the script are followed by realpath()).
_SYMBOLS_PATH = os.path.abspath(os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    '..',
    'third_party',
    'symbols'))
# Appended rather than prepended, so directories already on sys.path are
# searched first when resolving the import below.
sys.path.append(_SYMBOLS_PATH)
# pylint: disable=import-error
import symbols.elf_symbolizer as elf_symbolizer
     24 
     25 
# Relevant trace event phases from Chromium's
# src/base/trace_event/common/trace_event_common.h.
# These are compared against the 'ph' field of each trace event.
TRACE_EVENT_PHASE_METADATA = 'M'
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'


# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
# via 'name' group. The pattern is anchored at the start of the path only.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored. Joined with
# the build output directory and the library name when remapping Android
# paths.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
     40 
     41 
     42 def FindInSystemPath(binary_name):
     43   paths = os.environ['PATH'].split(os.pathsep)
     44   for path in paths:
     45     binary_path = os.path.join(path, binary_name)
     46     if os.path.isfile(binary_path):
     47       return binary_path
     48   return None
     49 
     50 
     51 def IsSymbolizableFile(file_path):
     52   result = subprocess.check_output(['file', '-0', file_path])
     53   type_string = result[result.find('\0') + 1:]
     54   return bool(re.match(r'\: (ELF|Mach-O) (32|64)-bit\b', type_string))
     55 
     56 
     57 class ProcessMemoryMaps(object):
     58   """Represents 'process_mmaps' trace file entry."""
     59 
     60   class Region(object):
     61     def __init__(self, start_address, size, file_path):
     62       self._start_address = start_address
     63       self._size = size
     64       self._file_path = file_path
     65 
     66     @property
     67     def start_address(self):
     68       return self._start_address
     69 
     70     @property
     71     def end_address(self):
     72       return self._start_address + self._size
     73 
     74     @property
     75     def size(self):
     76       return self._size
     77 
     78     @property
     79     def file_path(self):
     80       return self._file_path
     81 
     82     def __cmp__(self, other):
     83       if isinstance(other, type(self)):
     84         return long(self._start_address).__cmp__(long(other._start_address))
     85       elif isinstance(other, (long, int)):
     86         return long(self._start_address).__cmp__(long(other))
     87       else:
     88         raise Exception('Cannot compare with %s' % type(other))
     89 
     90     def __repr__(self):
     91       return 'Region(0x{:X} - 0x{:X}, {})'.format(
     92           self.start_address, self.end_address, self.file_path)
     93 
     94   def __init__(self, process_mmaps):
     95     """Parses 'process_mmaps' dictionary."""
     96 
     97     regions = []
     98     for region_value in process_mmaps['vm_regions']:
     99       regions.append(self.Region(
    100           long(region_value['sa'], 16),
    101           long(region_value['sz'], 16),
    102           region_value['mf']))
    103     regions.sort()
    104 
    105     # Copy regions without duplicates and check for overlaps.
    106     self._regions = []
    107     previous_region = None
    108     for region in regions:
    109       if previous_region is not None:
    110         if region == previous_region:
    111           continue
    112         assert region.start_address >= previous_region.end_address, \
    113             'Regions {} and {} overlap.'.format(previous_region, region)
    114       previous_region = region
    115       self._regions.append(region)
    116 
    117   @property
    118   def regions(self):
    119     return self._regions
    120 
    121   def FindRegion(self, address):
    122     """Finds region containing |address|. Returns None if none found."""
    123 
    124     region_index = bisect.bisect_right(self._regions, address) - 1
    125     if region_index >= 0:
    126       region = self._regions[region_index]
    127       if address >= region.start_address and address < region.end_address:
    128         return region
    129     return None
    130 
    131 
    132 class StackFrames(object):
    133   """Represents 'stackFrames' trace file entry."""
    134 
    135   class PCFrame(object):
    136     def __init__(self, pc, frame):
    137       self._modified = False
    138       self._pc = pc
    139       self._frame = frame
    140 
    141     @property
    142     def modified(self):
    143       return self._modified
    144 
    145     @property
    146     def pc(self):
    147       return self._pc
    148 
    149     @property
    150     def name(self):
    151       return self._frame['name']
    152 
    153     @name.setter
    154     def name(self, value):
    155       self._modified = True
    156       self._frame['name'] = value
    157 
    158   def __init__(self, stack_frames):
    159     """Constructs object using 'stackFrames' dictionary."""
    160     self._pc_frames = []
    161     for frame in stack_frames.itervalues():
    162       pc_frame = self._ParsePCFrame(frame)
    163       if pc_frame:
    164         self._pc_frames.append(pc_frame)
    165 
    166   @property
    167   def pc_frames(self):
    168     return self._pc_frames
    169 
    170   @property
    171   def modified(self):
    172     return any(f.modified for f in self._pc_frames)
    173 
    174   _PC_TAG = 'pc:'
    175 
    176   @classmethod
    177   def _ParsePCFrame(self, frame):
    178     name = frame['name']
    179     if not name.startswith(self._PC_TAG):
    180       return None
    181     pc = long(name[len(self._PC_TAG):], 16)
    182     return self.PCFrame(pc, frame)
    183 
    184 
    185 class Process(object):
    186   """Holds various bits of information about a process in a trace file."""
    187 
    188   def __init__(self, pid):
    189     self.pid = pid
    190     self.name = None
    191     self.mmaps = None
    192     self.stack_frames = None
    193 
    194 
    195 def CollectProcesses(trace):
    196   """Parses trace dictionary and returns pid->Process map of all processes
    197      suitable for symbolization (which have both mmaps and stack_frames).
    198   """
    199 
    200   process_map = {}
    201 
    202   # Android traces produced via 'chrome://inspect/?tracing#devices' are
    203   # just list of events.
    204   events = trace if isinstance(trace, list) else trace['traceEvents']
    205   for event in events:
    206     name = event.get('name')
    207     if not name:
    208       continue
    209 
    210     pid = event['pid']
    211     process = process_map.get(pid)
    212     if process is None:
    213       process = Process(pid)
    214       process_map[pid] = process
    215 
    216     phase = event['ph']
    217     if phase == TRACE_EVENT_PHASE_METADATA:
    218       if name == 'process_name':
    219         process.name = event['args']['name']
    220       elif name == 'stackFrames':
    221         process.stack_frames = StackFrames(event['args']['stackFrames'])
    222     elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
    223       process_mmaps = event['args']['dumps'].get('process_mmaps')
    224       if process_mmaps:
    225         # TODO(dskiba): this parses all process_mmaps, but retains only the
    226         #               last one. We need to parse only once (lazy parsing?).
    227         process.mmaps = ProcessMemoryMaps(process_mmaps)
    228 
    229   return [p for p in process_map.itervalues() if p.mmaps and p.stack_frames]
    230 
    231 
    232 class SymbolizableFile(object):
    233   """Holds file path, addresses to symbolize and stack frames to update.
    234 
    235   This class is a link between ELFSymbolizer and a trace file: it specifies
    236   what to symbolize (addresses) and what to update with the symbolization
    237   result (frames).
    238   """
    239   def __init__(self, file_path):
    240     self.path = file_path
    241     self.frames_by_address = collections.defaultdict(list)
    242 
    243 
    244 def ResolveSymbolizableFiles(processes):
    245   """Resolves and groups PCs into list of SymbolizableFiles.
    246 
    247   As part of the grouping process, this function resolves PC from each stack
    248   frame to the corresponding mmap region. Stack frames that failed to resolve
    249   are symbolized with '<unresolved>'.
    250   """
    251   symfile_by_path = {}
    252   for process in processes:
    253     for frame in process.stack_frames.pc_frames:
    254       region = process.mmaps.FindRegion(frame.pc)
    255       if region is None:
    256         frame.name = '<unresolved>'
    257         continue
    258 
    259       symfile = symfile_by_path.get(region.file_path)
    260       if symfile is None:
    261         symfile = SymbolizableFile(region.file_path)
    262         symfile_by_path[symfile.path] = symfile
    263 
    264       relative_pc = frame.pc - region.start_address
    265       symfile.frames_by_address[relative_pc].append(frame)
    266   return symfile_by_path.values()
    267 
    268 
    269 def SymbolizeFiles(symfiles, addr2line_path):
    270   """Symbolizes each file in the given list of SymbolizableFiles
    271      and updates stack frames with symbolization results."""
    272   print 'Symbolizing...'
    273 
    274   def _SubPrintf(message, *args):
    275     print ('  ' + message).format(*args)
    276 
    277   symbolized = False
    278   for symfile in symfiles:
    279     unsymbolized_name = '<{}>'.format(
    280         symfile.path if symfile.path else 'unnamed')
    281 
    282     problem = None
    283     if not os.path.isabs(symfile.path):
    284       problem = 'not a file'
    285     elif not os.path.isfile(symfile.path):
    286       problem = "file doesn't exist"
    287     elif not IsSymbolizableFile(symfile.path):
    288       problem = 'file is not symbolizable'
    289     if problem:
    290       _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
    291                  len(symfile.frames_by_address),
    292                  symfile.path,
    293                  problem)
    294       for frames in symfile.frames_by_address.itervalues():
    295         for frame in frames:
    296           frame.name = unsymbolized_name
    297       continue
    298 
    299     def _SymbolizerCallback(sym_info, frames):
    300       # Unwind inline chain to the top.
    301       while sym_info.inlined_by:
    302         sym_info = sym_info.inlined_by
    303 
    304       symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
    305       for frame in frames:
    306         frame.name = symbolized_name
    307 
    308     symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
    309                                               addr2line_path,
    310                                               _SymbolizerCallback,
    311                                               inlines=True)
    312 
    313     _SubPrintf('Symbolizing {} PCs from {}...',
    314                len(symfile.frames_by_address),
    315                symfile.path)
    316 
    317     for address, frames in symfile.frames_by_address.iteritems():
    318       # SymbolizeAsync() asserts that the type of address is int. We operate
    319       # on longs (since they are raw pointers possibly from 64-bit processes).
    320       # It's OK to cast here because we're passing relative PC, which should
    321       # always fit into int.
    322       symbolizer.SymbolizeAsync(int(address), frames)
    323 
    324     symbolizer.Join()
    325     symbolized = True
    326 
    327   return symbolized
    328 
    329 
    330 def HaveFilesFromAndroid(symfiles):
    331   return any(ANDROID_PATH_MATCHER.match(f.path) for f in symfiles)
    332 
    333 
    334 def RemapAndroidFiles(symfiles, output_path):
    335   for symfile in symfiles:
    336     match = ANDROID_PATH_MATCHER.match(symfile.path)
    337     if match:
    338       name = match.group('name')
    339       symfile.path = os.path.join(output_path, ANDROID_UNSTRIPPED_SUBPATH, name)
    340 
    341 
# Suffix used for backup files. It is appended to the full trace file name,
# after any existing extension.
BACKUP_FILE_TAG = '.BACKUP'
    344 
    345 def main():
    346   parser = argparse.ArgumentParser()
    347   parser.add_argument('file',
    348                       help='Trace file to symbolize (.json or .json.gz)')
    349   parser.add_argument('--no-backup',
    350                       dest='backup', default='true', action='store_false',
    351                       help="Don't create {} files".format(BACKUP_FILE_TAG))
    352   parser.add_argument('--output-directory',
    353                       help='The path to the build output directory, such ' +
    354                            'as out/Debug. Only needed for Android.')
    355   options = parser.parse_args()
    356 
    357   trace_file_path = options.file
    358   def _OpenTraceFile(mode):
    359     if trace_file_path.endswith('.gz'):
    360       return gzip.open(trace_file_path, mode + 'b')
    361     else:
    362       return open(trace_file_path, mode + 't')
    363 
    364   addr2line_path = FindInSystemPath('addr2line')
    365   if addr2line_path is None:
    366     sys.exit("Can't symbolize - no addr2line in PATH.")
    367 
    368   print 'Reading trace file...'
    369   with _OpenTraceFile('r') as trace_file:
    370     trace = json.load(trace_file)
    371 
    372   processes = CollectProcesses(trace)
    373   symfiles = ResolveSymbolizableFiles(processes)
    374 
    375   # Android trace files don't have any indication they are from Android.
    376   # So we're checking for Android-specific paths.
    377   if HaveFilesFromAndroid(symfiles):
    378     if not options.output_directory:
    379       parser.error('The trace file appears to be from Android. Please '
    380                    "specify output directory (e.g. 'out/Debug') to properly "
    381                    'symbolize it.')
    382     RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))
    383 
    384   if SymbolizeFiles(symfiles, addr2line_path):
    385     if options.backup:
    386       backup_file_path = trace_file_path + BACKUP_FILE_TAG
    387       print 'Backing up trace file to {}...'.format(backup_file_path)
    388       os.rename(trace_file_path, backup_file_path)
    389 
    390     print 'Updating trace file...'
    391     with _OpenTraceFile('w') as trace_file:
    392       json.dump(trace, trace_file)
    393   else:
    394     print 'No PCs symbolized - not updating trace file.'
    395 
    396 
# Run main() only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
    399