Home | History | Annotate | Download | only in lib
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import os
      7 import sys
      8 
      9 _BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     10 _FIND_RUNTIME_SYMBOLS_PATH = os.path.join(_BASE_PATH,
     11                                           os.pardir,
     12                                           'find_runtime_symbols')
     13 _TOOLS_LINUX_PATH = os.path.join(_BASE_PATH,
     14                                  os.pardir,
     15                                  'linux')
     16 sys.path.append(_FIND_RUNTIME_SYMBOLS_PATH)
     17 sys.path.append(_TOOLS_LINUX_PATH)
     18 
     19 import find_runtime_symbols
     20 import prepare_symbol_info
     21 import procfs  # pylint: disable=W0611,F0401
     22 
     23 LOGGER = logging.getLogger('dmprof')
     24 
     25 FUNCTION_SYMBOLS = find_runtime_symbols.FUNCTION_SYMBOLS
     26 SOURCEFILE_SYMBOLS = find_runtime_symbols.SOURCEFILE_SYMBOLS
     27 TYPEINFO_SYMBOLS = find_runtime_symbols.TYPEINFO_SYMBOLS
     28 
     29 
     30 class SymbolDataSources(object):
     31   """Manages symbol data sources in a process.
     32 
     33   The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
     34   so on.  They are collected into a directory '|prefix|.symmap' from the binary
     35   files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.
     36 
     37   Binaries are not mandatory to profile.  The prepared data sources work in
     38   place of the binary even if the binary has been overwritten with another
     39   binary.
     40 
     41   Note that loading the symbol data sources takes a long time.  They are often
     42   very big.  So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
     43   which caches actually used symbols.
     44   """
     45   def __init__(self, prefix, alternative_dirs=None):
     46     self._prefix = prefix
     47     self._prepared_symbol_data_sources_path = None
     48     self._loaded_symbol_data_sources = None
     49     self._alternative_dirs = alternative_dirs or {}
     50 
     51   def prepare(self):
     52     """Prepares symbol data sources by extracting mapping from a binary.
     53 
     54     The prepared symbol data sources are stored in a directory.  The directory
     55     name is stored in |self._prepared_symbol_data_sources_path|.
     56 
     57     Returns:
     58         True if succeeded.
     59     """
     60     LOGGER.info('Preparing symbol mapping...')
     61     self._prepared_symbol_data_sources_path, used_tempdir = (
     62         prepare_symbol_info.prepare_symbol_info(
     63             self._prefix + '.maps',
     64             output_dir_path=self._prefix + '.symmap',
     65             alternative_dirs=self._alternative_dirs,
     66             use_tempdir=True,
     67             use_source_file_name=True))
     68     if self._prepared_symbol_data_sources_path:
     69       LOGGER.info('  Prepared symbol mapping.')
     70       if used_tempdir:
     71         LOGGER.warn('  Using a temporary directory for symbol mapping.')
     72         LOGGER.warn('  Delete it by yourself.')
     73         LOGGER.warn('  Or, move the directory by yourself to use it later.')
     74       return True
     75     else:
     76       LOGGER.warn('  Failed to prepare symbol mapping.')
     77       return False
     78 
     79   def get(self):
     80     """Returns the prepared symbol data sources.
     81 
     82     Returns:
     83         The prepared symbol data sources.  None if failed.
     84     """
     85     if not self._prepared_symbol_data_sources_path and not self.prepare():
     86       return None
     87     if not self._loaded_symbol_data_sources:
     88       LOGGER.info('Loading symbol mapping...')
     89       self._loaded_symbol_data_sources = (
     90           find_runtime_symbols.RuntimeSymbolsInProcess.load(
     91               self._prepared_symbol_data_sources_path))
     92     return self._loaded_symbol_data_sources
     93 
     94   def path(self):
     95     """Returns the path of the prepared symbol data sources if possible."""
     96     if not self._prepared_symbol_data_sources_path and not self.prepare():
     97       return None
     98     return self._prepared_symbol_data_sources_path
     99 
    100 
    101 class SymbolFinder(object):
    102   """Finds corresponding symbols from addresses.
    103 
    104   This class does only 'find()' symbols from a specified |address_list|.
    105   It is introduced to make a finder mockable.
    106   """
    107   def __init__(self, symbol_type, symbol_data_sources):
    108     self._symbol_type = symbol_type
    109     self._symbol_data_sources = symbol_data_sources
    110 
    111   def find(self, address_list):
    112     return find_runtime_symbols.find_runtime_symbols(
    113         self._symbol_type, self._symbol_data_sources.get(), address_list)
    114 
    115 
    116 class SymbolMappingCache(object):
    117   """Caches mapping from actually used addresses to symbols.
    118 
    119   'update()' updates the cache from the original symbol data sources via
    120   'SymbolFinder'.  Symbols can be looked up by the method 'lookup()'.
    121   """
    122   def __init__(self):
    123     self._symbol_mapping_caches = {
    124         FUNCTION_SYMBOLS: {},
    125         SOURCEFILE_SYMBOLS: {},
    126         TYPEINFO_SYMBOLS: {},
    127         }
    128 
    129   def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
    130     """Updates symbol mapping cache on memory and in a symbol cache file.
    131 
    132     It reads cached symbol mapping from a symbol cache file |cache_f| if it
    133     exists.  Unresolved addresses are then resolved and added to the cache
    134     both on memory and in the symbol cache file with using 'SymbolFinder'.
    135 
    136     A cache file is formatted as follows:
    137       <Address> <Symbol>
    138       <Address> <Symbol>
    139       <Address> <Symbol>
    140       ...
    141 
    142     Args:
    143         symbol_type: A type of symbols to update.  It should be one of
    144             FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
    145         bucket_set: A BucketSet object.
    146         symbol_finder: A SymbolFinder object to find symbols.
    147         cache_f: A readable and writable IO object of the symbol cache file.
    148     """
    149     cache_f.seek(0, os.SEEK_SET)
    150     self._load(cache_f, symbol_type)
    151 
    152     unresolved_addresses = sorted(
    153         address for address in bucket_set.iter_addresses(symbol_type)
    154         if address not in self._symbol_mapping_caches[symbol_type])
    155 
    156     if not unresolved_addresses:
    157       LOGGER.info('No need to resolve any more addresses.')
    158       return
    159 
    160     cache_f.seek(0, os.SEEK_END)
    161     LOGGER.info('Loading %d unresolved addresses.' %
    162                 len(unresolved_addresses))
    163     symbol_dict = symbol_finder.find(unresolved_addresses)
    164 
    165     for address, symbol in symbol_dict.iteritems():
    166       stripped_symbol = symbol.strip() or '?'
    167       self._symbol_mapping_caches[symbol_type][address] = stripped_symbol
    168       cache_f.write('%x %s\n' % (address, stripped_symbol))
    169 
    170   def lookup(self, symbol_type, address):
    171     """Looks up a symbol for a given |address|.
    172 
    173     Args:
    174         symbol_type: A type of symbols to update.  It should be one of
    175             FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
    176         address: An integer that represents an address.
    177 
    178     Returns:
    179         A string that represents a symbol.
    180     """
    181     return self._symbol_mapping_caches[symbol_type].get(address)
    182 
    183   def _load(self, cache_f, symbol_type):
    184     try:
    185       for line in cache_f:
    186         items = line.rstrip().split(None, 1)
    187         if len(items) == 1:
    188           items.append('??')
    189         self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1]
    190       LOGGER.info('Loaded %d entries from symbol cache.' %
    191                      len(self._symbol_mapping_caches[symbol_type]))
    192     except IOError as e:
    193       LOGGER.info('The symbol cache file is invalid: %s' % e)
    194