Home | History | Annotate | Download | only in memdump
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2013 The Chromium Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 import base64
      8 import os
      9 import sys
     10 import re
     11 
     12 from optparse import OptionParser
     13 
"""Extracts the list of resident symbols of a library loaded in a process.

This script combines the extended output of memdump for a given process
(obtained through memdump -x PID) and the symbol table of a .so loaded in that
process (obtained through nm -C lib-with-symbols.so), keeping only those
symbols that, at the time of the snapshot, were resident in memory (that is,
the symbols whose start address belongs to a mapped page of the .so which was
resident at the time of the snapshot).
The aim is to perform a "code coverage"-like profiling of a binary, intersecting
run-time information (list of resident pages) and debug symbols.
"""
     25 
# Page size, in bytes, used to convert file offsets and symbol addresses into
# page indices. NOTE(review): assumes the profiled target uses 4 KiB pages --
# confirm for the device the memdump was taken on.
_PAGE_SIZE = 4096
     27 
     28 
     29 def _TestBit(word, bit):
     30   assert(bit >= 0 and bit < 8)
     31   return not not ((word >> bit) & 1)
     32 
     33 
     34 def _HexAddr(addr):
     35   return hex(addr)[2:].zfill(8)
     36 
     37 
     38 def _GetResidentPagesSet(memdump_contents, lib_name, verbose):
     39   """Parses the memdump output and extracts the resident page set for lib_name.
     40   Args:
     41     memdump_contents: Array of strings (lines) of a memdump output.
     42     lib_name: A string containing the name of the library.so to be matched.
     43     verbose: Print a verbose header for each mapping matched.
     44 
     45   Returns:
     46     A set of resident pages (the key is the page index) for all the
     47     mappings matching .*lib_name.
     48   """
     49   resident_pages = set()
     50   MAP_RX = re.compile(
     51       r'^([0-9a-f]+)-([0-9a-f]+) ([\w-]+) ([0-9a-f]+) .* "(.*)" \[(.*)\]$')
     52   for line in memdump_contents:
     53     line = line.rstrip('\r\n')
     54     if line.startswith('[ PID'):
     55       continue
     56 
     57     r = MAP_RX.match(line)
     58     if not r:
     59       sys.stderr.write('Skipping %s from %s\n' % (line, memdump_file))
     60       continue
     61 
     62     map_start = int(r.group(1), 16)
     63     map_end = int(r.group(2), 16)
     64     prot = r.group(3)
     65     offset = int(r.group(4), 16)
     66     assert(offset % _PAGE_SIZE == 0)
     67     lib = r.group(5)
     68     enc_bitmap = r.group(6)
     69 
     70     if not lib.endswith(lib_name):
     71       continue
     72 
     73     bitmap = base64.b64decode(enc_bitmap)
     74     map_pages_count = (map_end - map_start + 1) / _PAGE_SIZE
     75     bitmap_pages_count = len(bitmap) * 8
     76 
     77     if verbose:
     78       print 'Found %s: mapped %d pages in mode %s @ offset %s.' % (
     79             lib, map_pages_count, prot, _HexAddr(offset))
     80       print ' Map range in the process VA: [%s - %s]. Len: %s' % (
     81           _HexAddr(map_start),
     82           _HexAddr(map_end),
     83           _HexAddr(map_pages_count * _PAGE_SIZE))
     84       print ' Corresponding addresses in the binary: [%s - %s]. Len: %s' % (
     85           _HexAddr(offset),
     86           _HexAddr(offset + map_end - map_start),
     87           _HexAddr(map_pages_count * _PAGE_SIZE))
     88       print ' Bitmap: %d pages' % bitmap_pages_count
     89       print ''
     90 
     91     assert(bitmap_pages_count >= map_pages_count)
     92     for i in xrange(map_pages_count):
     93       bitmap_idx = i / 8
     94       bitmap_off = i % 8
     95       if (bitmap_idx < len(bitmap) and
     96           _TestBit(ord(bitmap[bitmap_idx]), bitmap_off)):
     97         resident_pages.add(offset / _PAGE_SIZE + i)
     98   return resident_pages
     99 
    100 
    101 def main(argv):
    102   NM_RX = re.compile(r'^([0-9a-f]+)\s+.*$')
    103 
    104   parser = OptionParser()
    105   parser.add_option("-r", "--reverse",
    106                     action="store_true", dest="reverse", default=False,
    107                     help="Print out non present symbols")
    108   parser.add_option("-v", "--verbose",
    109                     action="store_true", dest="verbose", default=False,
    110                     help="Print out verbose debug information.")
    111 
    112   (options, args) = parser.parse_args()
    113 
    114   if len(args) != 3:
    115     print 'Usage: %s [-v] memdump.file nm.file library.so' % (
    116         os.path.basename(argv[0]))
    117     return 1
    118 
    119   memdump_file = args[0]
    120   nm_file = args[1]
    121   lib_name = args[2]
    122 
    123   if memdump_file == '-':
    124     memdump_contents = sys.stdin.readlines()
    125   else:
    126     memdump_contents = open(memdump_file, 'r').readlines()
    127   resident_pages = _GetResidentPagesSet(memdump_contents,
    128                                         lib_name,
    129                                         options.verbose)
    130 
    131   # Process the nm symbol table, filtering out the resident symbols.
    132   nm_fh = open(nm_file, 'r')
    133   for line in nm_fh:
    134     line = line.rstrip('\r\n')
    135     # Skip undefined symbols (lines with no address).
    136     if line.startswith(' '):
    137       continue
    138 
    139     r = NM_RX.match(line)
    140     if not r:
    141       sys.stderr.write('Skipping %s from %s\n' % (line, nm_file))
    142       continue
    143 
    144     sym_addr = int(r.group(1), 16)
    145     sym_page = sym_addr / _PAGE_SIZE
    146     last_sym_matched = (sym_page in resident_pages)
    147     if (sym_page in resident_pages) != options.reverse:
    148       print line
    149   return 0
    150 
# When executed as a script, run main() on the command line and use its return
# value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
    153