Home | History | Annotate | Download | only in linux
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Dump functions called by static initializers in a Linux Release binary.
      7 
      8 Usage example:
      9   tools/linux/dump-static-initializers.py out/Release/chrome
     10 
     11 A brief overview of static initialization:
     12 1) the compiler writes out, per object file, a function that contains
     13    the static initializers for that file.
     14 2) the compiler also writes out a pointer to that function in a special
     15    section.
     16 3) at link time, the linker concatenates the function pointer sections
     17    into a single list of all initializers.
     18 4) at run time, on startup the binary runs all function pointers.
     19 
     20 The functions in (1) all have mangled names of the form
     21   _GLOBAL__I_foobar.cc
     22 Using objdump, we can disassemble those functions and dump all symbols that
     23 they reference.
     24 """
     25 
     26 import optparse
     27 import re
     28 import subprocess
     29 import sys
     30 
     31 # A map of symbol => informative text about it.
     32 NOTES = {
     33   '__cxa_atexit@plt': 'registers a dtor to run at exit',
     34   'std::__ioinit': '#includes <iostream>, use <ostream> instead',
     35 }
     36 
     37 # Determine whether this is a git checkout (as opposed to e.g. svn).
     38 IS_GIT_WORKSPACE = (subprocess.Popen(
     39     ['git', 'rev-parse'], stderr=subprocess.PIPE).wait() == 0)
     40 
     41 class Demangler(object):
     42   """A wrapper around c++filt to provide a function to demangle symbols."""
     43   def __init__(self):
     44     self.cppfilt = subprocess.Popen(['c++filt'],
     45                                     stdin=subprocess.PIPE,
     46                                     stdout=subprocess.PIPE)
     47 
     48   def Demangle(self, sym):
     49     """Given mangled symbol |sym|, return its demangled form."""
     50     self.cppfilt.stdin.write(sym + '\n')
     51     return self.cppfilt.stdout.readline().strip()
     52 
     53 # Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
     54 protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')
     55 def QualifyFilenameAsProto(filename):
     56   """Attempt to qualify a bare |filename| with a src-relative path, assuming it
     57   is a protoc-generated file.  If a single match is found, it is returned.
     58   Otherwise the original filename is returned."""
     59   if not IS_GIT_WORKSPACE:
     60     return filename
     61   match = protobuf_filename_re.match(filename)
     62   if not match:
     63     return filename
     64   basename = match.groups(0)
     65   gitlsfiles = subprocess.Popen(
     66     ['git', 'ls-files', '--', '*/%s.proto' % basename],
     67     stdout=subprocess.PIPE)
     68   candidate = filename
     69   for line in gitlsfiles.stdout:
     70     if candidate != filename:
     71       return filename # Multiple hits, can't help.
     72     candidate = line.strip()
     73   return candidate
     74 
     75 # Regex matching the substring of a symbol's demangled text representation most
     76 # likely to appear in a source file.
     77 # Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
     78 # "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
     79 # picks up any ::-qualification and the last fragment picks up a suffix that
     80 # starts with an opener.
     81 symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')
     82 def QualifyFilename(filename, symbol):
     83   """Given a bare filename and a symbol that occurs in it, attempt to qualify
     84   it with a src-relative path.  If more than one file matches, return the
     85   original filename."""
     86   if not IS_GIT_WORKSPACE:
     87     return filename
     88   match = symbol_code_name_re.match(symbol)
     89   if not match:
     90     return filename
     91   symbol = match.group(1)
     92   gitgrep = subprocess.Popen(
     93     ['git', 'grep', '-l', symbol, '--', '*/%s' % filename],
     94     stdout=subprocess.PIPE)
     95   candidate = filename
     96   for line in gitgrep.stdout:
     97     if candidate != filename:  # More than one candidate; return bare filename.
     98       return filename
     99     candidate = line.strip()
    100   return candidate
    101 
    102 # Regex matching nm output for the symbols we're interested in.
    103 # See test_ParseNmLine for examples.
    104 nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')
    105 def ParseNmLine(line):
    106   """Given a line of nm output, parse static initializers as a
    107   (file, start, size) tuple."""
    108   match = nm_re.match(line)
    109   if match:
    110     addr, size, filename = match.groups()
    111     return (filename, int(addr, 16), int(size, 16))
    112 
    113 
    114 def test_ParseNmLine():
    115   """Verify the nm_re regex matches some sample lines."""
    116   parse = ParseNmLine(
    117     '0000000001919920 0000000000000008 t '
    118     '_ZN12_GLOBAL__I_safe_browsing_service.cc')
    119   assert parse == ('safe_browsing_service.cc', 26319136, 8), parse
    120 
    121   parse = ParseNmLine(
    122     '00000000026b9eb0 0000000000000024 t '
    123     '_GLOBAL__sub_I_extension_specifics.pb.cc')
    124   assert parse == ('extension_specifics.pb.cc', 40607408, 36), parse
    125 
# Just always run the test, every time this module is loaded; it is fast
# enough.  A regression in nm_re then fails immediately with an
# AssertionError.
test_ParseNmLine()
    128 
    129 
    130 def ParseNm(binary):
    131   """Given a binary, yield static initializers as (file, start, size) tuples."""
    132   nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE)
    133   for line in nm.stdout:
    134     parse = ParseNmLine(line)
    135     if parse:
    136       yield parse
    137 
    138 # Regex matching objdump output for the symbols we're interested in.
    139 # Example line:
    140 #     12354ab:  (disassembly, including <FunctionReference>)
    141 disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')
    142 def ExtractSymbolReferences(binary, start, end):
    143   """Given a span of addresses, returns symbol references from disassembly."""
    144   cmd = ['objdump', binary, '--disassemble',
    145          '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    146   objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    147 
    148   refs = set()
    149   for line in objdump.stdout:
    150     if '__static_initialization_and_destruction' in line:
    151       raise RuntimeError, ('code mentions '
    152                            '__static_initialization_and_destruction; '
    153                            'did you accidentally run this on a Debug binary?')
    154     match = disassembly_re.search(line)
    155     if match:
    156       (ref,) = match.groups()
    157       if ref.startswith('.LC') or ref.startswith('_DYNAMIC'):
    158         # Ignore these, they are uninformative.
    159         continue
    160       if ref.startswith('_GLOBAL__I_'):
    161         # Probably a relative jump within this function.
    162         continue
    163       refs.add(ref)
    164 
    165   return sorted(refs)
    166 
    167 def main():
    168   parser = optparse.OptionParser(usage='%prog [option] filename')
    169   parser.add_option('-d', '--diffable', dest='diffable',
    170                     action='store_true', default=False,
    171                     help='Prints the filename on each line, for more easily '
    172                          'diff-able output. (Used by sizes.py)')
    173   opts, args = parser.parse_args()
    174   if len(args) != 1:
    175     parser.error('missing filename argument')
    176     return 1
    177   binary = args[0]
    178 
    179   demangler = Demangler()
    180   file_count = 0
    181   initializer_count = 0
    182 
    183   files = ParseNm(binary)
    184   if opts.diffable:
    185     files = sorted(files)
    186   for filename, addr, size in files:
    187     file_count += 1
    188     ref_output = []
    189 
    190     qualified_filename = QualifyFilenameAsProto(filename)
    191 
    192     if size == 2:
    193       # gcc generates a two-byte 'repz retq' initializer when there is a
    194       # ctor even when the ctor is empty.  This is fixed in gcc 4.6, but
    195       # Android uses gcc 4.4.
    196       ref_output.append('[empty ctor, but it still has cost on gcc <4.6]')
    197     else:
    198       for ref in ExtractSymbolReferences(binary, addr, addr+size):
    199         initializer_count += 1
    200 
    201         ref = demangler.Demangle(ref)
    202         if qualified_filename == filename:
    203           qualified_filename = QualifyFilename(filename, ref)
    204 
    205         note = ''
    206         if ref in NOTES:
    207           note = NOTES[ref]
    208         elif ref.endswith('_2eproto()'):
    209           note = 'protocol compiler bug: crbug.com/105626'
    210 
    211         if note:
    212           ref_output.append('%s [%s]' % (ref, note))
    213         else:
    214           ref_output.append(ref)
    215 
    216     if opts.diffable:
    217       print '\n'.join('# ' + qualified_filename + ' ' + r for r in ref_output)
    218     else:
    219       print '%s (initializer offset 0x%x size 0x%x)' % (qualified_filename,
    220                                                         addr, size)
    221       print ''.join('  %s\n' % r for r in ref_output)
    222 
    223   if opts.diffable:
    224     print '#',
    225   print 'Found %d static initializers in %d files.' % (initializer_count,
    226                                                        file_count)
    227 
    228   return 0
    229 
    230 if '__main__' == __name__:
    231   sys.exit(main())
    232