#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Dump functions called by static initializers in a Linux Release binary.

Usage example:
  tools/linux/dump-static-initializers.py out/Release/chrome

A brief overview of static initialization:
1) the compiler writes out, per object file, a function that contains
   the static initializers for that file.
2) the compiler also writes out a pointer to that function in a special
   section.
3) at link time, the linker concatenates the function pointer sections
   into a single list of all initializers.
4) at run time, on startup the binary runs all function pointers.

The functions in (1) all have mangled names of the form
  _GLOBAL__I_foobar.cc
(or _GLOBAL__sub_I_foobar.cc). Using objdump, we can disassemble those
functions and dump all symbols that they reference.
"""

import optparse
import re
import subprocess
import sys

# A map of symbol => informative text about it.
NOTES = {
    '__cxa_atexit@plt': 'registers a dtor to run at exit',
    'std::__ioinit': '#includes <iostream>, use <ostream> instead',
}


def _DetectGitWorkspace():
    """Return True if `git rev-parse` succeeds in the current directory.

    Returns False (instead of raising) when the git executable cannot be
    launched at all, so the script still works on machines without git.
    """
    try:
        proc = subprocess.Popen(['git', 'rev-parse'], stderr=subprocess.PIPE)
    except OSError:
        # git is not installed; behave as for a non-git checkout.
        return False
    # communicate() drains stderr, so the child can never block on a full
    # pipe, and also waits for it to exit.
    proc.communicate()
    return proc.returncode == 0


# Determine whether this is a git checkout (as opposed to e.g. svn).
IS_GIT_WORKSPACE = _DetectGitWorkspace()


class Demangler(object):
    """A wrapper around c++filt to provide a function to demangle symbols."""

    def __init__(self):
        """Start a single long-lived c++filt process with piped stdin/stdout."""
        # universal_newlines=True puts the pipes in text mode, so Demangle()
        # can exchange str objects with the child on Python 2 and Python 3.
        self.cppfilt = subprocess.Popen(['c++filt'],
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)

    def Demangle(self, sym):
        """Given mangled symbol |sym|, return its demangled form.

        Writes |sym| as one line to c++filt and returns the next line of its
        output with surrounding whitespace stripped.
        """
        self.cppfilt.stdin.write(sym + '\n')
        # Without an explicit flush the request may sit in a write buffer and
        # the readline() below would wait forever.
        self.cppfilt.stdin.flush()
        return self.cppfilt.stdout.readline().strip()


# Matches for example: "cert_logger.pb.cc", capturing "cert_logger".
protobuf_filename_re = re.compile(r'(.*)\.pb\.cc$')


def _UniqueOutputLine(cmd, fallback):
    """Run |cmd| and return its output if it printed exactly one line.

    Args:
      cmd: argument list for the subprocess to run.
      fallback: value to return when the command printed zero lines or more
          than one line.

    Returns:
      The single output line with surrounding whitespace stripped, or
      |fallback|.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() reads all output and waits for the child, so no process
    # is left behind even when the result turns out to be ambiguous.
    output, _ = proc.communicate()
    lines = output.splitlines()
    if len(lines) != 1:
        return fallback
    return lines[0].strip()


def QualifyFilenameAsProto(filename):
    """Attempt to qualify a bare |filename| with a src-relative path, assuming
    it is a protoc-generated file.

    Asks `git ls-files` for .proto files whose basename matches the part of
    |filename| before ".pb.cc". If a single match is found, it is returned.
    Otherwise (not a git workspace, not a *.pb.cc name, no match, or several
    matches) the original filename is returned.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = protobuf_filename_re.match(filename)
    if not match:
        return filename
    basename = match.group(1)
    return _UniqueOutputLine(
        ['git', 'ls-files', '--', '*/%s.proto' % basename], filename)


# Regex matching the substring of a symbol's demangled text representation most
# likely to appear in a source file.
# Example: "v8::internal::Builtins::InitBuiltinFunctionTable()" becomes
# "InitBuiltinFunctionTable", since the first (optional & non-capturing) group
# picks up any ::-qualification and the last fragment picks up a suffix that
# starts with an opener.
symbol_code_name_re = re.compile(r'^(?:[^(<[]*::)?([^:(<[]*).*?$')


def QualifyFilename(filename, symbol):
    """Given a bare filename and a symbol that occurs in it, attempt to qualify
    it with a src-relative path.

    Uses `git grep -l` to look for files named |filename| that mention the
    unqualified name extracted from |symbol|. If exactly one file matches, its
    path is returned; in every other case the original filename is returned.
    """
    if not IS_GIT_WORKSPACE:
        return filename
    match = symbol_code_name_re.match(symbol)
    if not match:
        return filename
    code_name = match.group(1)
    return _UniqueOutputLine(
        ['git', 'grep', '-l', code_name, '--', '*/%s' % filename], filename)


# Regex matching nm output for the symbols we're interested in.
# See test_ParseNmLine for examples.
nm_re = re.compile(r'(\S+) (\S+) t (?:_ZN12)?_GLOBAL__(?:sub_)?I_(.*)')


def ParseNmLine(line):
    """Given a line of nm output, parse static initializers as a
    (file, start, size) tuple.

    Returns None when |line| does not describe a static initializer symbol.
    The address and size columns are parsed as hexadecimal.
    """
    match = nm_re.match(line)
    if not match:
        return None
    addr, size, filename = match.groups()
    return (filename, int(addr, 16), int(size, 16))


def test_ParseNmLine():
    """Verify the nm_re regex matches some sample lines."""
    parse = ParseNmLine(
        '0000000001919920 0000000000000008 t '
        '_ZN12_GLOBAL__I_safe_browsing_service.cc')
    assert parse == ('safe_browsing_service.cc', 26319136, 8), parse

    parse = ParseNmLine(
        '00000000026b9eb0 0000000000000024 t '
        '_GLOBAL__sub_I_extension_specifics.pb.cc')
    assert parse == ('extension_specifics.pb.cc', 40607408, 36), parse


# Just always run the test; it is fast enough.
test_ParseNmLine()


def ParseNm(binary):
    """Given a binary, yield static initializers as (file, start, size) tuples.

    Runs `nm -S` on |binary| and yields one tuple for every output line that
    ParseNmLine recognizes.
    """
    # Text-mode pipe so the str regex in ParseNmLine also works on Python 3.
    nm = subprocess.Popen(['nm', '-S', binary], stdout=subprocess.PIPE,
                          universal_newlines=True)
    try:
        for line in nm.stdout:
            parse = ParseNmLine(line)
            if parse:
                yield parse
    finally:
        # Runs on exhaustion and on early generator close alike, so the nm
        # child is always reaped.
        nm.stdout.close()
        nm.wait()


# Regex matching objdump output for the symbols we're interested in.
# Example line:
#     12354ab: (disassembly, including <FunctionReference>)
disassembly_re = re.compile(r'^\s+[0-9a-f]+:.*<(\S+)>')

# Matches a reference back into a static initializer function itself. Accepts
# the same symbol spellings as the nm regex: an optional _ZN12 prefix and
# either _GLOBAL__I_ or _GLOBAL__sub_I_.
_self_reference_re = re.compile(r'(?:_ZN12)?_GLOBAL__(?:sub_)?I_')


def _ReferencesFromDisassembly(lines):
    """Return the sorted, de-duplicated symbol references found in |lines|.

    Args:
      lines: iterable of objdump disassembly output lines.

    Raises:
      RuntimeError: if a line mentions
          __static_initialization_and_destruction.
    """
    refs = set()
    for line in lines:
        if '__static_initialization_and_destruction' in line:
            raise RuntimeError('code mentions '
                               '__static_initialization_and_destruction; '
                               'did you accidentally run this on a Debug '
                               'binary?')
        match = disassembly_re.search(line)
        if not match:
            continue
        ref = match.group(1)
        if ref.startswith(('.LC', '_DYNAMIC')):
            # Ignore these, they are uninformative.
            continue
        if _self_reference_re.match(ref):
            # Probably a relative jump within this function.
            continue
        refs.add(ref)
    return sorted(refs)


def ExtractSymbolReferences(binary, start, end):
    """Given a span of addresses, returns symbol references from disassembly.

    Disassembles |binary| between addresses |start| and |end| with objdump and
    returns a sorted list of the referenced symbol names.

    Raises:
      RuntimeError: if the disassembly mentions
          __static_initialization_and_destruction.
    """
    cmd = ['objdump', binary, '--disassemble',
           '--start-address=0x%x' % start, '--stop-address=0x%x' % end]
    # Text-mode pipe so the str regexes also work on Python 3.
    objdump = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        return _ReferencesFromDisassembly(objdump.stdout)
    finally:
        # Reap the child whether parsing succeeded or raised.
        objdump.stdout.close()
        objdump.wait()


def _AnnotateReference(ref):
    """Return |ref|, followed by a bracketed note when one applies to it."""
    if ref in NOTES:
        note = NOTES[ref]
    elif ref.endswith('_2eproto()'):
        note = 'protocol compiler bug: crbug.com/105626'
    else:
        return ref
    return '%s [%s]' % (ref, note)


def main():
    """Print the symbols referenced by each static initializer of a binary.

    Expects exactly one positional command-line argument, the binary path.
    With -d/--diffable, files are processed in sorted order and every output
    line is prefixed with '# ' and the file name.

    Returns:
      0 on success. A wrong argument count exits through parser.error().
    """
    parser = optparse.OptionParser(usage='%prog [option] filename')
    parser.add_option('-d', '--diffable', dest='diffable',
                      action='store_true', default=False,
                      help='Prints the filename on each line, for more easily '
                           'diff-able output. (Used by sizes.py)')
    opts, args = parser.parse_args()
    if len(args) != 1:
        # parser.error() prints the usage message and exits the process, so
        # nothing after this call runs.
        parser.error('missing filename argument')
    binary = args[0]

    demangler = Demangler()
    file_count = 0
    initializer_count = 0

    files = ParseNm(binary)
    if opts.diffable:
        files = sorted(files)
    for filename, addr, size in files:
        file_count += 1
        ref_output = []

        qualified_filename = QualifyFilenameAsProto(filename)

        if size == 2:
            # gcc generates a two-byte 'repz retq' initializer when there is a
            # ctor even when the ctor is empty. This is fixed in gcc 4.6, but
            # Android uses gcc 4.4.
            ref_output.append(
                '[empty ctor, but it still has cost on gcc <4.6]')
        else:
            for ref in ExtractSymbolReferences(binary, addr, addr + size):
                initializer_count += 1

                ref = demangler.Demangle(ref)
                # Keep trying to qualify the name until one symbol succeeds.
                if qualified_filename == filename:
                    qualified_filename = QualifyFilename(filename, ref)

                ref_output.append(_AnnotateReference(ref))

        # Each print() call takes a single argument, which behaves the same
        # as a Python 2 print statement and as the Python 3 function.
        if opts.diffable:
            print('\n'.join('# ' + qualified_filename + ' ' + r
                            for r in ref_output))
        else:
            print('%s (initializer offset 0x%x size 0x%x)' %
                  (qualified_filename, addr, size))
            print(''.join(' %s\n' % r for r in ref_output))

    summary = 'Found %d static initializers in %d files.' % (
        initializer_count, file_count)
    if opts.diffable:
        summary = '# ' + summary
    print(summary)

    return 0


if '__main__' == __name__:
    sys.exit(main())