#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generate a spatial analysis against an arbitrary library.

To use, build the 'binary_size_tool' target. Then run this tool, passing
in the location of the library to be analyzed along with any other options
you desire.
"""

import collections
import json
import logging
import multiprocessing
import optparse
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time

import binary_size_utils

# This path change is not beautiful. Temporary (I hope) measure until
# the chromium project has figured out a proper way to organize the
# library of python tools. http://crbug.com/375725
elf_symbolizer_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__),
    '..',
    '..',
    'build',
    'android',
    'pylib'))
sys.path.append(elf_symbolizer_path)
import symbols.elf_symbolizer as elf_symbolizer  # pylint: disable=F0401


# Node dictionary keys. These are output in the JSON read by the webapp, so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'
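# Illustrative sketch (example values, not part of the format spec): a leaf
# symbol node built from these keys might look like
#   {'n': 'ACCESS_DESCRIPTION_free', 'k': 's', 't': 't', 'value': 24}
# nested under a 'b' (bucket) node, which is nested under 'p' (path) nodes.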

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000


# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Pretty-print a number of bytes."""
  if byte_count > 1e6:
    byte_count = byte_count / 1.0e6
    return '%.1fm' % byte_count
  if byte_count > 1e3:
    byte_count = byte_count / 1.0e3
    return '%.1fk' % byte_count
  return str(byte_count)
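# For illustration (not in the original): FormatBytes(1536000) returns '1.5m',
# FormatBytes(2048) returns '2.0k', and FormatBytes(999) returns '999'.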


# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name."""
  return {'b': 'bss',
          'd': 'data',
          'r': 'read-only data',
          't': 'code',
          'w': 'weak symbol',
          'v': 'weak symbol'}[symbol_type]


def _MkChild(node, name):
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child


def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.iteritems():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.iteritems():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
          if index % BIG_BUCKET_LIMIT == 0:
            group_no = (index / BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
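# Illustrative example (not in the original): with BIG_BUCKET_LIMIT == 3000,
# 7500 pathless symbols would be regrouped into '(No Path) subgroup 1'
# through '(No Path) subgroup 3'.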


def MakeChildrenDictsIntoLists(node):
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].itervalues():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len


def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of levels added to the tree, which is always 2."""

  # 'node' is the file node, and the first step is to find its
  # symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning('A container node is being used as a symbol for %s.',
                      symbol_name)
    # This node is going to be used as a leaf, so the child list is unneeded.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
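# Illustrative sketch of the resulting subtree (assumed example values):
# adding symbol 'foo' of type 't' and size 12 to a file node produces
#   {'n': 'file.c', 'lastPathElement': True, 'children': {
#       't': {'n': 't', 'k': 'b', 't': 't', 'children': {
#           'foo': {'n': 'foo', 'k': 's', 't': 't', 'value': 12}}}}}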


def MakeCompactTree(symbols):
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take a path like '/foo/bar/baz' and convert it to ['foo', 'bar', 'baz'].
    if file_path:
      file_path = os.path.normpath(file_path)
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    if file_path.startswith('/'):
      file_path = file_path[1:]
    path_parts = file_path.split('/')

    # Find the node for the file's path, creating nodes as needed.
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.', largest_list_len)
  return result


# TODO(andrewhayden): Only used for legacy reports. Delete.
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., C++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, symbol_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    parts = None
    if path:
      parts = path.split('/')

    if parts:
      assert path
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file.
        # This node doesn't have a 'children' attribute.
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file.
        symbol_type = symbol_type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == symbol_type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == symbol_type:
          tree['sizes']['[data]'] += size
        elif 'b' == symbol_type:
          tree['sizes']['[bss]'] += size
        elif 't' == symbol_type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == symbol_type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except:
        print >> sys.stderr, sym, parts, file_key
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size
      tree['size'] += size
  return dirs
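# Example (assumed input, for illustration): a single-symbol list like
#   [('Foo::bar()', 't', 100, '/src/foo.cc')]
# yields roughly {'size': 100, 'children': {'src': {'size': 100, 'children': {
#     'foo.cc': {'size': 100, 'sizes': {'[code]': 100}}}}}}.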


# TODO(andrewhayden): Only used for legacy reports. Delete.
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names."""
  children = []
  css_class_map = {
                  '[vtable]': 'vtable',
                  '[rodata]': 'read-only_data',
                  '[data]': 'data',
                  '[bss]': 'bss',
                  '[code]': 'code',
                  '[weak]': 'weak_symbol'
  }
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].iteritems():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap.
    for kind, size in tree['sizes'].iteritems():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': { '$area': size }}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': { '$area': tree['size'] },
          'children': children }


def DumpCompactTree(symbols, outfile):
  tree_root = MakeCompactTree(symbols)
  with open(outfile, 'w') as out:
    out.write('var tree_data = ')
    json.dump(tree_root, out)
  print('Writing %d bytes of JSON.' % os.path.getsize(outfile))


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpTreemap(symbols, outfile):
  dirs = TreeifySymbols(symbols)
  out = open(outfile, 'w')
  try:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))
  finally:
    out.flush()
    out.close()


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSymbols(symbols, outfile, n):
  # a list of (sym, symbol_type, size, path); sort by size.
  symbols = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  out = open(outfile, 'w')
  try:
    out.write('var largestSymbols = [\n')
    for sym, symbol_type, size, path in symbols:
      if symbol_type in ('b', 'w'):
        continue  # skip bss and weak symbols
      if path is None:
        path = ''
      entry = {'size': FormatBytes(size),
               'symbol': sym,
               'type': SymbolTypeToHuman(symbol_type),
               'location': path }
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        return
  finally:
    out.write('];\n')
    out.flush()
    out.close()


def MakeSourceMap(symbols):
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = None
    if path:
      key = os.path.normpath(path)
    else:
      key = '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSources(symbols, outfile, n):
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  out = open(outfile, 'w')
  try:
    out.write('var largestSources = [\n')
    for record in sources:
      entry = {'size': FormatBytes(record['size']),
               'symbol_count': str(record['symbol_count']),
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        return
  finally:
    out.write('];\n')
    out.flush()
    out.close()


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestVTables(symbols, outfile, n):
  vtables = []
  for symbol, _type, size, path in symbols:
    if 'vtable for ' in symbol:
      vtables.append({'symbol': symbol, 'path': path, 'size': size})
  vtables = sorted(vtables, key=lambda x: -x['size'])
  dumped = 0
  out = open(outfile, 'w')
  try:
    out.write('var largestVTables = [\n')
    for record in vtables:
      entry = {'size': FormatBytes(record['size']),
               'symbol': record['symbol'],
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        return
  finally:
    out.write('];\n')
    out.flush()
    out.close()


# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location.
# Regular expression explained (see also: https://xkcd.com/208):
# ([0-9a-f]{8,})    The address
# [\s]+             Whitespace separator
# ([0-9a-f]{8,})    The size. From here on out it's all optional.
# [\s]*             Whitespace separator
# (\S?)             The symbol type, which is any non-whitespace char
# [\s*]             Whitespace or '*' separator
# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
# [\t]?             Tab separator
# (.*)              The location (filename[:linenum|?][ (discriminator n)])
sNmPattern = re.compile(
  r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
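# For the sample line above (assuming the name and location are separated by
# a tab, as in the post-processed output written below), the captured groups
# would be:
#   ('0167b39c', '00000018', 't', 'ACCESS_DESCRIPTION_free', '/path/file.c:95')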


class Progress(object):
  def __init__(self):
    self.count = 0
    self.skip_count = 0
    self.collisions = 0
    self.time_last_output = time.time()
    self.count_last_output = 0


def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs):
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()
  def map_address_symbol(symbol, addr):
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      address_symbol[addr] = symbol

    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' %
              (progress_percent, progress.count, progress.collisions, speed))

  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have, my young padawan.')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))


def RunNm(binary, nm_binary):
  print('Starting nm')
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  print('Finished nm')
  return process_output


def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary):
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print 'Running parallel addr2line, dumping symbols to ' + outfile
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs)

    nm_infile = outfile

  elif verbose:
    print 'Using nm input from ' + nm_infile
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))


def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None


def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or (major == 2 and minor > 22)

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
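# Example (for illustration): '--version' output of 'GNU addr2line (GNU
# Binutils) 2.22' parses to major=2, minor=22, so supports_dwarf4 is False
# and the DWARF version used by the library itself gets inspected.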


def main():
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      print >> sys.stderr, ('WARNING: --jobs has no effect '
                            'when used with --nm-in')
  if not opts.destdir:
    parser.error('--destdir is a required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0755)

  if opts.legacy:  # legacy report
    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                       os.path.join(opts.destdir, 'largest-vtables.js'), 100)
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
      os.makedirs(treemap_out, 0755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src')
    shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                             'index.html'), opts.destdir)
  else:  # modern report
    DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js'))
    d3_out = os.path.join(opts.destdir, 'd3')
    if not os.path.exists(d3_out):
      os.makedirs(d3_out, 0755)
    d3_src = os.path.join(os.path.dirname(__file__),
                          '..',
                          '..',
                          'third_party', 'd3', 'src')
    template_src = os.path.join(os.path.dirname(__file__),
                                'template')
    shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
    shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
    shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
    shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print 'Report saved to ' + opts.destdir + '/index.html'


if __name__ == '__main__':
  sys.exit(main())