Home | History | Annotate | Download | only in binary_size
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Describe the size difference of two binaries.
      7 
      8 Generates a description of the size difference of two binaries based
      9 on the difference of the size of various symbols.
     10 
     11 This tool needs "nm" dumps of each binary with full symbol
     12 information. You can obtain the necessary dumps by running the
     13 run_binary_size_analysis.py script upon each binary, with the
     14 "--nm-out" parameter set to the location in which you want to save the
     15 dumps. Example:
     16 
     17   # obtain symbol data from first binary in /tmp/nm1.dump
     18   cd $CHECKOUT1_SRC
     19   ninja -C out/Release binary_size_tool
     20   tools/binary_size/run_binary_size_analysis \
     21       --library <path_to_library>
     22       --destdir /tmp/throwaway
     23       --nm-out /tmp/nm1.dump
     24 
     25   # obtain symbol data from second binary in /tmp/nm2.dump
     26   cd $CHECKOUT2_SRC
     27   ninja -C out/Release binary_size_tool
     28   tools/binary_size/run_binary_size_analysis \
     29       --library <path_to_library>
     30       --destdir /tmp/throwaway
     31       --nm-out /tmp/nm2.dump
     32 
     33   # cleanup useless files
     34   rm -r /tmp/throwaway
     35 
     36   # run this tool
     37   explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
     38 """
     39 
     40 import collections
     41 import operator
     42 import optparse
     43 import os
     44 import sys
     45 
     46 import binary_size_utils
     47 
     48 
     49 def Compare(symbols1, symbols2):
     50   """Executes a comparison of the symbols in symbols1 and symbols2.
     51 
     52   Returns:
     53       tuple of lists: (added_symbols, removed_symbols, changed_symbols, others)
     54   """
     55   added = [] # tuples
     56   removed = [] # tuples
     57   changed = [] # tuples
     58   unchanged = [] # tuples
     59 
     60   cache1 = {}
     61   cache2 = {}
     62   # Make a map of (file, symbol_type) : (symbol_name, symbol_size)
     63   for cache, symbols in ((cache1, symbols1), (cache2, symbols2)):
     64     for symbol_name, symbol_type, symbol_size, file_path in symbols:
     65       if 'vtable for ' in symbol_name:
     66         symbol_type = '@' # hack to categorize these separately
     67       if file_path:
     68         file_path = os.path.normpath(file_path)
     69       else:
     70         file_path = '(No Path)'
     71       key = (file_path, symbol_type)
     72       bucket = cache.setdefault(key, {})
     73       size_list = bucket.setdefault(symbol_name, [])
     74       size_list.append(symbol_size)
     75 
     76   # Now diff them. We iterate over the elements in cache1. For each symbol
     77   # that we find in cache2, we record whether it was deleted, changed, or
     78   # unchanged. We then remove it from cache2; all the symbols that remain
     79   # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
     80   for key, bucket1 in cache1.items():
     81     bucket2 = cache2.get(key)
     82     if not bucket2:
     83       # A file was removed. Everything in bucket1 is dead.
     84       for symbol_name, symbol_size_list in bucket1.items():
     85         for symbol_size in symbol_size_list:
     86           removed.append((key[0], key[1], symbol_name, symbol_size, None))
     87     else:
     88       # File still exists, look for changes within.
     89       for symbol_name, symbol_size_list in bucket1.items():
     90         size_list2 = bucket2.get(symbol_name)
     91         if size_list2 is None:
     92           # Symbol no longer exists in bucket2.
     93           for symbol_size in symbol_size_list:
     94             removed.append((key[0], key[1], symbol_name, symbol_size, None))
     95         else:
     96           del bucket2[symbol_name] # Symbol is not new, delete from cache2.
     97           if len(symbol_size_list) == 1 and len(size_list2) == 1:
     98             symbol_size = symbol_size_list[0]
     99             size2 = size_list2[0]
    100             if symbol_size != size2:
    101               # Symbol has change size in bucket.
    102               changed.append((key[0], key[1], symbol_name, symbol_size, size2))
    103             else:
    104               # Symbol is unchanged.
    105               unchanged.append((key[0], key[1], symbol_name, symbol_size,
    106                                 size2))
    107           else:
    108             # Complex comparison for when a symbol exists multiple times
    109             # in the same file (where file can be "unknown file").
    110             symbol_size_counter = collections.Counter(symbol_size_list)
    111             delta_counter = collections.Counter(symbol_size_list)
    112             delta_counter.subtract(size_list2)
    113             for symbol_size in sorted(delta_counter.keys()):
    114               delta = delta_counter[symbol_size]
    115               unchanged_count = symbol_size_counter[symbol_size]
    116               if delta > 0:
    117                 unchanged_count -= delta
    118               for _ in range(unchanged_count):
    119                 unchanged.append((key[0], key[1], symbol_name, symbol_size,
    120                                   symbol_size))
    121               if delta > 0: # Used to be more of these than there is now.
    122                 for _ in range(delta):
    123                   removed.append((key[0], key[1], symbol_name, symbol_size,
    124                                   None))
    125               elif delta < 0: # More of this (symbol,size) now.
    126                 for _ in range(-delta):
    127                   added.append((key[0], key[1], symbol_name, None, symbol_size))
    128 
    129           if len(bucket2) == 0:
    130             del cache1[key] # Entire bucket is empty, delete from cache2
    131 
    132   # We have now analyzed all symbols that are in cache1 and removed all of
    133   # the encountered symbols from cache2. What's left in cache2 is the new
    134   # symbols.
    135   for key, bucket2 in cache2.iteritems():
    136     for symbol_name, symbol_size_list in bucket2.items():
    137       for symbol_size in symbol_size_list:
    138         added.append((key[0], key[1], symbol_name, None, symbol_size))
    139   return (added, removed, changed, unchanged)
    140 
    141 def DeltaStr(number):
    142   """Returns the number as a string with a '+' prefix if it's > 0 and
    143   a '-' prefix if it's < 0."""
    144   result = str(number)
    145   if number > 0:
    146     result = '+' + result
    147   return result
    148 
    149 
    150 class CrunchStatsData(object):
    151   """Stores a summary of data of a certain kind."""
    152   def __init__(self, symbols):
    153     self.symbols = symbols
    154     self.sources = set()
    155     self.before_size = 0
    156     self.after_size = 0
    157     self.symbols_by_path = {}
    158 
    159 
    160 def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
    161   """Outputs to stdout a summary of changes based on the symbol lists."""
    162   # Split changed into grown and shrunk because that is easier to
    163   # discuss.
    164   grown = []
    165   shrunk = []
    166   for item in changed:
    167     file_path, symbol_type, symbol_name, size1, size2 = item
    168     if size1 < size2:
    169       grown.append(item)
    170     else:
    171       shrunk.append(item)
    172 
    173   new_symbols = CrunchStatsData(added)
    174   removed_symbols = CrunchStatsData(removed)
    175   grown_symbols = CrunchStatsData(grown)
    176   shrunk_symbols = CrunchStatsData(shrunk)
    177   sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
    178   for section in sections:
    179     for file_path, symbol_type, symbol_name, size1, size2 in section.symbols:
    180       section.sources.add(file_path)
    181       if size1 is not None:
    182         section.before_size += size1
    183       if size2 is not None:
    184         section.after_size += size2
    185       bucket = section.symbols_by_path.setdefault(file_path, [])
    186       bucket.append((symbol_name, symbol_type, size1, size2))
    187 
    188   total_change = sum(s.after_size - s.before_size for s in sections)
    189   summary = 'Total change: %s bytes' % DeltaStr(total_change)
    190   print(summary)
    191   print('=' * len(summary))
    192   for section in sections:
    193     if not section.symbols:
    194       continue
    195     if section.before_size == 0:
    196       description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    197     elif section.after_size == 0:
    198       description = ('removed, totalling %s bytes' %
    199                      DeltaStr(-section.before_size))
    200     else:
    201       if section.after_size > section.before_size:
    202         type_str = 'grown'
    203       else:
    204         type_str = 'shrunk'
    205       description = ('%s, for a net change of %s bytes '
    206                      '(%d bytes before, %d bytes after)' %
    207             (type_str, DeltaStr(section.after_size - section.before_size),
    208              section.before_size, section.after_size))
    209     print('  %d %s across %d sources' %
    210           (len(section.symbols), description, len(section.sources)))
    211 
    212   maybe_unchanged_sources = set()
    213   unchanged_symbols_size = 0
    214   for file_path, symbol_type, symbol_name, size1, size2 in unchanged:
    215     maybe_unchanged_sources.add(file_path)
    216     unchanged_symbols_size += size1 # == size2
    217   print('  %d unchanged, totalling %d bytes' %
    218         (len(unchanged), unchanged_symbols_size))
    219 
    220   # High level analysis, always output.
    221   unchanged_sources = maybe_unchanged_sources
    222   for section in sections:
    223     unchanged_sources = unchanged_sources - section.sources
    224   new_sources = (new_symbols.sources -
    225     maybe_unchanged_sources -
    226     removed_symbols.sources)
    227   removed_sources = (removed_symbols.sources -
    228     maybe_unchanged_sources -
    229     new_symbols.sources)
    230   partially_changed_sources = (grown_symbols.sources |
    231     shrunk_symbols.sources | new_symbols.sources |
    232     removed_symbols.sources) - removed_sources - new_sources
    233   allFiles = set()
    234   for section in sections:
    235     allFiles = allFiles | section.sources
    236   allFiles = allFiles | maybe_unchanged_sources
    237   print 'Source stats:'
    238   print('  %d sources encountered.' % len(allFiles))
    239   print('  %d completely new.' % len(new_sources))
    240   print('  %d removed completely.' % len(removed_sources))
    241   print('  %d partially changed.' % len(partially_changed_sources))
    242   print('  %d completely unchanged.' % len(unchanged_sources))
    243   remainder = (allFiles - new_sources - removed_sources -
    244     partially_changed_sources - unchanged_sources)
    245   assert len(remainder) == 0
    246 
    247   if not showsources:
    248     return  # Per-source analysis, only if requested
    249   print 'Per-source Analysis:'
    250   delta_by_path = {}
    251   for section in sections:
    252     for path in section.symbols_by_path:
    253       entry = delta_by_path.get(path)
    254       if not entry:
    255         entry = {'plus': 0, 'minus': 0}
    256         delta_by_path[path] = entry
    257       for symbol_name, symbol_type, size1, size2 in \
    258             section.symbols_by_path[path]:
    259         if size1 is None:
    260           delta = size2
    261         elif size2 is None:
    262           delta = -size1
    263         else:
    264           delta = size2 - size1
    265 
    266         if delta > 0:
    267           entry['plus'] += delta
    268         else:
    269           entry['minus'] += (-1 * delta)
    270 
    271   def delta_sort_key(item):
    272     _path, size_data = item
    273     growth = size_data['plus'] - size_data['minus']
    274     return growth
    275 
    276   for path, size_data in sorted(delta_by_path.iteritems(), key=delta_sort_key,
    277                                 reverse=True):
    278     gain = size_data['plus']
    279     loss = size_data['minus']
    280     delta = size_data['plus'] - size_data['minus']
    281     header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
    282                                                           path, gain, loss)
    283     divider = '-' * len(header)
    284     print ''
    285     print divider
    286     print header
    287     print divider
    288     if showsymbols:
    289       if path in new_symbols.symbols_by_path:
    290         print '  New symbols:'
    291         for symbol_name, symbol_type, size1, size2 in \
    292             sorted(new_symbols.symbols_by_path[path],
    293                    key=operator.itemgetter(3),
    294                    reverse=True):
    295           print ('   %8s: %s type=%s, size=%d bytes' %
    296                  (DeltaStr(size2), symbol_name, symbol_type, size2))
    297       if path in removed_symbols.symbols_by_path:
    298         print '  Removed symbols:'
    299         for symbol_name, symbol_type, size1, size2 in \
    300             sorted(removed_symbols.symbols_by_path[path],
    301                    key=operator.itemgetter(2)):
    302           print ('   %8s: %s type=%s, size=%d bytes' %
    303                  (DeltaStr(-size1), symbol_name, symbol_type, size1))
    304       for (changed_symbols_by_path, type_str) in [
    305         (grown_symbols.symbols_by_path, "Grown"),
    306         (shrunk_symbols.symbols_by_path, "Shrunk")]:
    307         if path in changed_symbols_by_path:
    308           print '  %s symbols:' % type_str
    309           def changed_symbol_sortkey(item):
    310             symbol_name, _symbol_type, size1, size2 = item
    311             return (size1 - size2, symbol_name)
    312           for symbol_name, symbol_type, size1, size2 in \
    313               sorted(changed_symbols_by_path[path], key=changed_symbol_sortkey):
    314             print ('   %8s: %s type=%s, (was %d bytes, now %d bytes)'
    315                    % (DeltaStr(size2 - size1), symbol_name,
    316                       symbol_type, size1, size2))
    317 
    318 
    319 def main():
    320   usage = """%prog [options]
    321 
    322   Analyzes the symbolic differences between two binary files
    323   (typically, not necessarily, two different builds of the same
    324   library) and produces a detailed description of symbols that have
    325   been added, removed, or whose size has changed.
    326 
    327   Example:
    328        explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump
    329 
    330   Options are available via '--help'.
    331   """
    332   parser = optparse.OptionParser(usage=usage)
    333   parser.add_option('--nm1', metavar='PATH',
    334                     help='the nm dump of the first library')
    335   parser.add_option('--nm2', metavar='PATH',
    336                     help='the nm dump of the second library')
    337   parser.add_option('--showsources', action='store_true', default=False,
    338                     help='show per-source statistics')
    339   parser.add_option('--showsymbols', action='store_true', default=False,
    340                     help='show all symbol information; implies --showfiles')
    341   parser.add_option('--verbose', action='store_true', default=False,
    342                     help='output internal debugging stuff')
    343   opts, _args = parser.parse_args()
    344 
    345   if not opts.nm1:
    346     parser.error('--nm1 is required')
    347   if not opts.nm2:
    348     parser.error('--nm2 is required')
    349   symbols = []
    350   for path in [opts.nm1, opts.nm2]:
    351     with file(path, 'r') as nm_input:
    352       if opts.verbose:
    353         print 'parsing ' + path + '...'
    354       symbols.append(list(binary_size_utils.ParseNm(nm_input)))
    355   (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
    356   CrunchStats(added, removed, changed, unchanged,
    357     opts.showsources | opts.showsymbols, opts.showsymbols)
    358 
    359 if __name__ == '__main__':
    360   sys.exit(main())
    361