#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

      6 """Generate a spatial analysis against an arbitrary library.
      7 
      8 Adapted for Skia's use case from
      9 chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes:
     10 
     11 -- Cleans up some deprecated codes.
     12 -- Always use relative code path so the tree root is Skia repo's root.
     13 -- Instead of outputting the standalone HTML/CSS/JS filesets, writes the
     14     TreeMap JSON data into a Google Storage bucket.
     15 -- Adds githash and total_size to the JSON data.
     16 -- Outputs another summary data in JSON Bench format for skiaperf ingestion.
     17 
     18 The output JSON data for visualization is in the following format:
     19 
     20 {
     21   "githash": 123abc,
     22   "commit_ts": 1234567890,
     23   "total_size": 1234567,
     24   "key": {"source_type": "binary_size"},
     25   "tree_data": {
     26     "maxDepth": 9,
     27     "k": "p", "children":[
     28       {"k":"p","children":[
     29         {"k":"p","children":[
     30           {"k":"p","lastPathElement":true,"children":[
     31             {"k":"b","t":"t","children":[
     32               {"k":"s", "t":"t", "value":4029,
     33                "n":"etc_encode_subblock_helper(unsigned char const*, ...)"
     34               },
     35           ......
     36   }
     37 }
     38 
     39 Another JSON file is generated for size summaries to be used in skiaperf. The
     40 JSON format details can be found at:
     41   https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54
     42 and:
     43   https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go
     44 
     45 In the binary size case, outputs look like:
     46 
     47 {
     48   "gitHash": "123abc",
     49   "key": {
     50     "source_type": "binarysize"
     51   }
     52   "results: {
     53     "src_lazy_global_weak_symbol": {
     54       "memory": {
     55         "bytes": 41,
     56         "options": {
     57           "path": "src_lazy",
     58           "symbol": "global_weak_symbol"
     59         }
     60       }
     61     },
     62     "src_lazy_global_read_only_data": {
     63       "memory": {
     64         "bytes": 13476,
     65         "options": {
     66           "path": "src_lazy",
     67           "symbol": "global_read_only_data"
     68         }
     69       }
     70     },
     71     ...
     72   }
     73 }
     74 
     75 """

import datetime
import json
import logging
import multiprocessing
import optparse
import os
import re
import struct
import subprocess
import sys
import tempfile
import time

import binary_size_utils
import elf_symbolizer

from recipe_engine.types import freeze

# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000

# Skia addition: relative dir for libskia.so from code base.
LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')

# Skia addition: dictionary mapping symbol type code to symbol name.
# See
# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
SYMBOL_MAP = freeze({
    'A': 'global_absolute',
    'B': 'global_uninitialized_data',
    'b': 'local_uninitialized_data',
    'C': 'global_uninitialized_common',
    'D': 'global_initialized_data',
    'd': 'local_initialized_data',
    'G': 'global_small_initialized_data',
    'g': 'local_small_initialized_data',
    'i': 'indirect_function',
    'N': 'debugging',
    'p': 'stack_unwind',
    'R': 'global_read_only_data',
    'r': 'local_read_only_data',
    'S': 'global_small_uninitialized_data',
    's': 'local_small_uninitialized_data',
    'T': 'global_code',
    't': 'local_code',
    'U': 'undefined',
    'u': 'unique',
    'V': 'global_weak_object',
    'v': 'local_weak_object',
    'W': 'global_weak_symbol',
    'w': 'local_weak_symbol',
    '@': 'vtable_entry',
    '-': 'stabs_debugging',
    '?': 'unrecognized',
})


def _MkChild(node, name):
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child
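
# A minimal usage sketch for _MkChild (hypothetical node values):
#
#   root = {NODE_NAME_KEY: '/', NODE_CHILDREN_KEY: {}}
#   src = _MkChild(root, 'src')          # creates {'n': 'src', 'children': {}}
#   assert _MkChild(root, 'src') is src  # a repeat call reuses the same node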


def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.iteritems():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.iteritems():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
          if index % BIG_BUCKET_LIMIT == 0:
            group_no = (index / BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
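
# Illustration of the split (hypothetical counts): with BIG_BUCKET_LIMIT of
# 3000, a "(No Path)" bucket holding 7000 symbols is rebuilt with three
# children named "(No Path) subgroup 1" through "(No Path) subgroup 3",
# holding symbols 0-2999, 3000-5999 and 6000-6999 respectively.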


def MakeChildrenDictsIntoLists(node):
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].itervalues():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len


def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of levels added to the tree, which is always 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning('A container node is being used as a symbol for %s.',
                      symbol_name)
    # This is going to be used as a leaf, so there is no use for a child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
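
# Sketch of the two levels this adds under a file node (hypothetical values):
#
#   AddSymbolIntoFileNode(file_node, 't', 'etc_encode_subblock_helper', 4029)
#
# leaves file_node marked with 'lastPathElement' and containing a 'b' (bucket)
# child keyed 't', which in turn holds the 's' (symbol) leaf:
#
#   {'n': 't', 'k': 'b', 't': 't', 'children': {
#       'etc_encode_subblock_helper': {'n': 'etc_encode_subblock_helper',
#                                      'k': 's', 't': 't', 'value': 4029}}}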


def MakeCompactTree(symbols, symbol_path_origin_dir):
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path and file_path != "??":
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Find the existing node in the tree, creating intermediate path nodes
    # as needed.
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.', largest_list_len)
  return result
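
# Input/output sketch (hypothetical symbol tuple): each symbol is
# (name, type_code, size_in_bytes, source_path), so
#
#   MakeCompactTree([('etc_encode_subblock_helper', 't', 4029,
#                     'src/utils/SkTextureCompressor.cpp')], '.')
#
# yields a 'p' root with nested 'p' nodes for 'src' and 'utils', a
# 'lastPathElement' file node, one 't' bucket, and one leaf of value 4029,
# with 'maxDepth' set to 5.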


# Skia added: summarizes tree size by symbol type for the given root node.
# Returns a dict keyed by symbol type whose values are the overall sizes of
# the types, e.g., {"t": 12345, "W": 543}.
def GetTreeSizes(node):
  if 'children' not in node or not node['children']:
    return {node['t']: node['value']}
  dic = {}
  for i in node['children']:
    for k, v in GetTreeSizes(i).items():
      dic.setdefault(k, 0)
      dic[k] += v

  return dic
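
# Usage sketch (hypothetical nodes): two leaves of types 't' and 'W' roll up
# into a per-type size dict.
#
#   GetTreeSizes({'children': [{'t': 't', 'value': 4029},
#                              {'t': 'W', 'value': 543}]})
#   == {'t': 4029, 'W': 543}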


# Skia added: creates the dict to be converted to JSON in bench format.
# See the top of this file for the structure description.
def GetBenchDict(githash, tree_root):
  dic = {'gitHash': githash,
         'key': {'source_type': 'binarysize'},
         'results': {}}
  for i in tree_root['children']:
    if i['n'] == NAME_NO_PATH_BUCKET:  # Already at symbol summary level.
      for k, v in GetTreeSizes(i).items():
        dic['results']['no_path_' + SYMBOL_MAP[k]] = {
            'memory': {
              'bytes': v,
              'options': {'path': 'no_path',
                          'symbol': SYMBOL_MAP[k]}}}
    else:  # We need to go deeper.
      for c in i['children']:
        path = i['n'] + '_' + c['n']
        for k, v in GetTreeSizes(c).items():
          dic['results'][path + '_' + SYMBOL_MAP[k]] = {
              'memory': {
                'bytes': v,
                'options': {'path': path,
                            'symbol': SYMBOL_MAP[k]}}}

  return dic
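
# Result keys join the top-level path pair with the symbol type name. For a
# hypothetical 41-byte 'W' symbol under src/lazy, the entry becomes:
#
#   dic['results']['src_lazy_global_weak_symbol'] = {
#       'memory': {'bytes': 41,
#                  'options': {'path': 'src_lazy',
#                              'symbol': 'global_weak_symbol'}}}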


# Skia added: constructs 'gsutil cp' subprocess command list.
def GetGsCopyCommandList(gsutil, src, dst):
  return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a',
          'public-read', src, dst]
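
# For example (hypothetical paths):
#
#   GetGsCopyCommandList('gsutil', '/tmp/tree.json',
#                        'gs://skia-perf/size/abc.json')
#   == ['gsutil', '-h', 'Content-Type:application/json', 'cp', '-a',
#       'public-read', '/tmp/tree.json', 'gs://skia-perf/size/abc.json']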


def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  json_data = {'tree_data': tree_root,
               'githash': ha,
               'commit_ts': ts,
               'key': {'source_type': 'binary_size'},
               'total_size': sum(GetTreeSizes(tree_root).values())}
  tmpfile = tempfile.NamedTemporaryFile(delete=False).name
  with open(tmpfile, 'w') as out:
    # Use separators without whitespace to get a smaller file.
    json.dump(json_data, out, separators=(',', ':'))

  GS_PREFIX = 'gs://skia-perf/'
  # Writes to Google Storage for visualization.
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
  # Updates the latest data.
  if not issue:
    subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
                                               GS_PREFIX + 'size/latest.json'))
  # Writes an extra copy using the year/month/day/hour path for easy ingestion.
  with open(tmpfile, 'w') as out:
    json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
  now = datetime.datetime.utcnow()
  ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
                          str(now.month).zfill(2), str(now.day).zfill(2),
                          str(now.hour).zfill(2)))
  if issue:
    ingest_path = '/'.join(('trybot', ingest_path, issue))
  subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
      GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))


def MakeSourceMap(symbols):
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = None
    if path:
      key = os.path.normpath(path)
    else:
      key = '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources


# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location.
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})    The address
# [\s]+             Whitespace separator
# ([0-9a-f]{8,})    The size. From here on out it's all optional.
# [\s]*             Whitespace separator
# (\S?)             The symbol type, which is any non-whitespace char
# [\s*]             Whitespace separator
# ([^\t]*)          Symbol name, any non-tab character (spaces ok!)
# [\t]?             Tab separator
# (.*)              The location (filename[:linenum|?][ (discriminator n)])
sNmPattern = re.compile(
  r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
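
# Usage sketch: on a line whose location field is tab-separated (as the regex
# expects), the five groups are address, size, type, name and location:
#
#   m = sNmPattern.match(
#       '0167b39c 00000018 t ACCESS_DESCRIPTION_free\t/path/file.c:95')
#   m.groups() == ('0167b39c', '00000018', 't', 'ACCESS_DESCRIPTION_free',
#                  '/path/file.c:95')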


class Progress(object):
  def __init__(self):
    self.count = 0
    self.skip_count = 0
    self.collisions = 0
    self.time_last_output = time.time()
    self.count_last_output = 0
    self.disambiguations = 0
    self.was_ambiguous = 0


def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()
  def map_address_symbol(symbol, addr):
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
              '%d disambiguations where %.1f%% succeeded)'
              ' - %.1f lookups/s.' %
              (progress_percent, progress.count, progress.collisions,
               progress.disambiguations, disambiguation_percent, speed))

  # If disambiguation is disabled, remove the source path (a set source path
  # signals the symbolizer to enable disambiguation).
  if not disambiguate:
    src_path = None
  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path,
                                            prefix_to_remove=symbol_path_prefix)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up; shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have, my young padawan.')

  print ''

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: path prefix to strip.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path.replace(symbol_path_prefix, '')
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))


def RunNm(binary, nm_binary):
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output


def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print 'Running parallel addr2line, dumping symbols to ' + outfile
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print 'Using nm input from ' + nm_infile
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))


PAK_RESOURCE_ID_TO_STRING = {"inited": False}


def LoadPakIdsFromResourceFile(filename):
  """Given a file name, loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      if line.startswith("#define "):
        line_data = line.split()
        if len(line_data) == 3:
          try:
            resource_number = int(line_data[2])
            resource_name = line_data[1]
            PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
          except ValueError:
            pass
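
# Sketch (hypothetical header contents): a grit-generated line such as
#
#   #define IDR_MY_RESOURCE_JS 1234
#
# adds the entry PAK_RESOURCE_ID_TO_STRING[1234] = 'IDR_MY_RESOURCE_JS'.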


def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier, which is not helpful when
  trying to locate where the footprint comes from. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h
    # and lines of the type:
    #    #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for filename in files:
          if filename.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)


def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s (file version, number of entries)
                             # and one uint8 (encoding of text resources).
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    # Read the index and data.
    data = data[HEADER_LENGTH:]
    for _ in range(num_entries):
      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      data = data[INDEX_ENTRY_SIZE:]
      _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      resource_size = next_offset - offset

      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
      symbol_path = pak_file
      symbol_type = 'd'  # Data. Approximation.
      symbol_size = resource_size
      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))


def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None


def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats, and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or (major == 2 and minor > 22)

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                       '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data, but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)
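
# Version check sketch (hypothetical tool output): for an addr2line that
# reports 'GNU addr2line (GNU Binutils) 2.24', the regex yields major=2 and
# minor=24, so supports_dwarf4 is True and the readelf fallback is skipped.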


def main():
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, et cetera.

  Under normal circumstances, you only need to pass two arguments, like so:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten. Not used in '
                    'Skia.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze the library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze the '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit a legacy binary size report instead of the '
                    'modern one.')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether. '
                    'NOTE: this may, depending on your toolchain, produce '
                    'output with some symbols at the top layer if addr2line '
                    'could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary; '
                    'defaults to the current directory. Used in the '
                    'disambiguation process.')
  parser.add_option('--githash', default='latest',
                    help='Git hash for the binary version. Added by Skia.')
  parser.add_option('--commit_ts', type='int', default=-1,
                    help='Timestamp for the commit. Added by Skia.')
  parser.add_option('--issue_number', default='',
                    help='The trybot issue number as a string. Added by Skia.')
  parser.add_option('--gsutil_path', default='gsutil',
                    help='Path to the gsutil binary. Added by Skia.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      print >> sys.stderr, ('WARNING: --jobs has no effect '
                            'when used with --nm-in')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         not opts.disable_disambiguation,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if opts.legacy:  # legacy report
    print 'Do not set the --legacy flag; no report will be generated.'

  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
                    opts.commit_ts, opts.issue_number, opts.gsutil_path)
    print 'Report data uploaded to GS.'


if __name__ == '__main__':
  sys.exit(main())