1 #!/usr/bin/env python 2 # Copyright 2014 The Chromium Authors. All rights reserved. 3 # Use of this source code is governed by a BSD-style license that can be 4 # found in the LICENSE file. 5 6 """Generate a spatial analysis against an arbitrary library. 7 8 To use, build the 'binary_size_tool' target. Then run this tool, passing 9 in the location of the library to be analyzed along with any other options 10 you desire. 11 """ 12 13 import collections 14 import json 15 import logging 16 import multiprocessing 17 import optparse 18 import os 19 import re 20 import shutil 21 import subprocess 22 import sys 23 import tempfile 24 import time 25 26 import binary_size_utils 27 28 # This path changee is not beautiful. Temporary (I hope) measure until 29 # the chromium project has figured out a proper way to organize the 30 # library of python tools. http://crbug.com/375725 31 elf_symbolizer_path = os.path.abspath(os.path.join( 32 os.path.dirname(__file__), 33 '..', 34 '..', 35 'build', 36 'android', 37 'pylib')) 38 sys.path.append(elf_symbolizer_path) 39 import symbols.elf_symbolizer as elf_symbolizer # pylint: disable=F0401 40 41 42 # Node dictionary keys. These are output in json read by the webapp so 43 # keep them short to save file size. 44 # Note: If these change, the webapp must also change. 45 NODE_TYPE_KEY = 'k' 46 NODE_NAME_KEY = 'n' 47 NODE_CHILDREN_KEY = 'children' 48 NODE_SYMBOL_TYPE_KEY = 't' 49 NODE_SYMBOL_SIZE_KEY = 'value' 50 NODE_MAX_DEPTH_KEY = 'maxDepth' 51 NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement' 52 53 # The display name of the bucket where we put symbols without path. 54 NAME_NO_PATH_BUCKET = '(No Path)' 55 56 # Try to keep data buckets smaller than this to avoid killing the 57 # graphing lib. 58 BIG_BUCKET_LIMIT = 3000 59 60 61 # TODO(andrewhayden): Only used for legacy reports. Delete. 
def FormatBytes(byte_count):
  """Pretty-print a number of bytes."""
  if byte_count > 1e6:
    return '%.1fm' % (byte_count / 1.0e6)
  if byte_count > 1e3:
    return '%.1fk' % (byte_count / 1.0e3)
  return str(byte_count)


# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name."""
  human_names = {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'w': 'weak symbol',
      'v': 'weak symbol',
  }
  return human_names[symbol_type]


def _MkChild(node, name):
  """Return the child of |node| named |name|, creating it if absent."""
  children = node[NODE_CHILDREN_KEY]
  if name not in children:
    children[name] = {NODE_NAME_KEY: name,
                      NODE_CHILDREN_KEY: {}}
  return children[name]


def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET not in root_children:
    return
  no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
  old_children = no_path_bucket[NODE_CHILDREN_KEY]
  # Count all symbols across the symbol-type sub-buckets.
  symbol_count = 0
  for _, symbol_bucket in old_children.items():
    symbol_count += len(symbol_bucket[NODE_CHILDREN_KEY])
  if symbol_count <= BIG_BUCKET_LIMIT:
    return
  # Too big: rebuild the bucket as numbered subgroups holding at most
  # BIG_BUCKET_LIMIT symbols each.
  no_path_bucket[NODE_CHILDREN_KEY] = {}
  current_bucket = None
  index = 0
  for symbol_type, symbol_bucket in old_children.items():
    for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
      if index % BIG_BUCKET_LIMIT == 0:
        group_no = (index // BIG_BUCKET_LIMIT) + 1
        subgroup_name = '%s subgroup %d' % (NAME_NO_PATH_BUCKET, group_no)
        current_bucket = _MkChild(no_path_bucket, subgroup_name)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path
      index += 1
      AddSymbolIntoFileNode(current_bucket, symbol_type,
                            symbol_name, value[NODE_SYMBOL_SIZE_KEY])


def MakeChildrenDictsIntoLists(node):
  """Recursively convert every 'children' dict into a list.

  Returns the length of the largest child list found anywhere in the
  tree, which the caller uses to warn about oversized sections."""
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    converted = []
    for child in node[NODE_CHILDREN_KEY].values():
      largest_list_len = max(largest_list_len,
                             MakeChildrenDictsIntoLists(child))
      converted.append(child)
    node[NODE_CHILDREN_KEY] = converted
  return largest_list_len


def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  leftover_children = node.pop(NODE_CHILDREN_KEY, None)
  if leftover_children:
    logging.warning('A container node used as symbol for %s.' % symbol_name)
  # This is going to be used as a leaf so no use for child list.
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
def MakeCompactTree(symbols):
  """Build the compact node tree consumed by the modern webapp.

  Node schema is defined by the NODE_*_KEY constants at the top of this
  file."""
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path:
      file_path = os.path.normpath(file_path)
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    if file_path.startswith('/'):
      file_path = file_path[1:]

    # Find pre-existing node in tree, or update if it already exists
    node = result
    depth = 0
    for path_part in file_path.split('/'):
      if not path_part:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)
  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result


# TODO(andrewhayden): Only used for legacy reports. Delete.
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of
  all leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, symbol_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    parts = path.split('/') if path else None

    if parts:
      assert path
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file. Note that
        # 'text' in binary parlance means 'code'.
        if 'vtable for ' in sym:
          bucket = '[vtable]'
        else:
          bucket = {'r': '[rodata]',
                    'd': '[data]',
                    'b': '[bss]',
                    't': '[code]',
                    'w': '[weak]'}.get(symbol_type.lower(), '[other]')
        tree['sizes'][bucket] += size
      except:
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if sym.endswith(('::__FUNCTION__', '::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size
      tree['size'] += size
  return dirs


# TODO(andrewhayden): Only used for legacy reports. Delete.
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names."""
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol'
  }
  children = []
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}


def DumpCompactTree(symbols, outfile):
  """Write the modern-report tree for |symbols| to |outfile| as JS data."""
  tree_root = MakeCompactTree(symbols)
  with open(outfile, 'w') as out:
    out.write('var tree_data = ')
    json.dump(tree_root, out)
  print('Writing %d bytes json' % os.path.getsize(outfile))


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpTreemap(symbols, outfile):
  """Write the legacy treemap data for |symbols| to |outfile| as JS data."""
  dirs = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSymbols(symbols, outfile, n):
  """Write the |n| largest non-bss, non-weak symbols to |outfile| as JS."""
  # |symbols| is a list of (sym, symbol_type, size, path); sort by size,
  # largest first.
  by_size = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    try:
      for sym, symbol_type, size, path in by_size:
        if symbol_type in ('b', 'w'):
          continue  # skip bss and weak symbols
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(symbol_type),
                 'location': path if path is not None else ''}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          return
    finally:
      # Close the JS array even on early return.
      out.write('];\n')


def MakeSourceMap(symbols):
  """Aggregate |symbols| per source path.

  Returns a dict keyed by normalized path (or '[no path]') whose values
  carry the original path, total size and symbol count for that source."""
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSources(symbols, outfile, n):
  """Write the |n| largest source files to |outfile| as JS data."""
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    try:
      for record in sources:
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          return
    finally:
      # Close the JS array even on early return.
      out.write('];\n')


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestVTables(symbols, outfile, n):
  """Write the |n| largest vtables to |outfile| as JS data."""
  vtables = []
  for symbol, _type, size, path in symbols:
    if 'vtable for ' in symbol:
      vtables.append({'symbol': symbol, 'path': path, 'size': size})
  vtables = sorted(vtables, key=lambda x: -x['size'])
  dumped = 0
  out = open(outfile, 'w')
  try:
    out.write('var largestVTables = [\n')
    for record in vtables:
      entry = {'size': FormatBytes(record['size']),
               'symbol': record['symbol'],
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        return
  finally:
    # Close the JS array even on early return.
    out.write('];\n')
    out.flush()
    out.close()


# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})   The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,})   The size. From here on out it's all optional.
# [\s]+            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Whitespace separator
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linenum|?][ (discriminator n)]
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')


class Progress():
  """Mutable counters used to report symbol-lookup progress."""
  def __init__(self):
    self.count = 0  # Symbols looked up so far.
    self.skip_count = 0  # nm lines that needed no lookup.
    self.collisions = 0  # Addresses encountered more than once.
    self.time_last_output = time.time()
    self.count_last_output = 0


def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs):
  """Run nm over |library|, symbolize the addresses with addr2line, and
  write the nm output annotated with source locations to |outfile|."""
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked by ELFSymbolizer for each resolved address.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      address_symbol[addr] = symbol

    # Check progress every 100 lookups ...
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # ... but only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' %
              (progress_percent, progress.count, progress.collisions, speed))

  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  # Re-emit the nm output with '<path>:<line>' appended where known.
  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))


def RunNm(binary, nm_binary):
  """Run |nm_binary| over |binary| and return its stdout.

  Raises:
    Exception: if nm exits with a non-zero status.
  """
  print('Starting nm')
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # Was: 'raise Exception, x' (py2-only form); the call form is
    # equivalent and also valid in python 3.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  print('Finished nm')
  return process_output


def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary):
  """Return the parsed symbol list, running nm/addr2line if needed.

  If |nm_infile| is None the symbols are produced from |library| (dumped
  to |outfile|, or a kept tempfile when |outfile| is None); otherwise
  |nm_infile| is parsed directly."""
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)

  # 'file()' is a python-2-only alias of open().
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))


def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None


def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  # Dots escaped: previously '(\d+).(\d+)' let '.' match any character,
  # which could mis-split the version number.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  # binutils > 2.22 understands DWARF4.
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)


def main():
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if (opts.nm_in):
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # (Previously cpu_count() was wrapped in str(), which made the
    # int/str comparison meaningless and pinned the result at 4.)
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary)
  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  if opts.legacy:  # legacy report
    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                       os.path.join(opts.destdir, 'largest-vtables.js'), 100)
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
      os.makedirs(treemap_out, 0o755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src')
    shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                             'index.html'), opts.destdir)
  else:  # modern report
    DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js'))
    d3_out = os.path.join(opts.destdir, 'd3')
    if not os.path.exists(d3_out):
      os.makedirs(d3_out, 0o755)
    d3_src = os.path.join(os.path.dirname(__file__),
                          '..',
                          '..',
                          'third_party', 'd3', 'src')
    template_src = os.path.join(os.path.dirname(__file__),
                                'template')
    shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
    shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
    shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
    shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())