1 #!/usr/bin/env python 2 # Copyright 2014 The Chromium Authors. All rights reserved. 3 # Use of this source code is governed by a BSD-style license that can be 4 # found in the LICENSE file. 5 6 """Generate a spatial analysis against an arbitrary library. 7 8 To use, build the 'binary_size_tool' target. Then run this tool, passing 9 in the location of the library to be analyzed along with any other options 10 you desire. 11 """ 12 13 import collections 14 import json 15 import logging 16 import multiprocessing 17 import optparse 18 import os 19 import re 20 import shutil 21 import struct 22 import subprocess 23 import sys 24 import tempfile 25 import time 26 27 import binary_size_utils 28 29 # This path changee is not beautiful. Temporary (I hope) measure until 30 # the chromium project has figured out a proper way to organize the 31 # library of python tools. http://crbug.com/375725 32 elf_symbolizer_path = os.path.abspath(os.path.join( 33 os.path.dirname(__file__), 34 '..', 35 '..', 36 'build', 37 'android', 38 'pylib')) 39 sys.path.append(elf_symbolizer_path) 40 import symbols.elf_symbolizer as elf_symbolizer # pylint: disable=F0401 41 42 43 # Node dictionary keys. These are output in json read by the webapp so 44 # keep them short to save file size. 45 # Note: If these change, the webapp must also change. 46 NODE_TYPE_KEY = 'k' 47 NODE_NAME_KEY = 'n' 48 NODE_CHILDREN_KEY = 'children' 49 NODE_SYMBOL_TYPE_KEY = 't' 50 NODE_SYMBOL_SIZE_KEY = 'value' 51 NODE_MAX_DEPTH_KEY = 'maxDepth' 52 NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement' 53 54 # The display name of the bucket where we put symbols without path. 55 NAME_NO_PATH_BUCKET = '(No Path)' 56 57 # Try to keep data buckets smaller than this to avoid killing the 58 # graphing lib. 59 BIG_BUCKET_LIMIT = 3000 60 61 62 # TODO(andrewhayden): Only used for legacy reports. Delete. 
# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Pretty-print a number of bytes, e.g. 2048 -> '2.0k', 3500000 -> '3.5m'.

  Counts of 1000 or less are returned as a plain decimal string."""
  if byte_count > 1e6:
    return '%.1fm' % (byte_count / 1.0e6)
  if byte_count > 1e3:
    return '%.1fk' % (byte_count / 1.0e3)
  return str(byte_count)


# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name.

  Raises KeyError for any type letter not in the table below."""
  return {'b': 'bss',
          'd': 'data',
          'r': 'read-only data',
          't': 'code',
          'w': 'weak symbol',
          'v': 'weak symbol'}[symbol_type]


def _MkChild(node, name):
  """Return the child of |node| named |name|, creating it if missing."""
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child


def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    # items()/values() instead of Python-2-only iteritems()/itervalues()
    # so this module also runs under Python 3.
    count = sum(len(bucket[NODE_CHILDREN_KEY])
                for bucket in old_children.values())
    if count > BIG_BUCKET_LIMIT:
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.items():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
          if index % BIG_BUCKET_LIMIT == 0:
            # Floor division: keeps group_no an int under Python 3 too.
            group_no = (index // BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)


def MakeChildrenDictsIntoLists(node):
  """Recursively convert every NODE_CHILDREN_KEY dict into a list (the
  form the webapp consumes).

  Returns the length of the largest children list found anywhere in the
  subtree rooted at |node|."""
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len


def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.

  Creates (or reuses) a symbol-type bucket under the file node, then a
  leaf for the symbol itself. Returns the number of added levels in the
  tree, i.e. always 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the webapp tree from (name, type, size, path) symbol tuples.

  Paths are resolved relative to |symbol_path_origin_dir|; paths inside
  $CWD are made relative to $CWD to cut click-through levels. Symbols
  without a usable path land in NAME_NO_PATH_BUCKET."""
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  cwd = os.path.abspath(os.getcwd())
  for symbol_name, symbol_type, symbol_size, file_path in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path and file_path != "??":
      file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                               file_path))
      # Let the output structure be relative to $CWD if inside $CWD,
      # otherwise relative to the disk root. This is to avoid
      # unnecessary click-through levels in the output.
      if file_path.startswith(cwd + os.sep):
        file_path = file_path[len(cwd):]
      if file_path.startswith('/'):
        file_path = file_path[1:]
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Find pre-existing node in tree, or update if it already exists
    node = result
    depth = 0
    while path_parts:
      path_part = path_parts.pop(0)
      if not path_part:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result


# TODO(andrewhayden): Only used for legacy reports. Delete.
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etceteras) and whose values are the size, in bytes, of
              those categories;
       size:  the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size:     the total size, in bytes, of all the leaf nodes that are
                 contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, symbol_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    parts = path.split('/') if path else None

    if parts:
      assert path
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          assert part != ''
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        symbol_type = symbol_type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == symbol_type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == symbol_type:
          tree['sizes']['[data]'] += size
        elif 'b' == symbol_type:
          tree['sizes']['[bss]'] += size
        elif 't' == symbol_type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == symbol_type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except:
        # Diagnostics only; the exception is re-raised immediately, so
        # even KeyboardInterrupt still propagates.
        sys.stderr.write('%s %s %s\n' % (sym, parts, file_key))
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      subkey = 'misc'
      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      elif '::' in sym:
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size
      tree['size'] += size
  return dirs


# TODO(andrewhayden): Only used for legacy reports. Delete.
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names."""
  children = []
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[data]': 'data',
      '[bss]': 'bss',
      '[code]': 'code',
      '[weak]': 'weak_symbol'
  }
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].items():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap
    for kind, size in tree['sizes'].items():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}


def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
  """Writes the compact tree for the modern (d3) report as JS to |outfile|."""
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  with open(outfile, 'w') as out:
    out.write('var tree_data=')
    # Use separators without whitespace to get a smaller file.
    json.dump(tree_root, out, separators=(',', ':'))
  print('Writing %d bytes json' % os.path.getsize(outfile))


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpTreemap(symbols, outfile):
  """Writes the legacy webtreemap data as JS to |outfile|."""
  dirs = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSymbols(symbols, outfile, n):
  """Writes the |n| largest non-bss, non-weak symbols as JS to |outfile|."""
  # a list of (sym, symbol_type, size, path); sort by size.
  symbols = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    try:
      for sym, symbol_type, size, path in symbols:
        if symbol_type in ('b', 'w'):
          continue  # skip bss and weak symbols
        if path is None:
          path = ''
        entry = {'size': FormatBytes(size),
                 'symbol': sym,
                 'type': SymbolTypeToHuman(symbol_type),
                 'location': path}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          return
    finally:
      # Close the JS array even on the early return above.
      out.write('];\n')


def MakeSourceMap(symbols):
  """Aggregates |symbols| per normalized source path.

  Returns a dict keyed by path (or '[no path]') whose values carry the
  original path plus accumulated 'size' and 'symbol_count'."""
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path) if path else '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources


# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSources(symbols, outfile, n):
  """Writes the |n| largest source files (by symbol size) as JS to |outfile|."""
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    try:
      for record in sources:
        entry = {'size': FormatBytes(record['size']),
                 'symbol_count': str(record['symbol_count']),
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          return
    finally:
      # Close the JS array even on the early return above.
      out.write('];\n')


# TODO(andrewhayden): Only used for legacy reports. Delete.
# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestVTables(symbols, outfile, n):
  """Writes the |n| largest vtables as JS to |outfile|."""
  vtables = []
  for symbol, _type, size, path in symbols:
    if 'vtable for ' in symbol:
      vtables.append({'symbol': symbol, 'path': path, 'size': size})
  vtables = sorted(vtables, key=lambda x: -x['size'])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestVTables = [\n')
    try:
      for record in vtables:
        entry = {'size': FormatBytes(record['size']),
                 'symbol': record['symbol'],
                 'location': record['path']}
        out.write(json.dumps(entry))
        out.write(',\n')
        dumped += 1
        if dumped >= n:
          return
    finally:
      # Close the JS array even on the early return above.
      out.write('];\n')


# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})   The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,})   The size. From here on out it's all optional.
# [\s]+            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Whitespace separator
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linennum|?][ (discriminator n)]
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')


class Progress():
  """Mutable counters shared by RunElfSymbolizer's nested callbacks."""
  def __init__(self):
    self.count = 0
    self.skip_count = 0
    self.collisions = 0
    self.time_last_output = time.time()
    self.count_last_output = 0
    self.disambiguations = 0
    self.was_ambiguous = 0


def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Runs nm over |library|, symbolizes the addresses via elf_symbolizer
  (addr2line) and writes the nm lines, annotated with path:line, to
  |outfile|. Progress is reported to stdout."""
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  print('')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                                  symbol.source_path))
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))


def RunNm(binary, nm_binary):
  """Runs |nm_binary| over |binary| and returns its stdout as text.

  Raises Exception with the tool's output if nm exits non-zero."""
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    # raise Exception(x) instead of Python-2-only 'raise Exception, x'.
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  # Decode so callers get text under Python 3 (no-op semantics on Python 2).
  return process_output.decode('utf-8')


def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns the parsed symbol list, either from an existing nm dump
  (|nm_infile|) or by symbolizing |library| into |outfile| first."""
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)

  # open() instead of the Python-2-only file() builtin.
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))


# Cache mapping pak resource ids to names; 'inited' records whether we
# already tried to load the generated grit headers.
PAK_RESOURCE_ID_TO_STRING = {"inited": False}


def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      if line.startswith("#define "):
        line_data = line.split()
        if len(line_data) == 3:
          try:
            resource_number = int(line_data[2])
            resource_name = line_data[1]
            PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
          except ValueError:
            # Not a numeric id (e.g. an expression); ignore it.
            pass


def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h"
    # and lines of the type:
    #    #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for filename in files:
          if filename.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)


def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries)
                             # and one uint8 (encoding of text resources)
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    # Read the index and data. Entry sizes come from the delta to the
    # next entry's offset (the index has a sentinel entry at the end).
    data = data[HEADER_LENGTH:]
    for _ in range(num_entries):
      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      data = data[INDEX_ENTRY_SIZE:]
      _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      resource_size = next_offset - offset

      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
      symbol_path = pak_file
      symbol_type = 'd'  # Data. Approximation.
      symbol_size = resource_size
      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))


def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None


def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  # Decode: check_output returns bytes under Python 3.
  tool_output = subprocess.check_output([addr2line_binary,
                                         '--version']).decode('utf-8')
  # Note: the dot between major and minor is escaped; an unescaped '.'
  # would match any character.
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1',
                                        library]).decode('utf-8')
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)


def main():
  """Command-line entry point; returns None (exit code 0) on success."""
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.destdir:
    parser.error('--destdir is required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # (Previously cpu_count() was wrapped in str(), which defeated the
    # int comparison and always yielded 4 jobs.)
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # '%s' placeholder was missing, which made this assert message
    # itself raise TypeError when the assertion failed.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  # --disable-disambiguation defaults to None, so 'is None' enables
  # disambiguation exactly when the flag was not given.
  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0o755)

  if opts.legacy:  # legacy report
    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                       os.path.join(opts.destdir, 'largest-vtables.js'), 100)
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
      os.makedirs(treemap_out, 0o755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src')
    shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                             'index.html'), opts.destdir)
  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    data_js_file_name = os.path.join(opts.destdir, 'data.js')
    DumpCompactTree(symbols, symbol_path_origin_dir, data_js_file_name)
    d3_out = os.path.join(opts.destdir, 'd3')
    if not os.path.exists(d3_out):
      os.makedirs(d3_out, 0o755)
    d3_src = os.path.join(os.path.dirname(__file__),
                          '..',
                          '..',
                          'third_party', 'd3', 'src')
    template_src = os.path.join(os.path.dirname(__file__),
                                'template')
    shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
    shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
    shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
    shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print('Report saved to ' + opts.destdir + '/index.html')


if __name__ == '__main__':
  sys.exit(main())