#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generate a spatial analysis against an arbitrary library.

Adapted for Skia's use case from
chromium/src/tools/binary_size/run_binary_size_analysis.py. Main changes:

-- Cleans up some deprecated code.
-- Always use relative code path so the tree root is Skia repo's root.
-- Instead of outputting the standalone HTML/CSS/JS filesets, writes the
   TreeMap JSON data into a Google Storage bucket.
-- Adds githash and total_size to the JSON data.
-- Outputs another summary data in JSON Bench format for skiaperf ingestion.

The output JSON data for visualization is in the following format:

{
  "githash": 123abc,
  "commit_ts": 1234567890,
  "total_size": 1234567,
  "key": {"source_type": "binary_size"},
  "tree_data": {
    "maxDepth": 9,
    "k": "p", "children":[
      {"k":"p","children":[
        {"k":"p","children":[
          {"k":"p","lastPathElement":true,"children":[
            {"k":"b","t":"t","children":[
              {"k":"s", "t":"t", "value":4029,
               "n":"etc_encode_subblock_helper(unsigned char const*, ...)"
              },
      ......
  }
}

Another JSON file is generated for size summaries to be used in skiaperf.
The JSON format details can be found at:
https://github.com/google/skia/blob/master/bench/ResultsWriter.h#L54
and:
https://skia.googlesource.com/buildbot/+/master/perf/go/ingester/nanobench.go

In the binary size case, outputs look like:

{
  "gitHash": "123abc",
  "key": {
    "source_type": "binarysize"
  },
  "results": {
    "src_lazy_global_weak_symbol": {
      "memory": {
        "bytes": 41,
        "options": {
          "path": "src_lazy",
          "symbol": "global_weak_symbol"
        }
      }
    },
    "src_lazy_global_read_only_data": {
      "memory": {
        "bytes": 13476,
        "options": {
          "path": "src_lazy",
          "symbol": "global_read_only_data"
        }
      }
    },
    ...
  }
}

"""

import collections
import datetime
import json
import logging
import multiprocessing
import optparse
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import time
import urllib2

import binary_size_utils
import elf_symbolizer

from recipe_engine.types import freeze

# Node dictionary keys. These are output in json read by the webapp so
# keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_TYPE_KEY = 'k'
NODE_NAME_KEY = 'n'
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000

# Skia addition: relative dir for libskia.so from code base.
LIBSKIA_RELATIVE_PATH = os.path.join('out', 'Release', 'lib')

# Skia addition: dictionary mapping symbol type code to symbol name.
# See
# https://code.google.com/p/chromium/codesearch#chromium/src/tools/binary_size/template/D3SymbolTreeMap.js&l=74
SYMBOL_MAP = freeze({
    'A': 'global_absolute',
    'B': 'global_uninitialized_data',
    'b': 'local_uninitialized_data',
    'C': 'global_uninitialized_common',
    'D': 'global_initialized_data',
    'd': 'local_initialized_data',
    # NOTE(review): the space below looks like a typo for '_', but this string
    # is a published metric key consumed by skiaperf - confirm before changing.
    'G': 'global_small initialized_data',
    'g': 'local_small_initialized_data',
    'i': 'indirect_function',
    'N': 'debugging',
    'p': 'stack_unwind',
    'R': 'global_read_only_data',
    'r': 'local_read_only_data',
    'S': 'global_small_uninitialized_data',
    's': 'local_small_uninitialized_data',
    'T': 'global_code',
    't': 'local_code',
    'U': 'undefined',
    'u': 'unique',
    'V': 'global_weak_object',
    'v': 'local_weak_object',
    'W': 'global_weak_symbol',
    'w': 'local_weak_symbol',
    '@': 'vtable_entry',
    '-': 'stabs_debugging',
    '?': 'unrecognized',
})


def _MkChild(node, name):
  """Returns the child of |node| named |name|, creating it if missing."""
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child


def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.items():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.items():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].items():
          if index % BIG_BUCKET_LIMIT == 0:
            # Integer division: sub-buckets are numbered 1, 2, 3, ...
            group_no = (index // BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          index += 1
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)


def MakeChildrenDictsIntoLists(node):
  """Recursively converts each children dict to a list (the JSON form the
  webapp expects). Returns the length of the largest children list seen."""
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].values():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list

  return largest_list_len


def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of added levels in tree. I.e. returns 2."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning('A container node used as symbol for %s.' % symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.


def MakeCompactTree(symbols, symbol_path_origin_dir):
  """Builds the compact tree dict (see module docstring) from an iterable of
  (name, type, size, path) symbol tuples.

  |symbol_path_origin_dir| is currently unused here; kept for interface
  compatibility with the upstream Chromium script."""
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_TYPE_KEY: 'p',
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:

    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    if file_path and file_path != "??":
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Find pre-existing node in tree, or update if it already exists
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert NODE_TYPE_KEY not in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result


# Skia added: summarizes tree size by symbol type for the given root node.
# Returns a dict keyed by symbol type, and value the type's overall size.
# e.g., {"t": 12345, "W": 543}.
def GetTreeSizes(node):
  if 'children' not in node or not node['children']:
    return {node['t']: node['value']}
  dic = {}
  for i in node['children']:
    for k, v in GetTreeSizes(i).items():
      dic.setdefault(k, 0)
      dic[k] += v

  return dic


# Skia added: creates dict to be converted to JSON in bench format.
# See top of file for the structure description.
def GetBenchDict(githash, tree_root):
  dic = {'gitHash': githash,
         'key': {'source_type': 'binarysize'},
         'results': {},}
  for i in tree_root['children']:
    if NAME_NO_PATH_BUCKET == i['n']:  # Already at symbol summary level.
      for k, v in GetTreeSizes(i).items():
        dic['results']['no_path_' + SYMBOL_MAP[k]] = {
            'memory': {
                'bytes': v,
                'options': {'path': 'no_path',
                            'symbol': SYMBOL_MAP[k],},}}
    else:  # We need to go deeper.
      for c in i['children']:
        path = i['n'] + '_' + c['n']
        for k, v in GetTreeSizes(c).items():
          dic['results'][path + '_' + SYMBOL_MAP[k]] = {
              'memory': {
                  'bytes': v,
                  'options': {'path': path,
                              'symbol': SYMBOL_MAP[k],}}}

  return dic


# Skia added: constructs 'gsutil cp' subprocess command list.
def GetGsCopyCommandList(gsutil, src, dst):
  return [gsutil, '-h', 'Content-Type:application/json', 'cp', '-a',
          'public-read', src, dst]


def DumpCompactTree(symbols, symbol_path_origin_dir, ha, ts, issue, gsutil):
  """Writes the visualization JSON and the bench-format JSON for |symbols| to
  Google Storage.

  ha/ts/issue are the git hash, commit timestamp and (optional) trybot issue
  number; gsutil is the path to the gsutil binary."""
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  json_data = {'tree_data': tree_root,
               'githash': ha,
               'commit_ts': ts,
               'key': {'source_type': 'binary_size'},
               'total_size': sum(GetTreeSizes(tree_root).values()),}
  # NOTE: the temp file is deliberately kept (delete=False) so gsutil can read
  # it; it is not cleaned up afterwards.
  tmpfile = tempfile.NamedTemporaryFile(delete=False).name
  with open(tmpfile, 'w') as out:
    # Use separators without whitespace to get a smaller file.
    json.dump(json_data, out, separators=(',', ':'))

  GS_PREFIX = 'gs://skia-perf/'
  # Writes to Google Storage for visualization.
  subprocess.check_call(GetGsCopyCommandList(
      gsutil, tmpfile, GS_PREFIX + 'size/' + ha + '.json'))
  # Updates the latest data.
  if not issue:
    subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
                                               GS_PREFIX + 'size/latest.json'))
  # Writes an extra copy using year/month/day/hour path for easy ingestion.
  with open(tmpfile, 'w') as out:
    json.dump(GetBenchDict(ha, tree_root), out, separators=(',', ':'))
  now = datetime.datetime.utcnow()
  ingest_path = '/'.join(('nano-json-v1', str(now.year).zfill(4),
                          str(now.month).zfill(2), str(now.day).zfill(2),
                          str(now.hour).zfill(2)))
  if issue:
    # BUG FIX: str.join takes a single iterable; the original passed three
    # separate arguments, which raises TypeError on every trybot run.
    ingest_path = '/'.join(('trybot', ingest_path, issue))
  subprocess.check_call(GetGsCopyCommandList(gsutil, tmpfile,
      GS_PREFIX + ingest_path + '/binarysize_' + ha + '.json'))


def MakeSourceMap(symbols):
  """Aggregates |symbols| per normalized source path.

  Returns {path: {'path': ..., 'symbol_count': n, 'size': total_bytes}}."""
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = None
    if path:
      key = os.path.normpath(path)
    else:
      key = '[no path]'
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources


# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location
# Regular expression explained ( see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})        The address
# [\s]+                 Whitespace separator
# ([0-9a-f]{8,})        The size. From here on out it's all optional.
# [\s]+                 Whitespace separator
# (\S?)                 The symbol type, which is any non-whitespace char
# [\s*]                 Whitespace separator
# ([^\t]*)              Symbol name, any non-tab character (spaces ok!)
# [\t]?                 Tab separator
# (.*)                  The location (filename[:linenum|?][ (discriminator n)])
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')


class Progress():
  """Mutable counters shared by the symbolization callbacks below."""
  def __init__(self):
    self.count = 0
    self.skip_count = 0
    self.collisions = 0
    self.time_last_output = time.time()
    self.count_last_output = 0
    self.disambiguations = 0
    self.was_ambiguous = 0


def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  """Runs nm on |library|, symbolizes every address via addr2line, and writes
  the nm lines annotated with 'path:line' to |outfile|."""
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    # Callback invoked once per symbolized address; records the result and
    # keeps the progress counters up to date.
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
        if symbol.was_ambiguous:
          progress.was_ambiguous += 1

      address_symbol[addr] = symbol

    progress_output()

  def progress_output():
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        if time_spent > 0:
          speed = chunk_size / time_spent
        else:
          speed = 0
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)

        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         ' - %.1f lookups/s.' %
                         (progress_percent, progress.count,
                          progress.collisions, progress.disambiguations,
                          disambiguation_percent, speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation)
  if not disambiguate:
    src_path = None
  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path,
                                            prefix_to_remove=symbol_path_prefix)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
                                      # ELFSymbolizer.
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  print('')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(library)
  # Skia specific: path prefix to strip.
  symbol_path_prefix = symbol_path_origin_dir.replace(LIBSKIA_RELATIVE_PATH, '')

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = symbol.source_path.replace(symbol_path_prefix, '')
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))


def RunNm(binary, nm_binary):
  """Runs |nm_binary| on |binary| and returns its stdout; raises on failure."""
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output


def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  """Returns the parsed symbol list, either from |nm_infile| or by running the
  nm + addr2line pipeline on |library| (dumping the raw output to |outfile|)."""
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print('Running parallel addr2line, dumping symbols to ' + outfile)
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print('Using nm input from ' + nm_infile)
  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))


PAK_RESOURCE_ID_TO_STRING = { "inited": False }

def LoadPakIdsFromResourceFile(filename):
  """Given a file name, it loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      if line.startswith("#define "):
        line_data = line.split()
        if len(line_data) == 3:
          try:
            resource_number = int(line_data[2])
            resource_name = line_data[1]
            PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name
          except ValueError:
            # Not a numeric id; ignore this #define.
            pass

def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier. It is not helpful when
  trying to locate where footprint is generated. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h"
    # and lines of the type:
    #    #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for filename in files:
          if filename.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)

def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries)
                             # and one uint8 (encoding of text resources)
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))
  if num_entries > 0:
    # Read the index and data.
    data = data[HEADER_LENGTH:]
    for _ in range(num_entries):
      resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      data = data[INDEX_ENTRY_SIZE:]
      _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
      resource_size = next_offset - offset

      symbol_name = GetReadablePakResourceName(pak_file, resource_id)
      symbol_path = pak_file
      symbol_type = 'd'  # Data. Approximation.
      symbol_size = resource_size
      symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))

def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None

def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  # FIX: escape the '.' between the version components (the bare '.' also
  # matched any character).
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or major == 2 and minor > 22

  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)


def main():
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etceteras.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten. Not used in '
                    'Skia.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates.'
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether,'
                    ' NOTE: this may, depending on your toolchain, produce'
                    ' output with some symbols at the top layer if addr2line'
                    ' could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the'
                    ' disambiguation process.')
  parser.add_option('--githash', default='latest',
                    help='Git hash for the binary version. Added by Skia.')
  parser.add_option('--commit_ts', type='int', default=-1,
                    help='Timestamp for the commit. Added by Skia.')
  parser.add_option('--issue_number', default='',
                    help='The trybot issue number in string. Added by Skia.')
  parser.add_option('--gsutil_path', default='gsutil',
                    help='Path to gsutil binary. Added by Skia.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if (opts.nm_in):
    if opts.jobs:
      sys.stderr.write('WARNING: --jobs has no effect '
                       'when used with --nm-in\n')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    # BUG FIX: the original wrapped cpu_count() in str(), which under the
    # Python 2 mixed-type comparison rules made min() always return 4,
    # ignoring the actual processor count.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'

  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    # BUG FIX: the original message was 'Could not find ' % opts.pak - a
    # format string with no placeholder, so the path was never shown.
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)

  if opts.pak:
    AddPakData(symbols, opts.pak)

  if opts.legacy:  # legacy report
    print('Do Not set legacy flag.')

  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    DumpCompactTree(symbols, symbol_path_origin_dir, opts.githash,
                    opts.commit_ts, opts.issue_number, opts.gsutil_path)
    print('Report data uploaded to GS.')


if __name__ == '__main__':
  sys.exit(main())