#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import bisect
import collections
import ctypes
import disasm
import mmap
import optparse
import os
import re
import subprocess
import sys
import time


# Help text shown by optparse; %prog is substituted with the script name.
USAGE="""usage: %prog [OPTION]...

Analyses V8 and perf logs to produce profiles.

Perf logs can be collected using a command like:
  $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
  # -R: collect all data
  # -e cycles: use cpu-cycles event (run "perf list" for details)
  # -c 10000: write a sample after each 10000 events
  # -f: force output file overwrite
  # -i: limit profiling to our process and the kernel
  # --ll-prof shell flag enables the right V8 logs
This will produce a binary trace file (perf.data) that %prog can analyse.

Examples:
  # Print flat profile with annotated disassembly for the 10 top
  # symbols. Use default log names and include the snapshot log.
  $ %prog --snapshot --disasm-top=10

  # Print flat profile with annotated disassembly for all used symbols.
  # Use default log names and include kernel symbols into analysis.
  $ %prog --disasm-all --kernel

  # Print flat profile. Use custom log names.
  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
"""


# Must match kGcFakeMmap.
# Name V8 mmaps as a fake file on code-moving GCs; seeing this name in a
# perf mmap event tells the driver to re-read the V8 log up to the GC.
V8_GC_FAKE_MMAP = "/tmp/__v8_gc__"

# Origin tags for dynamically generated V8 code: regular JITed code and
# code reconstructed from the snapshot.
JS_ORIGIN = "js"
JS_SNAPSHOT_ORIGIN = "js-snapshot"

OBJDUMP_BIN = disasm.OBJDUMP_BIN


class Code(object):
  """Code object."""

  # Class-level counter used to assign a unique id per Code object.
  _id = 0

  def __init__(self, name, start_address, end_address, origin, origin_offset):
    self.id = Code._id
    Code._id += 1
    self.name = name
    # Lazily created list of alias names (a code object may be shared
    # by several functions, see _HandleCodeConflict below).
    self.other_names = None
    self.start_address = start_address
    self.end_address = end_address
    # JS_ORIGIN/JS_SNAPSHOT_ORIGIN for V8 code, or a library file name.
    self.origin = origin
    # Offset of the code body inside the origin file (the ".ll" log for
    # JS code, the library itself for native code).
    self.origin_offset = origin_offset
    # Samples whose pc landed inside this code object.
    self.self_ticks = 0
    # Lazily created map: pc offset within the code -> tick count.
    self.self_ticks_map = None
    # Lazily created map: callee Code object -> tick count.
    self.callee_ticks = None

  def AddName(self, name):
    # Register an additional (alias) name for this code object.
    assert self.name != name
    if self.other_names is None:
      self.other_names = [name]
      return
    if not name in self.other_names:
      self.other_names.append(name)

  def FullName(self):
    # Primary name plus any aliases; aliases sorted for stable output.
    if self.other_names is None:
      return self.name
    self.other_names.sort()
    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))

  def IsUsed(self):
    # "Used" means this code got samples itself or had sampled callees.
    return self.self_ticks > 0 or self.callee_ticks is not None

  def Tick(self, pc):
    # Account one sample that hit this code object at address pc.
    self.self_ticks += 1
    if self.self_ticks_map is None:
      self.self_ticks_map = collections.defaultdict(lambda: 0)
    offset = pc - self.start_address
    self.self_ticks_map[offset] += 1

  def CalleeTick(self, callee):
    # Account one sample where this code was on the stack above callee.
    if self.callee_ticks is None:
      self.callee_ticks = collections.defaultdict(lambda: 0)
    self.callee_ticks[callee] += 1

  def PrintAnnotated(self, arch, options):
    # Print this code object's disassembly with per-instruction tick
    # percentages in the left column.
    if self.self_ticks_map is None:
      ticks_map = []
    else:
      ticks_map = self.self_ticks_map.items()
    # Convert the ticks map to offsets and counts arrays so that later
    # we can do binary search in the offsets array.
    ticks_map.sort(key=lambda t: t[0])
    ticks_offsets = [t[0] for t in ticks_map]
    ticks_counts = [t[1] for t in ticks_map]
    # Get a list of disassembled lines and their addresses.
    lines = self._GetDisasmLines(arch, options)
    if len(lines) == 0:
      return
    # Print annotated lines.
    address = lines[0][0]
    total_count = 0
    for i in xrange(len(lines)):
      start_offset = lines[i][0] - address
      if i == len(lines) - 1:
        end_offset = self.end_address - self.start_address
      else:
        end_offset = lines[i + 1][0] - address
      # Ticks (reported pc values) are not always precise, i.e. not
      # necessarily point at instruction starts. So we have to search
      # for ticks that touch the current instruction line.
      j = bisect.bisect_left(ticks_offsets, end_offset)
      count = 0
      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
        if offset < start_offset:
          break
        count += cnt
      total_count += count
      count = 100.0 * count / self.self_ticks
      if count >= 0.01:
        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
      else:
        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
    print
    # Every recorded tick must be attributed to some instruction line.
    assert total_count == self.self_ticks, \
        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)

  def __str__(self):
    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
      self.name,
      self.start_address,
      self.end_address,
      self.end_address - self.start_address,
      self.origin)

  def _GetDisasmLines(self, arch, options):
    # JS code bodies live inside the ".ll" binary log; native code is
    # disassembled straight from the originating library ("inplace").
    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
      inplace = False
      filename = options.log + ".ll"
    else:
      inplace = True
      filename = self.origin
    return disasm.GetDisasmLines(filename,
                                 self.origin_offset,
                                 self.end_address - self.start_address,
                                 arch,
                                 inplace)


class CodePage(object):
  """Group of adjacent code objects."""

  SHIFT = 12  # 4K pages
  SIZE = (1 << SHIFT)
  MASK = ~(SIZE - 1)

  @staticmethod
  def PageAddress(address):
    # Round an address down to its page start.
    return address & CodePage.MASK

  @staticmethod
  def PageId(address):
    return address >> CodePage.SHIFT

  @staticmethod
  def PageAddressFromId(id):
    return id << CodePage.SHIFT

  def __init__(self, address):
    # Page start address; code objects overlapping this page.
    self.address = address
    self.code_objects = []

  def Add(self, code):
    self.code_objects.append(code)

  def Remove(self, code):
    self.code_objects.remove(code)

  def Find(self, pc):
    # Linear scan with move-to-front: the hit is swapped to the list
    # head to speed up repeated lookups of hot code.
    code_objects = self.code_objects
    for i, code in enumerate(code_objects):
      if code.start_address <= pc < code.end_address:
        code_objects[0], code_objects[i] = code, code_objects[0]
        return code
    return None

  def __iter__(self):
    return self.code_objects.__iter__()


class CodeMap(object):
  """Code object map."""

  def __init__(self):
    # Maps page id -> CodePage.
    self.pages = {}
    # Bounds of all known code; lets Find() reject obvious misses fast.
    self.min_address = 1 << 64
    self.max_address = -1

  def Add(self, code, max_pages=-1):
    # Register the code object on every page it spans. max_pages >= 0
    # caps the number of pages (used for kernel symbols whose end
    # address is only a guess).
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    pages = 0
    while page_id < limit_id:
      if max_pages >= 0 and pages > max_pages:
        print >>sys.stderr, \
            "Warning: page limit (%d) reached for %s [%s]" % (
                max_pages, code.name, code.origin)
        break
      if page_id in self.pages:
        page = self.pages[page_id]
      else:
        page = CodePage(CodePage.PageAddressFromId(page_id))
        self.pages[page_id] = page
      page.Add(code)
      page_id += 1
      pages += 1
    self.min_address = min(self.min_address, code.start_address)
    self.max_address = max(self.max_address, code.end_address)

  def Remove(self, code):
    # Drop the code object from every page it spans; returns whether
    # anything was actually removed.
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    removed = False
    while page_id < limit_id:
      if page_id not in self.pages:
        page_id += 1
        continue
      page = self.pages[page_id]
      page.Remove(code)
      removed = True
      page_id += 1
    return removed

  def AllCode(self):
    # Yield each code object exactly once: only from the page that
    # contains its start address (it may be listed on several pages).
    for page in self.pages.itervalues():
      for code in page:
        if CodePage.PageAddress(code.start_address) == page.address:
          yield code

  def UsedCode(self):
    for code in self.AllCode():
      if code.IsUsed():
        yield code

  def Print(self):
    for code in self.AllCode():
      print code

  def Find(self, pc):
    # Map a sampled pc to the code object containing it, or None.
    if pc < self.min_address or pc >= self.max_address:
      return None
    page_id = CodePage.PageId(pc)
    if page_id not in self.pages:
      return None
    return self.pages[page_id].Find(pc)


class CodeInfo(object):
  """Generic info about generated code objects."""

  def __init__(self, arch, header_size):
    self.arch = arch
    self.header_size = header_size


class SnapshotLogReader(object):
  """V8 snapshot log reader."""

  # Matches lines like: snapshot-code-name,<position>,"<name>"
  _SNAPSHOT_CODE_NAME_RE = re.compile(
    r"snapshot-code-name,(\d+),\"(.*)\"")

  def __init__(self, log_name):
    self.log_name = log_name

  def ReadNameMap(self):
    # Parse the snapshot log into a map from snapshot serialization
    # position to the code object name recorded for it.
    log = open(self.log_name, "r")
    try:
      snapshot_pos_to_name = {}
      for line in log:
        match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
        if match:
          pos = int(match.group(1))
          name = match.group(2)
          snapshot_pos_to_name[pos] = name
    finally:
      log.close()
    return snapshot_pos_to_name


class LogReader(object):
  """V8 low-level (binary) log reader."""

  # Pointer width in the log records depends on the target architecture.
  _ARCH_TO_POINTER_TYPE_MAP = {
    "ia32": ctypes.c_uint32,
    "arm": ctypes.c_uint32,
    "mips": ctypes.c_uint32,
    "x64": ctypes.c_uint64
  }

  # One-byte tags that precede each record in the binary log.
  _CODE_CREATE_TAG = "C"
  _CODE_MOVE_TAG = "M"
  _CODE_DELETE_TAG = "D"
  _SNAPSHOT_POSITION_TAG = "P"
  _CODE_MOVING_GC_TAG = "G"

  def __init__(self, log_name, code_map, snapshot_pos_to_name):
    self.log_file = open(log_name, "r")
    # mmap the log so ctypes structures can be overlaid on it in place,
    # without copying.
    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.log_pos = 0
    self.code_map = code_map
    self.snapshot_pos_to_name = snapshot_pos_to_name
    # Filled from snapshot-position records; cleared on moving GCs.
    self.address_to_snapshot_name = {}

    # The log starts with a NUL-terminated architecture name, which
    # selects the pointer width for the record structures below.
    self.arch = self.log[:self.log.find("\0")]
    self.log_pos += len(self.arch) + 1
    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
        "Unsupported architecture %s" % self.arch
    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]

    self.code_create_struct = LogReader._DefineStruct([
        ("name_size", ctypes.c_int32),
        ("code_address", pointer_type),
        ("code_size", ctypes.c_int32)])

    self.code_move_struct = LogReader._DefineStruct([
        ("from_address", pointer_type),
        ("to_address", pointer_type)])

    self.code_delete_struct = LogReader._DefineStruct([
        ("address", pointer_type)])

    self.snapshot_position_struct = LogReader._DefineStruct([
        ("address", pointer_type),
        ("position", ctypes.c_int32)])

  def ReadUpToGC(self):
    # Process log records up to (and including) the next code-moving GC
    # tag, updating self.code_map as code is created/moved/deleted.
    while self.log_pos < self.log.size():
      tag = self.log[self.log_pos]
      self.log_pos += 1

      if tag == LogReader._CODE_MOVING_GC_TAG:
        # Snapshot-code addresses are invalidated by a moving GC.
        self.address_to_snapshot_name.clear()
        return

      if tag == LogReader._CODE_CREATE_TAG:
        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        start_address = event.code_address
        end_address = start_address + event.code_size
        if start_address in self.address_to_snapshot_name:
          name = self.address_to_snapshot_name[start_address]
          origin = JS_SNAPSHOT_ORIGIN
        else:
          name = self.log[self.log_pos:self.log_pos + event.name_size]
          origin = JS_ORIGIN
        # The name bytes are present in the log either way; skip them.
        self.log_pos += event.name_size
        # The code body follows the name; remember its offset so it can
        # be disassembled later, then skip over it.
        origin_offset = self.log_pos
        self.log_pos += event.code_size
        code = Code(name, start_address, end_address, origin, origin_offset)
        conficting_code = self.code_map.Find(start_address)
        if conficting_code:
          if not (conficting_code.start_address == code.start_address and
                  conficting_code.end_address == code.end_address):
            # Different extent: the old object is stale, evict it.
            self.code_map.Remove(conficting_code)
          else:
            # Same extent: treat as an alias of the existing object.
            LogReader._HandleCodeConflict(conficting_code, code)
            # TODO(vitalyr): this warning is too noisy because of our
            # attempts to reconstruct code log from the snapshot.
            # print >>sys.stderr, \
            #     "Warning: Skipping duplicate code log entry %s" % code
            continue
        self.code_map.Add(code)
        continue

      if tag == LogReader._CODE_MOVE_TAG:
        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        old_start_address = event.from_address
        new_start_address = event.to_address
        if old_start_address == new_start_address:
          # Skip useless code move entries.
          continue
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact move address %x for %s" % (old_start_address, code)
        # Re-register under the new address range.
        self.code_map.Remove(code)
        size = code.end_address - code.start_address
        code.start_address = new_start_address
        code.end_address = new_start_address + size
        self.code_map.Add(code)
        continue

      if tag == LogReader._CODE_DELETE_TAG:
        event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        old_start_address = event.address
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact delete address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        continue

      if tag == LogReader._SNAPSHOT_POSITION_TAG:
        event = self.snapshot_position_struct.from_buffer(self.log,
                                                          self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        start_address = event.address
        snapshot_pos = event.position
        # Remember the deserialization address so a later code-create
        # event there can be attributed to named snapshot code.
        if snapshot_pos in self.snapshot_pos_to_name:
          self.address_to_snapshot_name[start_address] = \
              self.snapshot_pos_to_name[snapshot_pos]
        continue

      assert False, "Unknown tag %s" % tag

  def Dispose(self):
    self.log.close()
    self.log_file.close()

  @staticmethod
  def _DefineStruct(fields):
    # Build an ad-hoc ctypes structure type with the given fields.
    class Struct(ctypes.Structure):
      _fields_ = fields
    return Struct

  @staticmethod
  def _HandleCodeConflict(old_code, new_code):
    # Merge a duplicate code-create entry into the existing object.
    assert (old_code.start_address == new_code.start_address and
            old_code.end_address == new_code.end_address), \
        "Conficting code log entries %s and %s" % (old_code, new_code)
    if old_code.name == new_code.name:
      return
    # Code object may be shared by a few functions. Collect the full
    # set of names.
    old_code.AddName(new_code.name)


class Descriptor(object):
  """Descriptor of a structure in the binary trace log."""

  CTYPE_MAP = {
    "u16": ctypes.c_uint16,
    "u32": ctypes.c_uint32,
    "u64": ctypes.c_uint64
  }

  def __init__(self, fields):
    # fields is a list of (name, format) pairs; build a matching ctypes
    # structure type with a readable __str__ for debugging.
    class TraceItem(ctypes.Structure):
      _fields_ = Descriptor.CtypesFields(fields)

      def __str__(self):
        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
                         for field, _ in TraceItem._fields_)

    self.ctype = TraceItem

  def Read(self, trace, offset):
    # Overlay the structure on the mmapped trace without copying.
    return self.ctype.from_buffer(trace, offset)

  @staticmethod
  def CtypesFields(fields):
    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]


# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
# for the gory details.
# On-disk layout of the perf.data file header.
TRACE_HEADER_DESC = Descriptor([
  ("magic", "u64"),
  ("size", "u64"),
  ("attr_size", "u64"),
  ("attrs_offset", "u64"),
  ("attrs_size", "u64"),
  ("data_offset", "u64"),
  ("data_size", "u64"),
  ("event_types_offset", "u64"),
  ("event_types_size", "u64")
])


# Layout of struct perf_event_attr (the event configuration record).
PERF_EVENT_ATTR_DESC = Descriptor([
  ("type", "u32"),
  ("size", "u32"),
  ("config", "u64"),
  ("sample_period_or_freq", "u64"),
  ("sample_type", "u64"),
  ("read_format", "u64"),
  ("flags", "u64"),
  ("wakeup_events_or_watermark", "u32"),
  ("bt_type", "u32"),
  ("bp_addr", "u64"),
  ("bp_len", "u64"),
])


# Common header preceding every event record in the data section.
PERF_EVENT_HEADER_DESC = Descriptor([
  ("type", "u32"),
  ("misc", "u16"),
  ("size", "u16")
])


# Body of a PERF_RECORD_MMAP event (followed by the mapped file name).
PERF_MMAP_EVENT_BODY_DESC = Descriptor([
  ("pid", "u32"),
  ("tid", "u32"),
  ("addr", "u64"),
  ("len", "u64"),
  ("pgoff", "u64")
])


# perf_event_attr.sample_type bits control the set of
# perf_sample_event fields.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10


# (field name, format, sample_type bit that enables the field), in the
# order the fields appear in a sample record.
PERF_SAMPLE_EVENT_BODY_FIELDS = [
  ("ip", "u64", PERF_SAMPLE_IP),
  ("pid", "u32", PERF_SAMPLE_TID),
  ("tid", "u32", PERF_SAMPLE_TID),
  ("time", "u64", PERF_SAMPLE_TIME),
  ("addr", "u64", PERF_SAMPLE_ADDR),
  ("id", "u64", PERF_SAMPLE_ID),
  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
  ("cpu", "u32", PERF_SAMPLE_CPU),
  ("res", "u32", PERF_SAMPLE_CPU),
  ("period", "u64", PERF_SAMPLE_PERIOD),
  # Don't want to handle read format that comes after the period and
  # before the callchain and has variable size.
  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
  # Raw data follows the callchain and is ignored.
]


PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


# perf_event_header.type values we care about.
PERF_RECORD_MMAP = 1
PERF_RECORD_SAMPLE = 9


class TraceReader(object):
  """Perf (linux-2.6/tools/perf) trace file reader."""

  # "PERFFILE" interpreted as a little-endian u64.
  _TRACE_HEADER_MAGIC = 4993446653023372624

  def __init__(self, trace_name):
    self.trace_file = open(trace_name, "r")
    # mmap the trace so event structs can be overlaid without copying.
    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
      print >>sys.stderr, "Warning: unsupported trace header magic"
    # Current read position and end of the data section.
    self.offset = self.trace_header.data_offset
    self.limit = self.trace_header.data_offset + self.trace_header.data_size
    assert self.limit <= self.trace.size(), \
        "Trace data limit exceeds trace file size"
    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    assert self.trace_header.attrs_size != 0, \
        "No perf event attributes found in the trace"
    # The first attribute's sample_type determines the sample layout.
    # NOTE(review): assumes all events share one sample_type — confirm.
    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                self.trace_header.attrs_offset)
    self.sample_event_body_desc = self._SampleEventBodyDesc(
        perf_event_attr.sample_type)
    self.callchain_supported = \
        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    if self.callchain_supported:
      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
      self.ip_size = ctypes.sizeof(self.ip_struct)

  def ReadEventHeader(self):
    # Return (header, offset) for the next event, advancing past it;
    # (None, 0) at end of the data section.
    if self.offset >= self.limit:
      return None, 0
    offset = self.offset
    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    self.offset += header.size
    return header, offset

  def ReadMmap(self, header, offset):
    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
                                               offset + self.header_size)
    # Read null-terminated filename.
    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
                          offset + header.size]
    mmap_info.filename = filename[:filename.find(chr(0))]
    return mmap_info

  def ReadSample(self, header, offset):
    # Decode a sample record; if callchains are enabled, also collect
    # the caller ip list into sample.ips.
    sample = self.sample_event_body_desc.Read(self.trace,
                                              offset + self.header_size)
    if not self.callchain_supported:
      return sample
    sample.ips = []
    offset += self.header_size + ctypes.sizeof(sample)
    for _ in xrange(sample.nr):
      sample.ips.append(
        self.ip_struct.from_buffer(self.trace, offset).value)
      offset += self.ip_size
    return sample

  def Dispose(self):
    self.trace.close()
    self.trace_file.close()

  def _SampleEventBodyDesc(self, sample_type):
    # Build a descriptor containing only the fields enabled by the
    # event's sample_type bits.
    assert (sample_type & PERF_SAMPLE_READ) == 0, \
        "Can't hande read format in samples"
    fields = [(field, format)
              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
              if (bit & sample_type) != 0]
    return Descriptor(fields)


# Regexes for parsing "objdump -h -t -T -C" output and kallsyms.
OBJDUMP_SECTION_HEADER_RE = re.compile(
  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
OBJDUMP_SYMBOL_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
  r"^DYNAMIC SYMBOL TABLE")
OBJDUMP_SKIP_RE = re.compile(
  r"^.*ld\.so\.cache$")
KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
PERF_KERNEL_ALLSYMS_RE = re.compile(
  r".*kallsyms.*")
KERNEL_ALLSYMS_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")


class LibraryRepo(object):
  # Tracks mmapped libraries from the perf trace and loads their
  # symbols into the code map via objdump/kallsyms.
  def __init__(self):
    self.infos = []
    self.names = set()
    self.ticks = {}

  def Load(self, mmap_info, code_map, options):
    # Process one PERF_RECORD_MMAP event: skip uninteresting mappings,
    # or extract the mapped binary's symbols into code_map.
    # NOTE(review): the objdump success path falls off the end and
    # returns None while other paths return True/False — confirm
    # callers ignore the return value.
    # Skip kernel mmaps when requested using the fact that their tid
    # is 0.
    if mmap_info.tid == 0 and not options.kernel:
      return True
    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
      return True
    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
      return self._LoadKernelSymbols(code_map)
    self.infos.append(mmap_info)
    mmap_info.ticks = 0
    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    if not os.path.exists(mmap_info.filename):
      return True
    # Request section headers (-h), symbols (-t), and dynamic symbols
    # (-T) from objdump.
    # Unfortunately, section headers span two lines, so we have to
    # keep the just seen section name (from the first line in each
    # section header) in the after_section variable.
    process = subprocess.Popen(
      "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pipe = process.stdout
    after_section = None
    code_sections = set()
    reloc_sections = set()
    dynamic = False
    try:
      for line in pipe:
        if after_section:
          if line.find("CODE") != -1:
            code_sections.add(after_section)
          if line.find("RELOC") != -1:
            reloc_sections.add(after_section)
          after_section = None
          continue

        match = OBJDUMP_SECTION_HEADER_RE.match(line)
        if match:
          after_section = match.group(1)
          continue

        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
          dynamic = True
          continue

        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          origin_offset = start_address
          flags = match.group(2)
          section = match.group(3)
          if section in code_sections:
            # Dynamic symbols and symbols in relocatable sections use
            # addresses relative to the mapping base.
            if dynamic or section in reloc_sections:
              start_address += mmap_info.addr
            size = int(match.group(4), 16)
            name = match.group(5)
            origin = mmap_info.filename
            code_map.Add(Code(name, start_address, start_address + size,
                              origin, origin_offset))
    finally:
      pipe.close()
    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename

  def Tick(self, pc):
    # Attribute a sample to the library containing pc, with
    # move-to-front on the infos list for faster repeated hits.
    for i, mmap_info in enumerate(self.infos):
      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
        mmap_info.ticks += 1
        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
        return True
    return False

  def _UniqueMmapName(self, mmap_info):
    # Disambiguate libraries mapped multiple times by suffixing -1, -2...
    name = mmap_info.filename
    index = 1
    while name in self.names:
      name = "%s-%d" % (mmap_info.filename, index)
      index += 1
    self.names.add(name)
    return name

  def _LoadKernelSymbols(self, code_map):
    # Parse /proc/kallsyms; each symbol's end address is the next
    # symbol's start, so registration lags by one entry.
    # NOTE(review): the final symbol read is never added to code_map —
    # confirm this matches the upstream tool's intent.
    if not os.path.exists(KERNEL_ALLSYMS_FILE):
      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
      return False
    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    code = None
    for line in kallsyms:
      match = KERNEL_ALLSYMS_LINE_RE.match(line)
      if match:
        start_address = int(match.group(1), 16)
        end_address = start_address
        name = match.group(2)
        if code:
          code.end_address = start_address
          code_map.Add(code, 16)
        code = Code(name, start_address, end_address, "kernel", 0)
    return True


def PrintReport(code_map, library_repo, arch, ticks, options):
  # Print the flat per-symbol and per-library tick profiles; optionally
  # annotate the disassembly of the top symbols.
  print "Ticks per symbol:"
  used_code = [code for code in code_map.UsedCode()]
  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
  for i, code in enumerate(used_code):
    code_ticks = code.self_ticks
    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
                                    code.FullName(), code.origin)
    if options.disasm_all or i < options.disasm_top:
      code.PrintAnnotated(arch, options)
  print
  print "Ticks per library:"
  mmap_infos = [m for m in library_repo.infos]
  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
  for mmap_info in mmap_infos:
    mmap_ticks = mmap_info.ticks
    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
                               mmap_info.unique_name)


def PrintDot(code_map, options):
  # Emit a Graphviz call graph of used code objects (>= 10 self ticks).
  print "digraph G {"
  for code in code_map.UsedCode():
    if code.self_ticks < 10:
      continue
    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    if code.callee_ticks:
      for callee, ticks in code.callee_ticks.iteritems():
        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
  print "}"


if __name__ == "__main__":
  parser = optparse.OptionParser(USAGE)
  parser.add_option("--snapshot-log",
                    default="obj/release/snapshot.log",
                    help="V8 snapshot log file name [default: %default]")
  parser.add_option("--log",
                    default="v8.log",
                    help="V8 log file name [default: %default]")
  parser.add_option("--snapshot",
                    default=False,
                    action="store_true",
                    help="process V8 snapshot log [default: %default]")
  parser.add_option("--trace",
                    default="perf.data",
                    help="perf trace file name [default: %default]")
  parser.add_option("--kernel",
                    default=False,
                    action="store_true",
                    help="process kernel entries [default: %default]")
  parser.add_option("--disasm-top",
                    default=0,
                    type="int",
                    help=("number of top symbols to disassemble and annotate "
                          "[default: %default]"))
  parser.add_option("--disasm-all",
                    default=False,
                    action="store_true",
                    help=("disassemble and annotate all used symbols "
                          "[default: %default]"))
  parser.add_option("--dot",
                    default=False,
                    action="store_true",
                    help="produce dot output (WIP) [default: %default]")
  parser.add_option("--quiet", "-q",
                    default=False,
                    action="store_true",
                    help="no auxiliary messages [default: %default]")
  options, args = parser.parse_args()

  if not options.quiet:
    if options.snapshot:
      print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
                                        options.log,
                                        options.log)
    else:
      print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
    print "Perf trace file: %s" % options.trace

  # Stats.
  events = 0
  ticks = 0
  missed_ticks = 0
  really_missed_ticks = 0
  mmap_time = 0
  sample_time = 0

  # Process the snapshot log to fill the snapshot name map.
  snapshot_name_map = {}
  if options.snapshot:
    snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
    snapshot_name_map = snapshot_log_reader.ReadNameMap()

  # Initialize the log reader.
  code_map = CodeMap()
  log_reader = LogReader(log_name=options.log + ".ll",
                         code_map=code_map,
                         snapshot_pos_to_name=snapshot_name_map)
  if not options.quiet:
    print "Generated code architecture: %s" % log_reader.arch
    print
    sys.stdout.flush()

  # Process the code and trace logs. The V8 log is consumed in chunks:
  # up to the next code-moving GC each time the corresponding fake mmap
  # event is seen in the perf trace, keeping the two logs in sync.
  library_repo = LibraryRepo()
  log_reader.ReadUpToGC()
  trace_reader = TraceReader(options.trace)
  while True:
    header, offset = trace_reader.ReadEventHeader()
    if not header:
      break
    events += 1
    if header.type == PERF_RECORD_MMAP:
      start = time.time()
      mmap_info = trace_reader.ReadMmap(header, offset)
      if mmap_info.filename == V8_GC_FAKE_MMAP:
        log_reader.ReadUpToGC()
      else:
        library_repo.Load(mmap_info, code_map, options)
      mmap_time += time.time() - start
    elif header.type == PERF_RECORD_SAMPLE:
      ticks += 1
      start = time.time()
      sample = trace_reader.ReadSample(header, offset)
      code = code_map.Find(sample.ip)
      if code:
        code.Tick(sample.ip)
      else:
        missed_ticks += 1
      if not library_repo.Tick(sample.ip) and not code:
        really_missed_ticks += 1
      if trace_reader.callchain_supported:
        # Walk the callchain outward, crediting each caller with a
        # callee tick for the frame below it.
        for ip in sample.ips:
          caller_code = code_map.Find(ip)
          if caller_code:
            if code:
              caller_code.CalleeTick(code)
            code = caller_code
      sample_time += time.time() - start

  if options.dot:
    PrintDot(code_map, options)
  else:
    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)

  if not options.quiet:
    print
    print "Stats:"
    print "%10d total trace events" % events
    print "%10d total ticks" % ticks
    print "%10d ticks not in symbols" % missed_ticks
    print "%10d unaccounted ticks" % really_missed_ticks
    print "%10d total symbols" % len([c for c in code_map.AllCode()])
    print "%10d used symbols" % len([c for c in code_map.UsedCode()])
    print "%9.2fs library processing time" % mmap_time
    print "%9.2fs tick processing time" % sample_time

  log_reader.Dispose()
  trace_reader.Dispose()