#!/usr/bin/env python
#
# Copyright 2010 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import bisect
import collections
import ctypes
import disasm
import mmap
import optparse
import os
import re
import subprocess
import sys
import time


USAGE="""usage: %prog [OPTION]...

Analyses V8 and perf logs to produce profiles.

Perf logs can be collected using a command like:
  $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
  # -R: collect all data
  # -e cycles: use cpu-cycles event (run "perf list" for details)
  # -c 10000: write a sample after each 10000 events
  # -f: force output file overwrite
  # -i: limit profiling to our process and the kernel
  # --ll-prof shell flag enables the right V8 logs
This will produce a binary trace file (perf.data) that %prog can analyse.

Examples:
  # Print flat profile with annotated disassembly for the 10 top
  # symbols. Use default log names and include the snapshot log.
  $ %prog --snapshot --disasm-top=10

  # Print flat profile with annotated disassembly for all used symbols.
  # Use default log names and include kernel symbols into analysis.
  $ %prog --disasm-all --kernel

  # Print flat profile. Use custom log names.
  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
"""


# Must match kGcFakeMmap.
V8_GC_FAKE_MMAP = "/tmp/__v8_gc__"

JS_ORIGIN = "js"
JS_SNAPSHOT_ORIGIN = "js-snapshot"

OBJDUMP_BIN = disasm.OBJDUMP_BIN


class Code(object):
  """Code object."""

  _id = 0

  def __init__(self, name, start_address, end_address, origin, origin_offset):
    self.id = Code._id
    Code._id += 1
    self.name = name
    self.other_names = None
    self.start_address = start_address
    self.end_address = end_address
    self.origin = origin
    self.origin_offset = origin_offset
    self.self_ticks = 0
    self.self_ticks_map = None
    self.callee_ticks = None

  def AddName(self, name):
    assert self.name != name
    if self.other_names is None:
      self.other_names = [name]
      return
    if not name in self.other_names:
      self.other_names.append(name)

  def FullName(self):
    if self.other_names is None:
      return self.name
    self.other_names.sort()
    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))

  def IsUsed(self):
    return self.self_ticks > 0 or self.callee_ticks is not None

  def Tick(self, pc):
    self.self_ticks += 1
    if self.self_ticks_map is None:
      self.self_ticks_map = collections.defaultdict(lambda: 0)
    offset = pc - self.start_address
    self.self_ticks_map[offset] += 1

  def CalleeTick(self, callee):
    if self.callee_ticks is None:
      self.callee_ticks = collections.defaultdict(lambda: 0)
    self.callee_ticks[callee] += 1

  def PrintAnnotated(self, code_info, options):
    if self.self_ticks_map is None:
      ticks_map = []
    else:
      ticks_map = self.self_ticks_map.items()
    # Convert the ticks map to offsets and counts arrays so that later
    # we can do binary search in the offsets array.
    ticks_map.sort(key=lambda t: t[0])
    ticks_offsets = [t[0] for t in ticks_map]
    ticks_counts = [t[1] for t in ticks_map]
    # Get a list of disassembled lines and their addresses.
    lines = self._GetDisasmLines(code_info, options)
    if len(lines) == 0:
      return
    # Print annotated lines.
    address = lines[0][0]
    total_count = 0
    for i in xrange(len(lines)):
      start_offset = lines[i][0] - address
      if i == len(lines) - 1:
        end_offset = self.end_address - self.start_address
      else:
        end_offset = lines[i + 1][0] - address
      # Ticks (reported pc values) are not always precise, i.e. they do not
      # necessarily point at instruction starts. So we have to search
      # for ticks that touch the current instruction line.
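      # For example (illustrative numbers, not from a real trace): an
      # instruction line covering offsets [8, 12) collects ticks recorded at
      # offsets 8..11; bisect_left finds the first tick offset >= 12, and the
      # backwards walk below stops once an offset drops under 8.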
      j = bisect.bisect_left(ticks_offsets, end_offset)
      count = 0
      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
        if offset < start_offset:
          break
        count += cnt
      total_count += count
      count = 100.0 * count / self.self_ticks
      if count >= 0.01:
        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
      else:
        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
    print
    assert total_count == self.self_ticks, \
        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)

  def __str__(self):
    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
        self.name,
        self.start_address,
        self.end_address,
        self.end_address - self.start_address,
        self.origin)

  def _GetDisasmLines(self, code_info, options):
    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
      inplace = False
      filename = options.log + ".code"
    else:
      inplace = True
      filename = self.origin
    return disasm.GetDisasmLines(filename,
                                 self.origin_offset,
                                 self.end_address - self.start_address,
                                 code_info.arch,
                                 inplace)


class CodePage(object):
  """Group of adjacent code objects."""

  SHIFT = 12  # 4K pages
  SIZE = (1 << SHIFT)
  MASK = ~(SIZE - 1)

  @staticmethod
  def PageAddress(address):
    return address & CodePage.MASK

  @staticmethod
  def PageId(address):
    return address >> CodePage.SHIFT

  @staticmethod
  def PageAddressFromId(id):
    return id << CodePage.SHIFT

  def __init__(self, address):
    self.address = address
    self.code_objects = []

  def Add(self, code):
    self.code_objects.append(code)

  def Remove(self, code):
    self.code_objects.remove(code)

  def Find(self, pc):
    code_objects = self.code_objects
    for i, code in enumerate(code_objects):
      if code.start_address <= pc < code.end_address:
        code_objects[0], code_objects[i] = code, code_objects[0]
        return code
    return None

  def __iter__(self):
    return self.code_objects.__iter__()


class CodeMap(object):
  """Code object map."""

  def __init__(self):
    self.pages = {}
    self.min_address = 1 << 64
    self.max_address = -1

  def Add(self, code, max_pages=-1):
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    pages = 0
    while page_id < limit_id:
      if max_pages >= 0 and pages > max_pages:
        print >>sys.stderr, \
            "Warning: page limit (%d) reached for %s [%s]" % (
                max_pages, code.name, code.origin)
        break
      if page_id in self.pages:
        page = self.pages[page_id]
      else:
        page = CodePage(CodePage.PageAddressFromId(page_id))
        self.pages[page_id] = page
      page.Add(code)
      page_id += 1
      pages += 1
    self.min_address = min(self.min_address, code.start_address)
    self.max_address = max(self.max_address, code.end_address)

  def Remove(self, code):
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    removed = False
    while page_id < limit_id:
      if page_id not in self.pages:
        page_id += 1
        continue
      page = self.pages[page_id]
      page.Remove(code)
      removed = True
      page_id += 1
    return removed

  def AllCode(self):
    for page in self.pages.itervalues():
      for code in page:
        if CodePage.PageAddress(code.start_address) == page.address:
          yield code

  def UsedCode(self):
    for code in self.AllCode():
      if code.IsUsed():
        yield code

  def Print(self):
    for code in self.AllCode():
      print code

  def Find(self, pc):
    if pc < self.min_address or pc >= self.max_address:
      return None
    page_id = CodePage.PageId(pc)
    if page_id not in self.pages:
      return None
    return self.pages[page_id].Find(pc)


class CodeInfo(object):
  """Generic info about generated code objects."""

  def __init__(self, arch, header_size):
    self.arch = arch
    self.header_size = header_size


class CodeLogReader(object):
  """V8 code event log reader."""

  _CODE_INFO_RE = re.compile(
      r"code-info,([^,]+),(\d+)")

  _CODE_CREATE_RE = re.compile(
      r"code-creation,([^,]+),(0x[a-f0-9]+),(\d+),\"(.*)\"(?:,(0x[a-f0-9]+),([~*])?)?(?:,(\d+))?")

  _CODE_MOVE_RE = re.compile(
      r"code-move,(0x[a-f0-9]+),(0x[a-f0-9]+)")

  _CODE_DELETE_RE = re.compile(
      r"code-delete,(0x[a-f0-9]+)")

  _SNAPSHOT_POS_RE = re.compile(
      r"snapshot-pos,(0x[a-f0-9]+),(\d+)")

  _CODE_MOVING_GC = "code-moving-gc"

  def __init__(self, log_name, code_map, is_snapshot, snapshot_pos_to_name):
    self.log = open(log_name, "r")
    self.code_map = code_map
    self.is_snapshot = is_snapshot
    self.snapshot_pos_to_name = snapshot_pos_to_name
    self.address_to_snapshot_name = {}

  def ReadCodeInfo(self):
    line = self.log.readline() or ""
    match = CodeLogReader._CODE_INFO_RE.match(line)
    assert match, "No code info in log"
    return CodeInfo(arch=match.group(1), header_size=int(match.group(2)))

  def ReadUpToGC(self, code_info):
    made_progress = False
    code_header_size = code_info.header_size
    while True:
      line = self.log.readline()
      if not line:
        return made_progress
      made_progress = True

      if line.startswith(CodeLogReader._CODE_MOVING_GC):
        self.address_to_snapshot_name.clear()
        return made_progress

      match = CodeLogReader._CODE_CREATE_RE.match(line)
      if match:
        start_address = int(match.group(2), 16) + code_header_size
        end_address = start_address + int(match.group(3)) - code_header_size
        if start_address in self.address_to_snapshot_name:
          name = self.address_to_snapshot_name[start_address]
          origin = JS_SNAPSHOT_ORIGIN
        else:
          tag = match.group(1)
          optimization_status = match.group(6)
          func_name = match.group(4)
          if optimization_status:
            name = "%s:%s%s" % (tag, optimization_status, func_name)
          else:
            name = "%s:%s" % (tag, func_name)
          origin = JS_ORIGIN
        if self.is_snapshot:
          origin_offset = 0
        else:
          origin_offset = int(match.group(7))
        code = Code(name, start_address, end_address, origin, origin_offset)
        conflicting_code = self.code_map.Find(start_address)
        if conflicting_code:
          CodeLogReader._HandleCodeConflict(conflicting_code, code)
          # TODO(vitalyr): this warning is too noisy because of our
          # attempts to reconstruct code log from the snapshot.
          # print >>sys.stderr, \
          #     "Warning: Skipping duplicate code log entry %s" % code
          continue
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_MOVE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        new_start_address = int(match.group(2), 16) + code_header_size
        if old_start_address == new_start_address:
          # Skip useless code move entries.
          continue
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact move address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        size = code.end_address - code.start_address
        code.start_address = new_start_address
        code.end_address = new_start_address + size
        self.code_map.Add(code)
        continue

      match = CodeLogReader._CODE_DELETE_RE.match(line)
      if match:
        old_start_address = int(match.group(1), 16) + code_header_size
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact delete address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        continue

      match = CodeLogReader._SNAPSHOT_POS_RE.match(line)
      if match:
        start_address = int(match.group(1), 16) + code_header_size
        snapshot_pos = int(match.group(2))
        if self.is_snapshot:
          code = self.code_map.Find(start_address)
          if code:
            assert code.start_address == start_address, \
                "Inexact snapshot address %x for %s" % (start_address, code)
            self.snapshot_pos_to_name[snapshot_pos] = code.name
        else:
          if snapshot_pos in self.snapshot_pos_to_name:
            self.address_to_snapshot_name[start_address] = \
                self.snapshot_pos_to_name[snapshot_pos]

  def Dispose(self):
    self.log.close()

  @staticmethod
  def _HandleCodeConflict(old_code, new_code):
    assert (old_code.start_address == new_code.start_address and
            old_code.end_address == new_code.end_address), \
        "Conflicting code log entries %s and %s" % (old_code, new_code)
    CodeLogReader._UpdateNames(old_code, new_code)

  @staticmethod
  def _UpdateNames(old_code, new_code):
    if old_code.name == new_code.name:
      return
    # Kludge: there are code objects with custom names that don't
    # match their flags.
    misnamed_code = set(["Builtin:CpuFeatures::Probe"])
    if old_code.name in misnamed_code:
      return
    # Code object may be shared by a few functions. Collect the full
    # set of names.
    old_code.AddName(new_code.name)


class Descriptor(object):
  """Descriptor of a structure in the binary trace log."""

  CTYPE_MAP = {
    "u16": ctypes.c_uint16,
    "u32": ctypes.c_uint32,
    "u64": ctypes.c_uint64
  }

  def __init__(self, fields):
    class TraceItem(ctypes.Structure):
      _fields_ = Descriptor.CtypesFields(fields)

      def __str__(self):
        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
                         for field, _ in TraceItem._fields_)

    self.ctype = TraceItem

  def Read(self, trace, offset):
    return self.ctype.from_buffer(trace, offset)

  @staticmethod
  def CtypesFields(fields):
    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]


# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
# for the gory details.
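#
# As an illustrative sketch (not part of the tool's flow), each descriptor
# below is overlaid on the mmapped trace without copying, e.g.:
#
#   header = TRACE_HEADER_DESC.Read(trace, 0)
#   print header.data_offset, header.data_size
#
# TraceReader.__init__ further down does exactly this for the real header.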


TRACE_HEADER_DESC = Descriptor([
  ("magic", "u64"),
  ("size", "u64"),
  ("attr_size", "u64"),
  ("attrs_offset", "u64"),
  ("attrs_size", "u64"),
  ("data_offset", "u64"),
  ("data_size", "u64"),
  ("event_types_offset", "u64"),
  ("event_types_size", "u64")
])


PERF_EVENT_ATTR_DESC = Descriptor([
  ("type", "u32"),
  ("size", "u32"),
  ("config", "u64"),
  ("sample_period_or_freq", "u64"),
  ("sample_type", "u64"),
  ("read_format", "u64"),
  ("flags", "u64"),
  ("wakeup_events_or_watermark", "u32"),
  ("bp_type", "u32"),
  ("bp_addr", "u64"),
  ("bp_len", "u64"),
])


PERF_EVENT_HEADER_DESC = Descriptor([
  ("type", "u32"),
  ("misc", "u16"),
  ("size", "u16")
])


PERF_MMAP_EVENT_BODY_DESC = Descriptor([
  ("pid", "u32"),
  ("tid", "u32"),
  ("addr", "u64"),
  ("len", "u64"),
  ("pgoff", "u64")
])


# perf_event_attr.sample_type bits control the set of
# perf_sample_event fields.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10


PERF_SAMPLE_EVENT_BODY_FIELDS = [
  ("ip", "u64", PERF_SAMPLE_IP),
  ("pid", "u32", PERF_SAMPLE_TID),
  ("tid", "u32", PERF_SAMPLE_TID),
  ("time", "u64", PERF_SAMPLE_TIME),
  ("addr", "u64", PERF_SAMPLE_ADDR),
  ("id", "u64", PERF_SAMPLE_ID),
  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
  ("cpu", "u32", PERF_SAMPLE_CPU),
  ("res", "u32", PERF_SAMPLE_CPU),
  ("period", "u64", PERF_SAMPLE_PERIOD),
  # Don't want to handle read format that comes after the period and
  # before the callchain and has variable size.
  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
  # Raw data follows the callchain and is ignored.
]


PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


PERF_RECORD_MMAP = 1
PERF_RECORD_SAMPLE = 9


class TraceReader(object):
  """Perf (linux-2.6/tools/perf) trace file reader."""

  _TRACE_HEADER_MAGIC = 4993446653023372624

  def __init__(self, trace_name):
    self.trace_file = open(trace_name, "r")
    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
      print >>sys.stderr, "Warning: unsupported trace header magic"
    self.offset = self.trace_header.data_offset
    self.limit = self.trace_header.data_offset + self.trace_header.data_size
    assert self.limit <= self.trace.size(), \
        "Trace data limit exceeds trace file size"
    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    assert self.trace_header.attrs_size != 0, \
        "No perf event attributes found in the trace"
    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                self.trace_header.attrs_offset)
    self.sample_event_body_desc = self._SampleEventBodyDesc(
        perf_event_attr.sample_type)
    self.callchain_supported = \
        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    if self.callchain_supported:
      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
      self.ip_size = ctypes.sizeof(self.ip_struct)

  def ReadEventHeader(self):
    if self.offset >= self.limit:
      return None, 0
    offset = self.offset
    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    self.offset += header.size
    return header, offset

  def ReadMmap(self, header, offset):
    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
                                               offset + self.header_size)
    # Read null-padded filename.
    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
                          offset + header.size].rstrip(chr(0))
    mmap_info.filename = filename
    return mmap_info

  def ReadSample(self, header, offset):
    sample = self.sample_event_body_desc.Read(self.trace,
                                              offset + self.header_size)
    if not self.callchain_supported:
      return sample
    sample.ips = []
    offset += self.header_size + ctypes.sizeof(sample)
    for _ in xrange(sample.nr):
      sample.ips.append(
          self.ip_struct.from_buffer(self.trace, offset).value)
      offset += self.ip_size
    return sample

  def Dispose(self):
    self.trace.close()
    self.trace_file.close()

  def _SampleEventBodyDesc(self, sample_type):
    assert (sample_type & PERF_SAMPLE_READ) == 0, \
        "Can't handle read format in samples"
    fields = [(field, format)
              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
              if (bit & sample_type) != 0]
    return Descriptor(fields)


OBJDUMP_SECTION_HEADER_RE = re.compile(
    r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
OBJDUMP_SYMBOL_LINE_RE = re.compile(
    r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
    r"^DYNAMIC SYMBOL TABLE")
KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
PERF_KERNEL_ALLSYMS_RE = re.compile(
    r".*kallsyms.*")
KERNEL_ALLSYMS_LINE_RE = re.compile(
    r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")


class LibraryRepo(object):
  def __init__(self):
    self.infos = []
    self.names = set()
    self.ticks = {}

  def Load(self, mmap_info, code_map, options):
    # Skip kernel mmaps when requested using the fact that their tid
    # is 0.
    if mmap_info.tid == 0 and not options.kernel:
      return True
    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
      return self._LoadKernelSymbols(code_map)
    self.infos.append(mmap_info)
    mmap_info.ticks = 0
    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    if not os.path.exists(mmap_info.filename):
      return True
    # Request section headers (-h), symbols (-t), and dynamic symbols
    # (-T) from objdump.
    # Unfortunately, section headers span two lines, so we have to
    # keep the just seen section name (from the first line in each
    # section header) in the after_section variable.
    process = subprocess.Popen(
        "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
        shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pipe = process.stdout
    after_section = None
    code_sections = set()
    reloc_sections = set()
    dynamic = False
    try:
      for line in pipe:
        if after_section:
          if line.find("CODE") != -1:
            code_sections.add(after_section)
          if line.find("RELOC") != -1:
            reloc_sections.add(after_section)
          after_section = None
          continue

        match = OBJDUMP_SECTION_HEADER_RE.match(line)
        if match:
          after_section = match.group(1)
          continue

        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
          dynamic = True
          continue

        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          origin_offset = start_address
          flags = match.group(2)
          section = match.group(3)
          if section in code_sections:
            if dynamic or section in reloc_sections:
              start_address += mmap_info.addr
            size = int(match.group(4), 16)
            name = match.group(5)
            origin = mmap_info.filename
            code_map.Add(Code(name, start_address, start_address + size,
                              origin, origin_offset))
    finally:
      pipe.close()
    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename

  def Tick(self, pc):
    for i, mmap_info in enumerate(self.infos):
      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
        mmap_info.ticks += 1
        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
        return True
    return False

  def _UniqueMmapName(self, mmap_info):
    name = mmap_info.filename
    index = 1
    while name in self.names:
      name = "%s-%d" % (mmap_info.filename, index)
      index += 1
    self.names.add(name)
    return name

  def _LoadKernelSymbols(self, code_map):
    if not os.path.exists(KERNEL_ALLSYMS_FILE):
      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
      return False
    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    code = None
    for line in kallsyms:
      match = KERNEL_ALLSYMS_LINE_RE.match(line)
      if match:
        start_address = int(match.group(1), 16)
        end_address = start_address
        name = match.group(2)
        if code:
          code.end_address = start_address
          code_map.Add(code, 16)
        code = Code(name, start_address, end_address, "kernel", 0)
    return True


def PrintReport(code_map, library_repo, code_info, options):
  print "Ticks per symbol:"
  used_code = [code for code in code_map.UsedCode()]
  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
  for i, code in enumerate(used_code):
    print "%10d %s [%s]" % (code.self_ticks, code.FullName(), code.origin)
    if options.disasm_all or i < options.disasm_top:
      code.PrintAnnotated(code_info, options)
  print
  print "Ticks per library:"
  mmap_infos = [m for m in library_repo.infos]
  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
  for mmap_info in mmap_infos:
    print "%10d %s" % (mmap_info.ticks, mmap_info.unique_name)


def PrintDot(code_map, options):
  print "digraph G {"
  for code in code_map.UsedCode():
    if code.self_ticks < 10:
      continue
    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    if code.callee_ticks:
      for callee, ticks in code.callee_ticks.iteritems():
        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
  print "}"


if __name__ == "__main__":
  parser = optparse.OptionParser(USAGE)
  parser.add_option("--snapshot-log",
                    default="obj/release/snapshot.log",
                    help="V8 snapshot log file name [default: %default]")
  parser.add_option("--log",
                    default="v8.log",
                    help="V8 log file name [default: %default]")
  parser.add_option("--snapshot",
                    default=False,
                    action="store_true",
                    help="process V8 snapshot log [default: %default]")
  parser.add_option("--trace",
                    default="perf.data",
                    help="perf trace file name [default: %default]")
  parser.add_option("--kernel",
                    default=False,
                    action="store_true",
                    help="process kernel entries [default: %default]")
  parser.add_option("--disasm-top",
                    default=0,
                    type="int",
                    help=("number of top symbols to disassemble and annotate "
                          "[default: %default]"))
  parser.add_option("--disasm-all",
                    default=False,
                    action="store_true",
                    help=("disassemble and annotate all used symbols "
                          "[default: %default]"))
  parser.add_option("--dot",
                    default=False,
                    action="store_true",
                    help="produce dot output (WIP) [default: %default]")
  parser.add_option("--quiet", "-q",
                    default=False,
                    action="store_true",
                    help="no auxiliary messages [default: %default]")
  options, args = parser.parse_args()

  if not options.quiet:
    if options.snapshot:
      print "V8 logs: %s, %s, %s.code" % (options.snapshot_log,
                                          options.log,
                                          options.log)
    else:
      print "V8 log: %s, %s.code (no snapshot)" % (options.log, options.log)
    print "Perf trace file: %s" % options.trace

  # Stats.
  events = 0
  ticks = 0
  missed_ticks = 0
  really_missed_ticks = 0
  mmap_time = 0
  sample_time = 0

  # Initialize the log reader and get the code info.
  code_map = CodeMap()
  snapshot_name_map = {}
  log_reader = CodeLogReader(log_name=options.log,
                             code_map=code_map,
                             is_snapshot=False,
                             snapshot_pos_to_name=snapshot_name_map)
  code_info = log_reader.ReadCodeInfo()
  if not options.quiet:
    print "Generated code architecture: %s" % code_info.arch
    print

  # Process the snapshot log to fill the snapshot name map.
  if options.snapshot:
    snapshot_log_reader = CodeLogReader(log_name=options.snapshot_log,
                                        code_map=CodeMap(),
                                        is_snapshot=True,
                                        snapshot_pos_to_name=snapshot_name_map)
    while snapshot_log_reader.ReadUpToGC(code_info):
      pass

  # Process the code and trace logs.
  library_repo = LibraryRepo()
  log_reader.ReadUpToGC(code_info)
  trace_reader = TraceReader(options.trace)
  while True:
    header, offset = trace_reader.ReadEventHeader()
    if not header:
      break
    events += 1
    if header.type == PERF_RECORD_MMAP:
      start = time.time()
      mmap_info = trace_reader.ReadMmap(header, offset)
      if mmap_info.filename == V8_GC_FAKE_MMAP:
        log_reader.ReadUpToGC(code_info)
      else:
        library_repo.Load(mmap_info, code_map, options)
      mmap_time += time.time() - start
    elif header.type == PERF_RECORD_SAMPLE:
      ticks += 1
      start = time.time()
      sample = trace_reader.ReadSample(header, offset)
      code = code_map.Find(sample.ip)
      if code:
        code.Tick(sample.ip)
      else:
        missed_ticks += 1
      if not library_repo.Tick(sample.ip) and not code:
        really_missed_ticks += 1
      if trace_reader.callchain_supported:
        for ip in sample.ips:
          caller_code = code_map.Find(ip)
          if caller_code:
            if code:
              caller_code.CalleeTick(code)
            code = caller_code
      sample_time += time.time() - start

  if options.dot:
    PrintDot(code_map, options)
  else:
    PrintReport(code_map, library_repo, code_info, options)

  if not options.quiet:
    print
    print "Stats:"
    print "%10d total trace events" % events
    print "%10d total ticks" % ticks
    print "%10d ticks not in symbols" % missed_ticks
    print "%10d unaccounted ticks" % really_missed_ticks
    print "%10d total symbols" % len([c for c in code_map.AllCode()])
    print "%10d used symbols" % len([c for c in code_map.UsedCode()])
    print "%9.2fs library processing time" % mmap_time
    print "%9.2fs tick processing time" % sample_time

  log_reader.Dispose()
  trace_reader.Dispose()