1 #!/usr/bin/env python 2 # 3 # Copyright 2012 the V8 project authors. All rights reserved. 4 # Redistribution and use in source and binary forms, with or without 5 # modification, are permitted provided that the following conditions are 6 # met: 7 # 8 # * Redistributions of source code must retain the above copyright 9 # notice, this list of conditions and the following disclaimer. 10 # * Redistributions in binary form must reproduce the above 11 # copyright notice, this list of conditions and the following 12 # disclaimer in the documentation and/or other materials provided 13 # with the distribution. 14 # * Neither the name of Google Inc. nor the names of its 15 # contributors may be used to endorse or promote products derived 16 # from this software without specific prior written permission. 17 # 18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 import bisect 31 import collections 32 import ctypes 33 import disasm 34 import mmap 35 import optparse 36 import os 37 import re 38 import subprocess 39 import sys 40 import time 41 42 43 USAGE="""usage: %prog [OPTION]... 44 45 Analyses V8 and perf logs to produce profiles. 46 47 Perf logs can be collected using a command like: 48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof 49 # -R: collect all data 50 # -e cycles: use cpu-cycles event (run "perf list" for details) 51 # -c 10000: write a sample after each 10000 events 52 # -f: force output file overwrite 53 # -i: limit profiling to our process and the kernel 54 # --ll-prof shell flag enables the right V8 logs 55 This will produce a binary trace file (perf.data) that %prog can analyse. 56 57 IMPORTANT: 58 The kernel has an internal maximum for events per second, it is 100K by 59 default. That's not enough for "-c 10000". Set it to some higher value: 60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate 61 You can also make the warning about kernel address maps go away: 62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict 63 64 We have a convenience script that handles all of the above for you: 65 $ tools/run-llprof.sh ./d8 bench.js 66 67 Examples: 68 # Print flat profile with annotated disassembly for the 10 top 69 # symbols. Use default log names. 70 $ %prog --disasm-top=10 71 72 # Print flat profile with annotated disassembly for all used symbols. 73 # Use default log names and include kernel symbols into analysis. 74 $ %prog --disasm-all --kernel 75 76 # Print flat profile. Use custom log names. 77 $ %prog --log=foo.log --trace=foo.data 78 """ 79 80 81 JS_ORIGIN = "js" 82 83 84 class Code(object): 85 """Code object.""" 86 87 _id = 0 88 UNKNOWN = 0 89 V8INTERNAL = 1 90 FULL_CODEGEN = 2 91 OPTIMIZED = 3 92 93 def __init__(self, name, start_address, end_address, origin, origin_offset): 94 self.id = Code._id 95 Code._id += 1 96 self.name = name 97 self.other_names = None 98 self.start_address = start_address 99 self.end_address = end_address 100 self.origin = origin 101 self.origin_offset = origin_offset 102 self.self_ticks = 0 103 self.self_ticks_map = None 104 self.callee_ticks = None 105 if name.startswith("LazyCompile:*"): 106 self.codetype = Code.OPTIMIZED 107 elif name.startswith("LazyCompile:"): 108 self.codetype = Code.FULL_CODEGEN 109 elif name.startswith("v8::internal::"): 110 self.codetype = Code.V8INTERNAL 111 else: 112 self.codetype = Code.UNKNOWN 113 114 def AddName(self, name): 115 assert self.name != name 116 if self.other_names is None: 117 self.other_names = [name] 118 return 119 if not name in self.other_names: 120 self.other_names.append(name) 121 122 def FullName(self): 123 if self.other_names is None: 124 return self.name 125 self.other_names.sort() 126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) 127 128 def IsUsed(self): 129 return self.self_ticks > 0 or self.callee_ticks is not None 130 131 def Tick(self, pc): 132 self.self_ticks += 1 133 if self.self_ticks_map is None: 134 self.self_ticks_map = collections.defaultdict(lambda: 0) 135 offset = pc - self.start_address 136 self.self_ticks_map[offset] += 1 137 138 def CalleeTick(self, callee): 139 if self.callee_ticks is None: 140 self.callee_ticks = collections.defaultdict(lambda: 0) 141 self.callee_ticks[callee] += 1 142 143 def PrintAnnotated(self, arch, options): 144 if self.self_ticks_map is None: 145 ticks_map = [] 146 else: 147 ticks_map = self.self_ticks_map.items() 148 # Convert the ticks map to offsets and counts arrays so that later 149 # we can do binary search in the offsets array. 150 ticks_map.sort(key=lambda t: t[0]) 151 ticks_offsets = [t[0] for t in ticks_map] 152 ticks_counts = [t[1] for t in ticks_map] 153 # Get a list of disassembled lines and their addresses. 154 lines = self._GetDisasmLines(arch, options) 155 if len(lines) == 0: 156 return 157 # Print annotated lines. 158 address = lines[0][0] 159 total_count = 0 160 for i in xrange(len(lines)): 161 start_offset = lines[i][0] - address 162 if i == len(lines) - 1: 163 end_offset = self.end_address - self.start_address 164 else: 165 end_offset = lines[i + 1][0] - address 166 # Ticks (reported pc values) are not always precise, i.e. not 167 # necessarily point at instruction starts. So we have to search 168 # for ticks that touch the current instruction line. 169 j = bisect.bisect_left(ticks_offsets, end_offset) 170 count = 0 171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): 172 if offset < start_offset: 173 break 174 count += cnt 175 total_count += count 176 percent = 100.0 * count / self.self_ticks 177 offset = lines[i][0] 178 if percent >= 0.01: 179 # 5 spaces for tick count 180 # 1 space following 181 # 1 for '|' 182 # 1 space following 183 # 6 for the percentage number, incl. the '.' 184 # 1 for the '%' sign 185 # => 15 186 print "%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1]) 187 else: 188 print "%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1]) 189 print 190 assert total_count == self.self_ticks, \ 191 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) 192 193 def __str__(self): 194 return "%s [0x%x, 0x%x) size: %d origin: %s" % ( 195 self.name, 196 self.start_address, 197 self.end_address, 198 self.end_address - self.start_address, 199 self.origin) 200 201 def _GetDisasmLines(self, arch, options): 202 if self.origin == JS_ORIGIN: 203 inplace = False 204 filename = options.log + ".ll" 205 else: 206 inplace = True 207 filename = self.origin 208 return disasm.GetDisasmLines(filename, 209 self.origin_offset, 210 self.end_address - self.start_address, 211 arch, 212 inplace) 213 214 215 class CodePage(object): 216 """Group of adjacent code objects.""" 217 218 SHIFT = 20 # 1M pages 219 SIZE = (1 << SHIFT) 220 MASK = ~(SIZE - 1) 221 222 @staticmethod 223 def PageAddress(address): 224 return address & CodePage.MASK 225 226 @staticmethod 227 def PageId(address): 228 return address >> CodePage.SHIFT 229 230 @staticmethod 231 def PageAddressFromId(id): 232 return id << CodePage.SHIFT 233 234 def __init__(self, address): 235 self.address = address 236 self.code_objects = [] 237 238 def Add(self, code): 239 self.code_objects.append(code) 240 241 def Remove(self, code): 242 self.code_objects.remove(code) 243 244 def Find(self, pc): 245 code_objects = self.code_objects 246 for i, code in enumerate(code_objects): 247 if code.start_address <= pc < code.end_address: 248 code_objects[0], code_objects[i] = code, code_objects[0] 249 return code 250 return None 251 252 def __iter__(self): 253 return self.code_objects.__iter__() 254 255 256 class CodeMap(object): 257 """Code object map.""" 258 259 def __init__(self): 260 self.pages = {} 261 self.min_address = 1 << 64 262 self.max_address = -1 263 264 def Add(self, code, max_pages=-1): 265 page_id = CodePage.PageId(code.start_address) 266 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 267 pages = 0 268 while page_id < limit_id: 269 if max_pages >= 0 and pages > max_pages: 270 print >>sys.stderr, \ 271 "Warning: page limit (%d) reached for %s [%s]" % ( 272 max_pages, code.name, code.origin) 273 break 274 if page_id in self.pages: 275 page = self.pages[page_id] 276 else: 277 page = CodePage(CodePage.PageAddressFromId(page_id)) 278 self.pages[page_id] = page 279 page.Add(code) 280 page_id += 1 281 pages += 1 282 self.min_address = min(self.min_address, code.start_address) 283 self.max_address = max(self.max_address, code.end_address) 284 285 def Remove(self, code): 286 page_id = CodePage.PageId(code.start_address) 287 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) 288 removed = False 289 while page_id < limit_id: 290 if page_id not in self.pages: 291 page_id += 1 292 continue 293 page = self.pages[page_id] 294 page.Remove(code) 295 removed = True 296 page_id += 1 297 return removed 298 299 def AllCode(self): 300 for page in self.pages.itervalues(): 301 for code in page: 302 if CodePage.PageAddress(code.start_address) == page.address: 303 yield code 304 305 def UsedCode(self): 306 for code in self.AllCode(): 307 if code.IsUsed(): 308 yield code 309 310 def Print(self): 311 for code in self.AllCode(): 312 print code 313 314 def Find(self, pc): 315 if pc < self.min_address or pc >= self.max_address: 316 return None 317 page_id = CodePage.PageId(pc) 318 if page_id not in self.pages: 319 return None 320 return self.pages[page_id].Find(pc) 321 322 323 class CodeInfo(object): 324 """Generic info about generated code objects.""" 325 326 def __init__(self, arch, header_size): 327 self.arch = arch 328 self.header_size = header_size 329 330 331 class LogReader(object): 332 """V8 low-level (binary) log reader.""" 333 334 _ARCH_TO_POINTER_TYPE_MAP = { 335 "ia32": ctypes.c_uint32, 336 "arm": ctypes.c_uint32, 337 "mips": ctypes.c_uint32, 338 "x64": ctypes.c_uint64, 339 "arm64": ctypes.c_uint64 340 } 341 342 _CODE_CREATE_TAG = "C" 343 _CODE_MOVE_TAG = "M" 344 _CODE_MOVING_GC_TAG = "G" 345 346 def __init__(self, log_name, code_map): 347 self.log_file = open(log_name, "r") 348 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) 349 self.log_pos = 0 350 self.code_map = code_map 351 352 self.arch = self.log[:self.log.find("\0")] 353 self.log_pos += len(self.arch) + 1 354 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ 355 "Unsupported architecture %s" % self.arch 356 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] 357 358 self.code_create_struct = LogReader._DefineStruct([ 359 ("name_size", ctypes.c_int32), 360 ("code_address", pointer_type), 361 ("code_size", ctypes.c_int32)]) 362 363 self.code_move_struct = LogReader._DefineStruct([ 364 ("from_address", pointer_type), 365 ("to_address", pointer_type)]) 366 367 self.code_delete_struct = LogReader._DefineStruct([ 368 ("address", pointer_type)]) 369 370 def ReadUpToGC(self): 371 while self.log_pos < self.log.size(): 372 tag = self.log[self.log_pos] 373 self.log_pos += 1 374 375 if tag == LogReader._CODE_MOVING_GC_TAG: 376 return 377 378 if tag == LogReader._CODE_CREATE_TAG: 379 event = self.code_create_struct.from_buffer(self.log, self.log_pos) 380 self.log_pos += ctypes.sizeof(event) 381 start_address = event.code_address 382 end_address = start_address + event.code_size 383 name = self.log[self.log_pos:self.log_pos + event.name_size] 384 origin = JS_ORIGIN 385 self.log_pos += event.name_size 386 origin_offset = self.log_pos 387 self.log_pos += event.code_size 388 code = Code(name, start_address, end_address, origin, origin_offset) 389 conficting_code = self.code_map.Find(start_address) 390 if conficting_code: 391 if not (conficting_code.start_address == code.start_address and 392 conficting_code.end_address == code.end_address): 393 self.code_map.Remove(conficting_code) 394 else: 395 LogReader._HandleCodeConflict(conficting_code, code) 396 # TODO(vitalyr): this warning is too noisy because of our 397 # attempts to reconstruct code log from the snapshot. 398 # print >>sys.stderr, \ 399 # "Warning: Skipping duplicate code log entry %s" % code 400 continue 401 self.code_map.Add(code) 402 continue 403 404 if tag == LogReader._CODE_MOVE_TAG: 405 event = self.code_move_struct.from_buffer(self.log, self.log_pos) 406 self.log_pos += ctypes.sizeof(event) 407 old_start_address = event.from_address 408 new_start_address = event.to_address 409 if old_start_address == new_start_address: 410 # Skip useless code move entries. 411 continue 412 code = self.code_map.Find(old_start_address) 413 if not code: 414 print >>sys.stderr, "Warning: Not found %x" % old_start_address 415 continue 416 assert code.start_address == old_start_address, \ 417 "Inexact move address %x for %s" % (old_start_address, code) 418 self.code_map.Remove(code) 419 size = code.end_address - code.start_address 420 code.start_address = new_start_address 421 code.end_address = new_start_address + size 422 self.code_map.Add(code) 423 continue 424 425 assert False, "Unknown tag %s" % tag 426 427 def Dispose(self): 428 self.log.close() 429 self.log_file.close() 430 431 @staticmethod 432 def _DefineStruct(fields): 433 class Struct(ctypes.Structure): 434 _fields_ = fields 435 return Struct 436 437 @staticmethod 438 def _HandleCodeConflict(old_code, new_code): 439 assert (old_code.start_address == new_code.start_address and 440 old_code.end_address == new_code.end_address), \ 441 "Conficting code log entries %s and %s" % (old_code, new_code) 442 if old_code.name == new_code.name: 443 return 444 # Code object may be shared by a few functions. Collect the full 445 # set of names. 446 old_code.AddName(new_code.name) 447 448 449 class Descriptor(object): 450 """Descriptor of a structure in the binary trace log.""" 451 452 CTYPE_MAP = { 453 "u16": ctypes.c_uint16, 454 "u32": ctypes.c_uint32, 455 "u64": ctypes.c_uint64 456 } 457 458 def __init__(self, fields): 459 class TraceItem(ctypes.Structure): 460 _fields_ = Descriptor.CtypesFields(fields) 461 462 def __str__(self): 463 return ", ".join("%s: %s" % (field, self.__getattribute__(field)) 464 for field, _ in TraceItem._fields_) 465 466 self.ctype = TraceItem 467 468 def Read(self, trace, offset): 469 return self.ctype.from_buffer(trace, offset) 470 471 @staticmethod 472 def CtypesFields(fields): 473 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] 474 475 476 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf 477 # for the gory details. 478 479 480 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h 481 TRACE_HEADER_DESC = Descriptor([ 482 ("magic", "u64"), 483 ("size", "u64"), 484 ("attr_size", "u64"), 485 ("attrs_offset", "u64"), 486 ("attrs_size", "u64"), 487 ("data_offset", "u64"), 488 ("data_size", "u64"), 489 ("event_types_offset", "u64"), 490 ("event_types_size", "u64") 491 ]) 492 493 494 # Reference: /usr/include/linux/perf_event.h 495 PERF_EVENT_ATTR_DESC = Descriptor([ 496 ("type", "u32"), 497 ("size", "u32"), 498 ("config", "u64"), 499 ("sample_period_or_freq", "u64"), 500 ("sample_type", "u64"), 501 ("read_format", "u64"), 502 ("flags", "u64"), 503 ("wakeup_events_or_watermark", "u32"), 504 ("bp_type", "u32"), 505 ("bp_addr", "u64"), 506 ("bp_len", "u64") 507 ]) 508 509 510 # Reference: /usr/include/linux/perf_event.h 511 PERF_EVENT_HEADER_DESC = Descriptor([ 512 ("type", "u32"), 513 ("misc", "u16"), 514 ("size", "u16") 515 ]) 516 517 518 # Reference: kernel/tools/perf/util/event.h 519 PERF_MMAP_EVENT_BODY_DESC = Descriptor([ 520 ("pid", "u32"), 521 ("tid", "u32"), 522 ("addr", "u64"), 523 ("len", "u64"), 524 ("pgoff", "u64") 525 ]) 526 527 # Reference: kernel/tools/perf/util/event.h 528 PERF_MMAP2_EVENT_BODY_DESC = Descriptor([ 529 ("pid", "u32"), 530 ("tid", "u32"), 531 ("addr", "u64"), 532 ("len", "u64"), 533 ("pgoff", "u64"), 534 ("maj", "u32"), 535 ("min", "u32"), 536 ("ino", "u64"), 537 ("ino_generation", "u64"), 538 ("prot", "u32"), 539 ("flags","u32") 540 ]) 541 542 # perf_event_attr.sample_type bits control the set of 543 # perf_sample_event fields. 544 PERF_SAMPLE_IP = 1 << 0 545 PERF_SAMPLE_TID = 1 << 1 546 PERF_SAMPLE_TIME = 1 << 2 547 PERF_SAMPLE_ADDR = 1 << 3 548 PERF_SAMPLE_READ = 1 << 4 549 PERF_SAMPLE_CALLCHAIN = 1 << 5 550 PERF_SAMPLE_ID = 1 << 6 551 PERF_SAMPLE_CPU = 1 << 7 552 PERF_SAMPLE_PERIOD = 1 << 8 553 PERF_SAMPLE_STREAM_ID = 1 << 9 554 PERF_SAMPLE_RAW = 1 << 10 555 556 557 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. 558 PERF_SAMPLE_EVENT_BODY_FIELDS = [ 559 ("ip", "u64", PERF_SAMPLE_IP), 560 ("pid", "u32", PERF_SAMPLE_TID), 561 ("tid", "u32", PERF_SAMPLE_TID), 562 ("time", "u64", PERF_SAMPLE_TIME), 563 ("addr", "u64", PERF_SAMPLE_ADDR), 564 ("id", "u64", PERF_SAMPLE_ID), 565 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), 566 ("cpu", "u32", PERF_SAMPLE_CPU), 567 ("res", "u32", PERF_SAMPLE_CPU), 568 ("period", "u64", PERF_SAMPLE_PERIOD), 569 # Don't want to handle read format that comes after the period and 570 # before the callchain and has variable size. 571 ("nr", "u64", PERF_SAMPLE_CALLCHAIN) 572 # Raw data follows the callchain and is ignored. 573 ] 574 575 576 PERF_SAMPLE_EVENT_IP_FORMAT = "u64" 577 578 579 PERF_RECORD_MMAP = 1 580 PERF_RECORD_MMAP2 = 10 581 PERF_RECORD_SAMPLE = 9 582 583 584 class TraceReader(object): 585 """Perf (linux-2.6/tools/perf) trace file reader.""" 586 587 _TRACE_HEADER_MAGIC = 4993446653023372624 588 589 def __init__(self, trace_name): 590 self.trace_file = open(trace_name, "r") 591 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) 592 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) 593 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: 594 print >>sys.stderr, "Warning: unsupported trace header magic" 595 self.offset = self.trace_header.data_offset 596 self.limit = self.trace_header.data_offset + self.trace_header.data_size 597 assert self.limit <= self.trace.size(), \ 598 "Trace data limit exceeds trace file size" 599 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) 600 assert self.trace_header.attrs_size != 0, \ 601 "No perf event attributes found in the trace" 602 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, 603 self.trace_header.attrs_offset) 604 self.sample_event_body_desc = self._SampleEventBodyDesc( 605 perf_event_attr.sample_type) 606 self.callchain_supported = \ 607 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 608 if self.callchain_supported: 609 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] 610 self.ip_size = ctypes.sizeof(self.ip_struct) 611 612 def ReadEventHeader(self): 613 if self.offset >= self.limit: 614 return None, 0 615 offset = self.offset 616 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) 617 self.offset += header.size 618 return header, offset 619 620 def ReadMmap(self, header, offset): 621 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, 622 offset + self.header_size) 623 # Read null-terminated filename. 624 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 625 offset + header.size] 626 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 627 return mmap_info 628 629 def ReadMmap2(self, header, offset): 630 mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace, 631 offset + self.header_size) 632 # Read null-terminated filename. 633 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): 634 offset + header.size] 635 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] 636 return mmap_info 637 638 def ReadSample(self, header, offset): 639 sample = self.sample_event_body_desc.Read(self.trace, 640 offset + self.header_size) 641 if not self.callchain_supported: 642 return sample 643 sample.ips = [] 644 offset += self.header_size + ctypes.sizeof(sample) 645 for _ in xrange(sample.nr): 646 sample.ips.append( 647 self.ip_struct.from_buffer(self.trace, offset).value) 648 offset += self.ip_size 649 return sample 650 651 def Dispose(self): 652 self.trace.close() 653 self.trace_file.close() 654 655 def _SampleEventBodyDesc(self, sample_type): 656 assert (sample_type & PERF_SAMPLE_READ) == 0, \ 657 "Can't hande read format in samples" 658 fields = [(field, format) 659 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS 660 if (bit & sample_type) != 0] 661 return Descriptor(fields) 662 663 664 OBJDUMP_SECTION_HEADER_RE = re.compile( 665 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") 666 OBJDUMP_SYMBOL_LINE_RE = re.compile( 667 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") 668 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( 669 r"^DYNAMIC SYMBOL TABLE") 670 OBJDUMP_SKIP_RE = re.compile( 671 r"^.*ld\.so\.cache$") 672 KERNEL_ALLSYMS_FILE = "/proc/kallsyms" 673 PERF_KERNEL_ALLSYMS_RE = re.compile( 674 r".*kallsyms.*") 675 KERNEL_ALLSYMS_LINE_RE = re.compile( 676 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") 677 678 679 class LibraryRepo(object): 680 def __init__(self): 681 self.infos = [] 682 self.names = set() 683 self.ticks = {} 684 685 686 def HasDynamicSymbols(self, filename): 687 if filename.endswith(".ko"): return False 688 process = subprocess.Popen( 689 "%s -h %s" % (OBJDUMP_BIN, filename), 690 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 691 pipe = process.stdout 692 try: 693 for line in pipe: 694 match = OBJDUMP_SECTION_HEADER_RE.match(line) 695 if match and match.group(1) == 'dynsym': return True 696 finally: 697 pipe.close() 698 assert process.wait() == 0, "Failed to objdump -h %s" % filename 699 return False 700 701 702 def Load(self, mmap_info, code_map, options): 703 # Skip kernel mmaps when requested using the fact that their tid 704 # is 0. 705 if mmap_info.tid == 0 and not options.kernel: 706 return True 707 if OBJDUMP_SKIP_RE.match(mmap_info.filename): 708 return True 709 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): 710 return self._LoadKernelSymbols(code_map) 711 self.infos.append(mmap_info) 712 mmap_info.ticks = 0 713 mmap_info.unique_name = self._UniqueMmapName(mmap_info) 714 if not os.path.exists(mmap_info.filename): 715 return True 716 # Request section headers (-h), symbols (-t), and dynamic symbols 717 # (-T) from objdump. 718 # Unfortunately, section headers span two lines, so we have to 719 # keep the just seen section name (from the first line in each 720 # section header) in the after_section variable. 721 if self.HasDynamicSymbols(mmap_info.filename): 722 dynamic_symbols = "-T" 723 else: 724 dynamic_symbols = "" 725 process = subprocess.Popen( 726 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), 727 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 728 pipe = process.stdout 729 after_section = None 730 code_sections = set() 731 reloc_sections = set() 732 dynamic = False 733 try: 734 for line in pipe: 735 if after_section: 736 if line.find("CODE") != -1: 737 code_sections.add(after_section) 738 if line.find("RELOC") != -1: 739 reloc_sections.add(after_section) 740 after_section = None 741 continue 742 743 match = OBJDUMP_SECTION_HEADER_RE.match(line) 744 if match: 745 after_section = match.group(1) 746 continue 747 748 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): 749 dynamic = True 750 continue 751 752 match = OBJDUMP_SYMBOL_LINE_RE.match(line) 753 if match: 754 start_address = int(match.group(1), 16) 755 origin_offset = start_address 756 flags = match.group(2) 757 section = match.group(3) 758 if section in code_sections: 759 if dynamic or section in reloc_sections: 760 start_address += mmap_info.addr 761 size = int(match.group(4), 16) 762 name = match.group(5) 763 origin = mmap_info.filename 764 code_map.Add(Code(name, start_address, start_address + size, 765 origin, origin_offset)) 766 finally: 767 pipe.close() 768 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename 769 770 def Tick(self, pc): 771 for i, mmap_info in enumerate(self.infos): 772 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): 773 mmap_info.ticks += 1 774 self.infos[0], self.infos[i] = mmap_info, self.infos[0] 775 return True 776 return False 777 778 def _UniqueMmapName(self, mmap_info): 779 name = mmap_info.filename 780 index = 1 781 while name in self.names: 782 name = "%s-%d" % (mmap_info.filename, index) 783 index += 1 784 self.names.add(name) 785 return name 786 787 def _LoadKernelSymbols(self, code_map): 788 if not os.path.exists(KERNEL_ALLSYMS_FILE): 789 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE 790 return False 791 kallsyms = open(KERNEL_ALLSYMS_FILE, "r") 792 code = None 793 for line in kallsyms: 794 match = KERNEL_ALLSYMS_LINE_RE.match(line) 795 if match: 796 start_address = int(match.group(1), 16) 797 end_address = start_address 798 name = match.group(2) 799 if code: 800 code.end_address = start_address 801 code_map.Add(code, 16) 802 code = Code(name, start_address, end_address, "kernel", 0) 803 return True 804 805 806 def PrintReport(code_map, library_repo, arch, ticks, options): 807 print "Ticks per symbol:" 808 used_code = [code for code in code_map.UsedCode()] 809 used_code.sort(key=lambda x: x.self_ticks, reverse=True) 810 for i, code in enumerate(used_code): 811 code_ticks = code.self_ticks 812 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, 813 code.FullName(), code.origin) 814 if options.disasm_all or i < options.disasm_top: 815 code.PrintAnnotated(arch, options) 816 print 817 print "Ticks per library:" 818 mmap_infos = [m for m in library_repo.infos if m.ticks > 0] 819 mmap_infos.sort(key=lambda m: m.ticks, reverse=True) 820 for mmap_info in mmap_infos: 821 mmap_ticks = mmap_info.ticks 822 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, 823 mmap_info.unique_name) 824 825 826 def PrintDot(code_map, options): 827 print "digraph G {" 828 for code in code_map.UsedCode(): 829 if code.self_ticks < 10: 830 continue 831 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name) 832 if code.callee_ticks: 833 for callee, ticks in code.callee_ticks.iteritems(): 834 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks) 835 print "}" 836 837 838 if __name__ == "__main__": 839 parser = optparse.OptionParser(USAGE) 840 parser.add_option("--log", 841 default="v8.log", 842 help="V8 log file name [default: %default]") 843 parser.add_option("--trace", 844 default="perf.data", 845 help="perf trace file name [default: %default]") 846 parser.add_option("--kernel", 847 default=False, 848 action="store_true", 849 help="process kernel entries [default: %default]") 850 parser.add_option("--disasm-top", 851 default=0, 852 type="int", 853 help=("number of top symbols to disassemble and annotate " 854 "[default: %default]")) 855 parser.add_option("--disasm-all", 856 default=False, 857 action="store_true", 858 help=("disassemble and annotate all used symbols " 859 "[default: %default]")) 860 parser.add_option("--dot", 861 default=False, 862 action="store_true", 863 help="produce dot output (WIP) [default: %default]") 864 parser.add_option("--quiet", "-q", 865 default=False, 866 action="store_true", 867 help="no auxiliary messages [default: %default]") 868 parser.add_option("--gc-fake-mmap", 869 default="/tmp/__v8_gc__", 870 help="gc fake mmap file [default: %default]") 871 parser.add_option("--objdump", 872 default="/usr/bin/objdump", 873 help="objdump tool to use [default: %default]") 874 parser.add_option("--host-root", 875 default="", 876 help="Path to the host root [default: %default]") 877 options, args = parser.parse_args() 878 879 if not options.quiet: 880 print "V8 log: %s, %s.ll" % (options.log, options.log) 881 print "Perf trace file: %s" % options.trace 882 883 V8_GC_FAKE_MMAP = options.gc_fake_mmap 884 HOST_ROOT = options.host_root 885 if os.path.exists(options.objdump): 886 disasm.OBJDUMP_BIN = options.objdump 887 OBJDUMP_BIN = options.objdump 888 else: 889 print "Cannot find %s, falling back to default objdump" % options.objdump 890 891 # Stats. 892 events = 0 893 ticks = 0 894 missed_ticks = 0 895 really_missed_ticks = 0 896 optimized_ticks = 0 897 generated_ticks = 0 898 v8_internal_ticks = 0 899 mmap_time = 0 900 sample_time = 0 901 902 # Initialize the log reader. 903 code_map = CodeMap() 904 log_reader = LogReader(log_name=options.log + ".ll", 905 code_map=code_map) 906 if not options.quiet: 907 print "Generated code architecture: %s" % log_reader.arch 908 print 909 sys.stdout.flush() 910 911 # Process the code and trace logs. 912 library_repo = LibraryRepo() 913 log_reader.ReadUpToGC() 914 trace_reader = TraceReader(options.trace) 915 while True: 916 header, offset = trace_reader.ReadEventHeader() 917 if not header: 918 break 919 events += 1 920 if header.type == PERF_RECORD_MMAP: 921 start = time.time() 922 mmap_info = trace_reader.ReadMmap(header, offset) 923 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 924 log_reader.ReadUpToGC() 925 else: 926 library_repo.Load(mmap_info, code_map, options) 927 mmap_time += time.time() - start 928 elif header.type == PERF_RECORD_MMAP2: 929 start = time.time() 930 mmap_info = trace_reader.ReadMmap2(header, offset) 931 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: 932 log_reader.ReadUpToGC() 933 else: 934 library_repo.Load(mmap_info, code_map, options) 935 mmap_time += time.time() - start 936 elif header.type == PERF_RECORD_SAMPLE: 937 ticks += 1 938 start = time.time() 939 sample = trace_reader.ReadSample(header, offset) 940 code = code_map.Find(sample.ip) 941 if code: 942 code.Tick(sample.ip) 943 if code.codetype == Code.OPTIMIZED: 944 optimized_ticks += 1 945 elif code.codetype == Code.FULL_CODEGEN: 946 generated_ticks += 1 947 elif code.codetype == Code.V8INTERNAL: 948 v8_internal_ticks += 1 949 else: 950 missed_ticks += 1 951 if not library_repo.Tick(sample.ip) and not code: 952 really_missed_ticks += 1 953 if trace_reader.callchain_supported: 954 for ip in sample.ips: 955 caller_code = code_map.Find(ip) 956 if caller_code: 957 if code: 958 caller_code.CalleeTick(code) 959 code = caller_code 960 sample_time += time.time() - start 961 962 if options.dot: 963 PrintDot(code_map, options) 964 else: 965 PrintReport(code_map, library_repo, log_reader.arch, ticks, options) 966 967 if not options.quiet: 968 def PrintTicks(number, total, description): 969 print("%10d %5.1f%% ticks in %s" % 970 (number, 100.0*number/total, description)) 971 print 972 print "Stats:" 973 print "%10d total trace events" % events 974 print "%10d total ticks" % ticks 975 print "%10d ticks not in symbols" % missed_ticks 976 unaccounted = "unaccounted ticks" 977 if really_missed_ticks > 0: 978 unaccounted += " (probably in the kernel, try --kernel)" 979 PrintTicks(really_missed_ticks, ticks, unaccounted) 980 PrintTicks(optimized_ticks, ticks, "ticks in optimized code") 981 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") 982 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") 983 print "%10d total symbols" % len([c for c in code_map.AllCode()]) 984 print "%10d used symbols" % len([c for c in code_map.UsedCode()]) 985 print "%9.2fs library processing time" % mmap_time 986 print "%9.2fs tick processing time" % sample_time 987 988 log_reader.Dispose() 989 trace_reader.Dispose() 990