Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2012 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 import bisect
     31 import collections
     32 import ctypes
     33 import disasm
     34 import mmap
     35 import optparse
     36 import os
     37 import re
     38 import subprocess
     39 import sys
     40 import time
     41 
     42 
     43 USAGE="""usage: %prog [OPTION]...
     44 
     45 Analyses V8 and perf logs to produce profiles.
     46 
     47 Perf logs can be collected using a command like:
     48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
     49   # -R: collect all data
     50   # -e cycles: use cpu-cycles event (run "perf list" for details)
     51   # -c 10000: write a sample after each 10000 events
     52   # -f: force output file overwrite
     53   # -i: limit profiling to our process and the kernel
     54   # --ll-prof shell flag enables the right V8 logs
     55 This will produce a binary trace file (perf.data) that %prog can analyse.
     56 
     57 IMPORTANT:
     58   The kernel has an internal maximum for events per second, it is 100K by
     59   default. That's not enough for "-c 10000". Set it to some higher value:
     60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
     61   You can also make the warning about kernel address maps go away:
     62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
     63 
     64 We have a convenience script that handles all of the above for you:
     65   $ tools/run-llprof.sh ./d8 bench.js
     66 
     67 Examples:
     68   # Print flat profile with annotated disassembly for the 10 top
     69   # symbols. Use default log names.
     70   $ %prog --disasm-top=10
     71 
     72   # Print flat profile with annotated disassembly for all used symbols.
     73   # Use default log names and include kernel symbols into analysis.
     74   $ %prog --disasm-all --kernel
     75 
     76   # Print flat profile. Use custom log names.
     77   $ %prog --log=foo.log --trace=foo.data
     78 """
     79 
     80 
     81 JS_ORIGIN = "js"
     82 
     83 
     84 class Code(object):
     85   """Code object."""
     86 
     87   _id = 0
     88   UNKNOWN = 0
     89   V8INTERNAL = 1
     90   FULL_CODEGEN = 2
     91   OPTIMIZED = 3
     92 
     93   def __init__(self, name, start_address, end_address, origin, origin_offset):
     94     self.id = Code._id
     95     Code._id += 1
     96     self.name = name
     97     self.other_names = None
     98     self.start_address = start_address
     99     self.end_address = end_address
    100     self.origin = origin
    101     self.origin_offset = origin_offset
    102     self.self_ticks = 0
    103     self.self_ticks_map = None
    104     self.callee_ticks = None
    105     if name.startswith("LazyCompile:*"):
    106       self.codetype = Code.OPTIMIZED
    107     elif name.startswith("LazyCompile:"):
    108       self.codetype = Code.FULL_CODEGEN
    109     elif name.startswith("v8::internal::"):
    110       self.codetype = Code.V8INTERNAL
    111     else:
    112       self.codetype = Code.UNKNOWN
    113 
    114   def AddName(self, name):
    115     assert self.name != name
    116     if self.other_names is None:
    117       self.other_names = [name]
    118       return
    119     if not name in self.other_names:
    120       self.other_names.append(name)
    121 
    122   def FullName(self):
    123     if self.other_names is None:
    124       return self.name
    125     self.other_names.sort()
    126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
    127 
    128   def IsUsed(self):
    129     return self.self_ticks > 0 or self.callee_ticks is not None
    130 
    131   def Tick(self, pc):
    132     self.self_ticks += 1
    133     if self.self_ticks_map is None:
    134       self.self_ticks_map = collections.defaultdict(lambda: 0)
    135     offset = pc - self.start_address
    136     self.self_ticks_map[offset] += 1
    137 
    138   def CalleeTick(self, callee):
    139     if self.callee_ticks is None:
    140       self.callee_ticks = collections.defaultdict(lambda: 0)
    141     self.callee_ticks[callee] += 1
    142 
    143   def PrintAnnotated(self, arch, options):
    144     if self.self_ticks_map is None:
    145       ticks_map = []
    146     else:
    147       ticks_map = self.self_ticks_map.items()
    148     # Convert the ticks map to offsets and counts arrays so that later
    149     # we can do binary search in the offsets array.
    150     ticks_map.sort(key=lambda t: t[0])
    151     ticks_offsets = [t[0] for t in ticks_map]
    152     ticks_counts = [t[1] for t in ticks_map]
    153     # Get a list of disassembled lines and their addresses.
    154     lines = self._GetDisasmLines(arch, options)
    155     if len(lines) == 0:
    156       return
    157     # Print annotated lines.
    158     address = lines[0][0]
    159     total_count = 0
    160     for i in xrange(len(lines)):
    161       start_offset = lines[i][0] - address
    162       if i == len(lines) - 1:
    163         end_offset = self.end_address - self.start_address
    164       else:
    165         end_offset = lines[i + 1][0] - address
    166       # Ticks (reported pc values) are not always precise, i.e. not
    167       # necessarily point at instruction starts. So we have to search
    168       # for ticks that touch the current instruction line.
    169       j = bisect.bisect_left(ticks_offsets, end_offset)
    170       count = 0
    171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
    172         if offset < start_offset:
    173           break
    174         count += cnt
    175       total_count += count
    176       percent = 100.0 * count / self.self_ticks
    177       offset = lines[i][0]
    178       if percent >= 0.01:
    179         # 5 spaces for tick count
    180         # 1 space following
    181         # 1 for '|'
    182         # 1 space following
    183         # 6 for the percentage number, incl. the '.'
    184         # 1 for the '%' sign
    185         # => 15
    186         print "%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1])
    187       else:
    188         print "%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1])
    189     print
    190     assert total_count == self.self_ticks, \
    191         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
    192 
    193   def __str__(self):
    194     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
    195       self.name,
    196       self.start_address,
    197       self.end_address,
    198       self.end_address - self.start_address,
    199       self.origin)
    200 
    201   def _GetDisasmLines(self, arch, options):
    202     if self.origin == JS_ORIGIN:
    203       inplace = False
    204       filename = options.log + ".ll"
    205     else:
    206       inplace = True
    207       filename = self.origin
    208     return disasm.GetDisasmLines(filename,
    209                                  self.origin_offset,
    210                                  self.end_address - self.start_address,
    211                                  arch,
    212                                  inplace)
    213 
    214 
    215 class CodePage(object):
    216   """Group of adjacent code objects."""
    217 
    218   SHIFT = 20  # 1M pages
    219   SIZE = (1 << SHIFT)
    220   MASK = ~(SIZE - 1)
    221 
    222   @staticmethod
    223   def PageAddress(address):
    224     return address & CodePage.MASK
    225 
    226   @staticmethod
    227   def PageId(address):
    228     return address >> CodePage.SHIFT
    229 
    230   @staticmethod
    231   def PageAddressFromId(id):
    232     return id << CodePage.SHIFT
    233 
    234   def __init__(self, address):
    235     self.address = address
    236     self.code_objects = []
    237 
    238   def Add(self, code):
    239     self.code_objects.append(code)
    240 
    241   def Remove(self, code):
    242     self.code_objects.remove(code)
    243 
    244   def Find(self, pc):
    245     code_objects = self.code_objects
    246     for i, code in enumerate(code_objects):
    247       if code.start_address <= pc < code.end_address:
    248         code_objects[0], code_objects[i] = code, code_objects[0]
    249         return code
    250     return None
    251 
    252   def __iter__(self):
    253     return self.code_objects.__iter__()
    254 
    255 
    256 class CodeMap(object):
    257   """Code object map."""
    258 
    259   def __init__(self):
    260     self.pages = {}
    261     self.min_address = 1 << 64
    262     self.max_address = -1
    263 
    264   def Add(self, code, max_pages=-1):
    265     page_id = CodePage.PageId(code.start_address)
    266     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    267     pages = 0
    268     while page_id < limit_id:
    269       if max_pages >= 0 and pages > max_pages:
    270         print >>sys.stderr, \
    271             "Warning: page limit (%d) reached for %s [%s]" % (
    272             max_pages, code.name, code.origin)
    273         break
    274       if page_id in self.pages:
    275         page = self.pages[page_id]
    276       else:
    277         page = CodePage(CodePage.PageAddressFromId(page_id))
    278         self.pages[page_id] = page
    279       page.Add(code)
    280       page_id += 1
    281       pages += 1
    282     self.min_address = min(self.min_address, code.start_address)
    283     self.max_address = max(self.max_address, code.end_address)
    284 
    285   def Remove(self, code):
    286     page_id = CodePage.PageId(code.start_address)
    287     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    288     removed = False
    289     while page_id < limit_id:
    290       if page_id not in self.pages:
    291         page_id += 1
    292         continue
    293       page = self.pages[page_id]
    294       page.Remove(code)
    295       removed = True
    296       page_id += 1
    297     return removed
    298 
    299   def AllCode(self):
    300     for page in self.pages.itervalues():
    301       for code in page:
    302         if CodePage.PageAddress(code.start_address) == page.address:
    303           yield code
    304 
    305   def UsedCode(self):
    306     for code in self.AllCode():
    307       if code.IsUsed():
    308         yield code
    309 
    310   def Print(self):
    311     for code in self.AllCode():
    312       print code
    313 
    314   def Find(self, pc):
    315     if pc < self.min_address or pc >= self.max_address:
    316       return None
    317     page_id = CodePage.PageId(pc)
    318     if page_id not in self.pages:
    319       return None
    320     return self.pages[page_id].Find(pc)
    321 
    322 
    323 class CodeInfo(object):
    324   """Generic info about generated code objects."""
    325 
    326   def __init__(self, arch, header_size):
    327     self.arch = arch
    328     self.header_size = header_size
    329 
    330 
    331 class LogReader(object):
    332   """V8 low-level (binary) log reader."""
    333 
    334   _ARCH_TO_POINTER_TYPE_MAP = {
    335     "ia32": ctypes.c_uint32,
    336     "arm": ctypes.c_uint32,
    337     "mips": ctypes.c_uint32,
    338     "x64": ctypes.c_uint64,
    339     "arm64": ctypes.c_uint64
    340   }
    341 
    342   _CODE_CREATE_TAG = "C"
    343   _CODE_MOVE_TAG = "M"
    344   _CODE_MOVING_GC_TAG = "G"
    345 
    346   def __init__(self, log_name, code_map):
    347     self.log_file = open(log_name, "r")
    348     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
    349     self.log_pos = 0
    350     self.code_map = code_map
    351 
    352     self.arch = self.log[:self.log.find("\0")]
    353     self.log_pos += len(self.arch) + 1
    354     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
    355         "Unsupported architecture %s" % self.arch
    356     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
    357 
    358     self.code_create_struct = LogReader._DefineStruct([
    359         ("name_size", ctypes.c_int32),
    360         ("code_address", pointer_type),
    361         ("code_size", ctypes.c_int32)])
    362 
    363     self.code_move_struct = LogReader._DefineStruct([
    364         ("from_address", pointer_type),
    365         ("to_address", pointer_type)])
    366 
    367     self.code_delete_struct = LogReader._DefineStruct([
    368         ("address", pointer_type)])
    369 
    370   def ReadUpToGC(self):
    371     while self.log_pos < self.log.size():
    372       tag = self.log[self.log_pos]
    373       self.log_pos += 1
    374 
    375       if tag == LogReader._CODE_MOVING_GC_TAG:
    376         return
    377 
    378       if tag == LogReader._CODE_CREATE_TAG:
    379         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
    380         self.log_pos += ctypes.sizeof(event)
    381         start_address = event.code_address
    382         end_address = start_address + event.code_size
    383         name = self.log[self.log_pos:self.log_pos + event.name_size]
    384         origin = JS_ORIGIN
    385         self.log_pos += event.name_size
    386         origin_offset = self.log_pos
    387         self.log_pos += event.code_size
    388         code = Code(name, start_address, end_address, origin, origin_offset)
    389         conficting_code = self.code_map.Find(start_address)
    390         if conficting_code:
    391           if not (conficting_code.start_address == code.start_address and
    392             conficting_code.end_address == code.end_address):
    393             self.code_map.Remove(conficting_code)
    394           else:
    395             LogReader._HandleCodeConflict(conficting_code, code)
    396             # TODO(vitalyr): this warning is too noisy because of our
    397             # attempts to reconstruct code log from the snapshot.
    398             # print >>sys.stderr, \
    399             #     "Warning: Skipping duplicate code log entry %s" % code
    400             continue
    401         self.code_map.Add(code)
    402         continue
    403 
    404       if tag == LogReader._CODE_MOVE_TAG:
    405         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
    406         self.log_pos += ctypes.sizeof(event)
    407         old_start_address = event.from_address
    408         new_start_address = event.to_address
    409         if old_start_address == new_start_address:
    410           # Skip useless code move entries.
    411           continue
    412         code = self.code_map.Find(old_start_address)
    413         if not code:
    414           print >>sys.stderr, "Warning: Not found %x" % old_start_address
    415           continue
    416         assert code.start_address == old_start_address, \
    417             "Inexact move address %x for %s" % (old_start_address, code)
    418         self.code_map.Remove(code)
    419         size = code.end_address - code.start_address
    420         code.start_address = new_start_address
    421         code.end_address = new_start_address + size
    422         self.code_map.Add(code)
    423         continue
    424 
    425       assert False, "Unknown tag %s" % tag
    426 
    427   def Dispose(self):
    428     self.log.close()
    429     self.log_file.close()
    430 
    431   @staticmethod
    432   def _DefineStruct(fields):
    433     class Struct(ctypes.Structure):
    434       _fields_ = fields
    435     return Struct
    436 
    437   @staticmethod
    438   def _HandleCodeConflict(old_code, new_code):
    439     assert (old_code.start_address == new_code.start_address and
    440             old_code.end_address == new_code.end_address), \
    441         "Conficting code log entries %s and %s" % (old_code, new_code)
    442     if old_code.name == new_code.name:
    443       return
    444     # Code object may be shared by a few functions. Collect the full
    445     # set of names.
    446     old_code.AddName(new_code.name)
    447 
    448 
    449 class Descriptor(object):
    450   """Descriptor of a structure in the binary trace log."""
    451 
    452   CTYPE_MAP = {
    453     "u16": ctypes.c_uint16,
    454     "u32": ctypes.c_uint32,
    455     "u64": ctypes.c_uint64
    456   }
    457 
    458   def __init__(self, fields):
    459     class TraceItem(ctypes.Structure):
    460       _fields_ = Descriptor.CtypesFields(fields)
    461 
    462       def __str__(self):
    463         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
    464                          for field, _ in TraceItem._fields_)
    465 
    466     self.ctype = TraceItem
    467 
    468   def Read(self, trace, offset):
    469     return self.ctype.from_buffer(trace, offset)
    470 
    471   @staticmethod
    472   def CtypesFields(fields):
    473     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
    474 
    475 
    476 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
    477 # for the gory details.
    478 
    479 
    480 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
    481 TRACE_HEADER_DESC = Descriptor([
    482   ("magic", "u64"),
    483   ("size", "u64"),
    484   ("attr_size", "u64"),
    485   ("attrs_offset", "u64"),
    486   ("attrs_size", "u64"),
    487   ("data_offset", "u64"),
    488   ("data_size", "u64"),
    489   ("event_types_offset", "u64"),
    490   ("event_types_size", "u64")
    491 ])
    492 
    493 
    494 # Reference: /usr/include/linux/perf_event.h
    495 PERF_EVENT_ATTR_DESC = Descriptor([
    496   ("type", "u32"),
    497   ("size", "u32"),
    498   ("config", "u64"),
    499   ("sample_period_or_freq", "u64"),
    500   ("sample_type", "u64"),
    501   ("read_format", "u64"),
    502   ("flags", "u64"),
    503   ("wakeup_events_or_watermark", "u32"),
    504   ("bp_type", "u32"),
    505   ("bp_addr", "u64"),
    506   ("bp_len", "u64")
    507 ])
    508 
    509 
    510 # Reference: /usr/include/linux/perf_event.h
    511 PERF_EVENT_HEADER_DESC = Descriptor([
    512   ("type", "u32"),
    513   ("misc", "u16"),
    514   ("size", "u16")
    515 ])
    516 
    517 
    518 # Reference: kernel/tools/perf/util/event.h
    519 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
    520   ("pid", "u32"),
    521   ("tid", "u32"),
    522   ("addr", "u64"),
    523   ("len", "u64"),
    524   ("pgoff", "u64")
    525 ])
    526 
    527 # Reference: kernel/tools/perf/util/event.h
    528 PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
    529   ("pid", "u32"),
    530   ("tid", "u32"),
    531   ("addr", "u64"),
    532   ("len", "u64"),
    533   ("pgoff", "u64"),
    534   ("maj", "u32"),
    535   ("min", "u32"),
    536   ("ino", "u64"),
    537   ("ino_generation", "u64"),
    538   ("prot", "u32"),
    539   ("flags","u32")
    540 ])
    541 
    542 # perf_event_attr.sample_type bits control the set of
    543 # perf_sample_event fields.
    544 PERF_SAMPLE_IP = 1 << 0
    545 PERF_SAMPLE_TID = 1 << 1
    546 PERF_SAMPLE_TIME = 1 << 2
    547 PERF_SAMPLE_ADDR = 1 << 3
    548 PERF_SAMPLE_READ = 1 << 4
    549 PERF_SAMPLE_CALLCHAIN = 1 << 5
    550 PERF_SAMPLE_ID = 1 << 6
    551 PERF_SAMPLE_CPU = 1 << 7
    552 PERF_SAMPLE_PERIOD = 1 << 8
    553 PERF_SAMPLE_STREAM_ID = 1 << 9
    554 PERF_SAMPLE_RAW = 1 << 10
    555 
    556 
    557 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
    558 PERF_SAMPLE_EVENT_BODY_FIELDS = [
    559   ("ip", "u64", PERF_SAMPLE_IP),
    560   ("pid", "u32", PERF_SAMPLE_TID),
    561   ("tid", "u32", PERF_SAMPLE_TID),
    562   ("time", "u64", PERF_SAMPLE_TIME),
    563   ("addr", "u64", PERF_SAMPLE_ADDR),
    564   ("id", "u64", PERF_SAMPLE_ID),
    565   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
    566   ("cpu", "u32", PERF_SAMPLE_CPU),
    567   ("res", "u32", PERF_SAMPLE_CPU),
    568   ("period", "u64", PERF_SAMPLE_PERIOD),
    569   # Don't want to handle read format that comes after the period and
    570   # before the callchain and has variable size.
    571   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
    572   # Raw data follows the callchain and is ignored.
    573 ]
    574 
    575 
    576 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
    577 
    578 
    579 PERF_RECORD_MMAP = 1
    580 PERF_RECORD_MMAP2 = 10
    581 PERF_RECORD_SAMPLE = 9
    582 
    583 
    584 class TraceReader(object):
    585   """Perf (linux-2.6/tools/perf) trace file reader."""
    586 
    587   _TRACE_HEADER_MAGIC = 4993446653023372624
    588 
    589   def __init__(self, trace_name):
    590     self.trace_file = open(trace_name, "r")
    591     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    592     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    593     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
    594       print >>sys.stderr, "Warning: unsupported trace header magic"
    595     self.offset = self.trace_header.data_offset
    596     self.limit = self.trace_header.data_offset + self.trace_header.data_size
    597     assert self.limit <= self.trace.size(), \
    598         "Trace data limit exceeds trace file size"
    599     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    600     assert self.trace_header.attrs_size != 0, \
    601         "No perf event attributes found in the trace"
    602     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
    603                                                 self.trace_header.attrs_offset)
    604     self.sample_event_body_desc = self._SampleEventBodyDesc(
    605         perf_event_attr.sample_type)
    606     self.callchain_supported = \
    607         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    608     if self.callchain_supported:
    609       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
    610       self.ip_size = ctypes.sizeof(self.ip_struct)
    611 
    612   def ReadEventHeader(self):
    613     if self.offset >= self.limit:
    614       return None, 0
    615     offset = self.offset
    616     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    617     self.offset += header.size
    618     return header, offset
    619 
    620   def ReadMmap(self, header, offset):
    621     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
    622                                                offset + self.header_size)
    623     # Read null-terminated filename.
    624     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
    625                           offset + header.size]
    626     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
    627     return mmap_info
    628 
    629   def ReadMmap2(self, header, offset):
    630     mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace,
    631                                                 offset + self.header_size)
    632     # Read null-terminated filename.
    633     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
    634                           offset + header.size]
    635     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
    636     return mmap_info
    637 
    638   def ReadSample(self, header, offset):
    639     sample = self.sample_event_body_desc.Read(self.trace,
    640                                               offset + self.header_size)
    641     if not self.callchain_supported:
    642       return sample
    643     sample.ips = []
    644     offset += self.header_size + ctypes.sizeof(sample)
    645     for _ in xrange(sample.nr):
    646       sample.ips.append(
    647         self.ip_struct.from_buffer(self.trace, offset).value)
    648       offset += self.ip_size
    649     return sample
    650 
    651   def Dispose(self):
    652     self.trace.close()
    653     self.trace_file.close()
    654 
    655   def _SampleEventBodyDesc(self, sample_type):
    656     assert (sample_type & PERF_SAMPLE_READ) == 0, \
    657            "Can't hande read format in samples"
    658     fields = [(field, format)
    659               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
    660               if (bit & sample_type) != 0]
    661     return Descriptor(fields)
    662 
    663 
    664 OBJDUMP_SECTION_HEADER_RE = re.compile(
    665   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
    666 OBJDUMP_SYMBOL_LINE_RE = re.compile(
    667   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
    668 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
    669   r"^DYNAMIC SYMBOL TABLE")
    670 OBJDUMP_SKIP_RE = re.compile(
    671   r"^.*ld\.so\.cache$")
    672 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
    673 PERF_KERNEL_ALLSYMS_RE = re.compile(
    674   r".*kallsyms.*")
    675 KERNEL_ALLSYMS_LINE_RE = re.compile(
    676   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
    677 
    678 
    679 class LibraryRepo(object):
    680   def __init__(self):
    681     self.infos = []
    682     self.names = set()
    683     self.ticks = {}
    684 
    685 
    686   def HasDynamicSymbols(self, filename):
    687     if filename.endswith(".ko"): return False
    688     process = subprocess.Popen(
    689       "%s -h %s" % (OBJDUMP_BIN, filename),
    690       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    691     pipe = process.stdout
    692     try:
    693       for line in pipe:
    694         match = OBJDUMP_SECTION_HEADER_RE.match(line)
    695         if match and match.group(1) == 'dynsym': return True
    696     finally:
    697       pipe.close()
    698     assert process.wait() == 0, "Failed to objdump -h %s" % filename
    699     return False
    700 
    701 
    702   def Load(self, mmap_info, code_map, options):
    703     # Skip kernel mmaps when requested using the fact that their tid
    704     # is 0.
    705     if mmap_info.tid == 0 and not options.kernel:
    706       return True
    707     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
    708       return True
    709     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
    710       return self._LoadKernelSymbols(code_map)
    711     self.infos.append(mmap_info)
    712     mmap_info.ticks = 0
    713     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    714     if not os.path.exists(mmap_info.filename):
    715       return True
    716     # Request section headers (-h), symbols (-t), and dynamic symbols
    717     # (-T) from objdump.
    718     # Unfortunately, section headers span two lines, so we have to
    719     # keep the just seen section name (from the first line in each
    720     # section header) in the after_section variable.
    721     if self.HasDynamicSymbols(mmap_info.filename):
    722       dynamic_symbols = "-T"
    723     else:
    724       dynamic_symbols = ""
    725     process = subprocess.Popen(
    726       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
    727       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    728     pipe = process.stdout
    729     after_section = None
    730     code_sections = set()
    731     reloc_sections = set()
    732     dynamic = False
    733     try:
    734       for line in pipe:
    735         if after_section:
    736           if line.find("CODE") != -1:
    737             code_sections.add(after_section)
    738           if line.find("RELOC") != -1:
    739             reloc_sections.add(after_section)
    740           after_section = None
    741           continue
    742 
    743         match = OBJDUMP_SECTION_HEADER_RE.match(line)
    744         if match:
    745           after_section = match.group(1)
    746           continue
    747 
    748         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
    749           dynamic = True
    750           continue
    751 
    752         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
    753         if match:
    754           start_address = int(match.group(1), 16)
    755           origin_offset = start_address
    756           flags = match.group(2)
    757           section = match.group(3)
    758           if section in code_sections:
    759             if dynamic or section in reloc_sections:
    760               start_address += mmap_info.addr
    761             size = int(match.group(4), 16)
    762             name = match.group(5)
    763             origin = mmap_info.filename
    764             code_map.Add(Code(name, start_address, start_address + size,
    765                               origin, origin_offset))
    766     finally:
    767       pipe.close()
    768     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
    769 
    770   def Tick(self, pc):
    771     for i, mmap_info in enumerate(self.infos):
    772       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
    773         mmap_info.ticks += 1
    774         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
    775         return True
    776     return False
    777 
    778   def _UniqueMmapName(self, mmap_info):
    779     name = mmap_info.filename
    780     index = 1
    781     while name in self.names:
    782       name = "%s-%d" % (mmap_info.filename, index)
    783       index += 1
    784     self.names.add(name)
    785     return name
    786 
    787   def _LoadKernelSymbols(self, code_map):
    788     if not os.path.exists(KERNEL_ALLSYMS_FILE):
    789       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
    790       return False
    791     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    792     code = None
    793     for line in kallsyms:
    794       match = KERNEL_ALLSYMS_LINE_RE.match(line)
    795       if match:
    796         start_address = int(match.group(1), 16)
    797         end_address = start_address
    798         name = match.group(2)
    799         if code:
    800           code.end_address = start_address
    801           code_map.Add(code, 16)
    802         code = Code(name, start_address, end_address, "kernel", 0)
    803     return True
    804 
    805 
    806 def PrintReport(code_map, library_repo, arch, ticks, options):
    807   print "Ticks per symbol:"
    808   used_code = [code for code in code_map.UsedCode()]
    809   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
    810   for i, code in enumerate(used_code):
    811     code_ticks = code.self_ticks
    812     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
    813                                     code.FullName(), code.origin)
    814     if options.disasm_all or i < options.disasm_top:
    815       code.PrintAnnotated(arch, options)
    816   print
    817   print "Ticks per library:"
    818   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
    819   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
    820   for mmap_info in mmap_infos:
    821     mmap_ticks = mmap_info.ticks
    822     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
    823                                mmap_info.unique_name)
    824 
    825 
    826 def PrintDot(code_map, options):
    827   print "digraph G {"
    828   for code in code_map.UsedCode():
    829     if code.self_ticks < 10:
    830       continue
    831     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    832     if code.callee_ticks:
    833       for callee, ticks in code.callee_ticks.iteritems():
    834         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
    835   print "}"
    836 
    837 
    838 if __name__ == "__main__":
    839   parser = optparse.OptionParser(USAGE)
    840   parser.add_option("--log",
    841                     default="v8.log",
    842                     help="V8 log file name [default: %default]")
    843   parser.add_option("--trace",
    844                     default="perf.data",
    845                     help="perf trace file name [default: %default]")
    846   parser.add_option("--kernel",
    847                     default=False,
    848                     action="store_true",
    849                     help="process kernel entries [default: %default]")
    850   parser.add_option("--disasm-top",
    851                     default=0,
    852                     type="int",
    853                     help=("number of top symbols to disassemble and annotate "
    854                           "[default: %default]"))
    855   parser.add_option("--disasm-all",
    856                     default=False,
    857                     action="store_true",
    858                     help=("disassemble and annotate all used symbols "
    859                           "[default: %default]"))
    860   parser.add_option("--dot",
    861                     default=False,
    862                     action="store_true",
    863                     help="produce dot output (WIP) [default: %default]")
    864   parser.add_option("--quiet", "-q",
    865                     default=False,
    866                     action="store_true",
    867                     help="no auxiliary messages [default: %default]")
    868   parser.add_option("--gc-fake-mmap",
    869                     default="/tmp/__v8_gc__",
    870                     help="gc fake mmap file [default: %default]")
    871   parser.add_option("--objdump",
    872                     default="/usr/bin/objdump",
    873                     help="objdump tool to use [default: %default]")
    874   parser.add_option("--host-root",
    875                     default="",
    876                     help="Path to the host root [default: %default]")
    877   options, args = parser.parse_args()
    878 
    879   if not options.quiet:
    880     print "V8 log: %s, %s.ll" % (options.log, options.log)
    881     print "Perf trace file: %s" % options.trace
    882 
    883   V8_GC_FAKE_MMAP = options.gc_fake_mmap
    884   HOST_ROOT = options.host_root
    885   if os.path.exists(options.objdump):
    886     disasm.OBJDUMP_BIN = options.objdump
    887     OBJDUMP_BIN = options.objdump
    888   else:
    889     print "Cannot find %s, falling back to default objdump" % options.objdump
    890 
    891   # Stats.
    892   events = 0
    893   ticks = 0
    894   missed_ticks = 0
    895   really_missed_ticks = 0
    896   optimized_ticks = 0
    897   generated_ticks = 0
    898   v8_internal_ticks = 0
    899   mmap_time = 0
    900   sample_time = 0
    901 
    902   # Initialize the log reader.
    903   code_map = CodeMap()
    904   log_reader = LogReader(log_name=options.log + ".ll",
    905                          code_map=code_map)
    906   if not options.quiet:
    907     print "Generated code architecture: %s" % log_reader.arch
    908     print
    909     sys.stdout.flush()
    910 
    911   # Process the code and trace logs.
    912   library_repo = LibraryRepo()
    913   log_reader.ReadUpToGC()
    914   trace_reader = TraceReader(options.trace)
    915   while True:
    916     header, offset = trace_reader.ReadEventHeader()
    917     if not header:
    918       break
    919     events += 1
    920     if header.type == PERF_RECORD_MMAP:
    921       start = time.time()
    922       mmap_info = trace_reader.ReadMmap(header, offset)
    923       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
    924         log_reader.ReadUpToGC()
    925       else:
    926         library_repo.Load(mmap_info, code_map, options)
    927       mmap_time += time.time() - start
    928     elif header.type == PERF_RECORD_MMAP2:
    929       start = time.time()
    930       mmap_info = trace_reader.ReadMmap2(header, offset)
    931       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
    932         log_reader.ReadUpToGC()
    933       else:
    934         library_repo.Load(mmap_info, code_map, options)
    935       mmap_time += time.time() - start
    936     elif header.type == PERF_RECORD_SAMPLE:
    937       ticks += 1
    938       start = time.time()
    939       sample = trace_reader.ReadSample(header, offset)
    940       code = code_map.Find(sample.ip)
    941       if code:
    942         code.Tick(sample.ip)
    943         if code.codetype == Code.OPTIMIZED:
    944           optimized_ticks += 1
    945         elif code.codetype == Code.FULL_CODEGEN:
    946           generated_ticks += 1
    947         elif code.codetype == Code.V8INTERNAL:
    948           v8_internal_ticks += 1
    949       else:
    950         missed_ticks += 1
    951       if not library_repo.Tick(sample.ip) and not code:
    952         really_missed_ticks += 1
    953       if trace_reader.callchain_supported:
    954         for ip in sample.ips:
    955           caller_code = code_map.Find(ip)
    956           if caller_code:
    957             if code:
    958               caller_code.CalleeTick(code)
    959             code = caller_code
    960       sample_time += time.time() - start
    961 
    962   if options.dot:
    963     PrintDot(code_map, options)
    964   else:
    965     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
    966 
    967     if not options.quiet:
    968       def PrintTicks(number, total, description):
    969         print("%10d %5.1f%% ticks in %s" %
    970               (number, 100.0*number/total, description))
    971       print
    972       print "Stats:"
    973       print "%10d total trace events" % events
    974       print "%10d total ticks" % ticks
    975       print "%10d ticks not in symbols" % missed_ticks
    976       unaccounted = "unaccounted ticks"
    977       if really_missed_ticks > 0:
    978         unaccounted += " (probably in the kernel, try --kernel)"
    979       PrintTicks(really_missed_ticks, ticks, unaccounted)
    980       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
    981       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
    982       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
    983       print "%10d total symbols" % len([c for c in code_map.AllCode()])
    984       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
    985       print "%9.2fs library processing time" % mmap_time
    986       print "%9.2fs tick processing time" % sample_time
    987 
    988   log_reader.Dispose()
    989   trace_reader.Dispose()
    990