#!/usr/bin/env python
#
# Copyright 2012 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of Google Inc. nor the names of its
#       contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import bisect
import collections
import ctypes
import disasm
import mmap
import optparse
import os
import re
import subprocess
import sys
import time


USAGE="""usage: %prog [OPTION]...

Analyses V8 and perf logs to produce profiles.

Perf logs can be collected using a command like:
  $ perf record -R -e cycles -c 10000 -f -i ./shell bench.js --ll-prof
  # -R: collect all data
  # -e cycles: use cpu-cycles event (run "perf list" for details)
  # -c 10000: write a sample after each 10000 events
  # -f: force output file overwrite
  # -i: limit profiling to our process and the kernel
  # --ll-prof: shell flag that enables the right V8 logs
This will produce a binary trace file (perf.data) that %prog can analyse.

Examples:
  # Print flat profile with annotated disassembly for the 10 top
  # symbols. Use default log names and include the snapshot log.
  $ %prog --snapshot --disasm-top=10

  # Print flat profile with annotated disassembly for all used symbols.
  # Use default log names and include kernel symbols in the analysis.
  $ %prog --disasm-all --kernel

  # Print flat profile. Use custom log names.
  $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
"""


# Must match kGcFakeMmap.
V8_GC_FAKE_MMAP = "/tmp/__v8_gc__"
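# V8 (see kGcFakeMmap in the V8 sources) is expected to mmap this fake file
# on every code-moving GC, so the corresponding PERF_RECORD_MMAP event in the
# perf trace marks the point where the next chunk of the code log has to be
# replayed (see the main loop at the bottom of this script).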

JS_ORIGIN = "js"
JS_SNAPSHOT_ORIGIN = "js-snapshot"

OBJDUMP_BIN = disasm.OBJDUMP_BIN


class Code(object):
  """Code object."""

  _id = 0

  def __init__(self, name, start_address, end_address, origin, origin_offset):
    self.id = Code._id
    Code._id += 1
    self.name = name
    self.other_names = None
    self.start_address = start_address
    self.end_address = end_address
    self.origin = origin
    self.origin_offset = origin_offset
    self.self_ticks = 0
    self.self_ticks_map = None
    self.callee_ticks = None

  def AddName(self, name):
    assert self.name != name
    if self.other_names is None:
      self.other_names = [name]
      return
    if name not in self.other_names:
      self.other_names.append(name)

  def FullName(self):
    if self.other_names is None:
      return self.name
    self.other_names.sort()
    return "%s (aka %s)" % (self.name, ", ".join(self.other_names))

  def IsUsed(self):
    return self.self_ticks > 0 or self.callee_ticks is not None

  def Tick(self, pc):
    self.self_ticks += 1
    if self.self_ticks_map is None:
      self.self_ticks_map = collections.defaultdict(lambda: 0)
    offset = pc - self.start_address
    self.self_ticks_map[offset] += 1

  def CalleeTick(self, callee):
    if self.callee_ticks is None:
      self.callee_ticks = collections.defaultdict(lambda: 0)
    self.callee_ticks[callee] += 1

  def PrintAnnotated(self, arch, options):
    if self.self_ticks_map is None:
      ticks_map = []
    else:
      ticks_map = self.self_ticks_map.items()
    # Convert the ticks map to offsets and counts arrays so that later
    # we can do binary search in the offsets array.
    ticks_map.sort(key=lambda t: t[0])
    ticks_offsets = [t[0] for t in ticks_map]
    ticks_counts = [t[1] for t in ticks_map]
    # Get a list of disassembled lines and their addresses.
    lines = self._GetDisasmLines(arch, options)
    if len(lines) == 0:
      return
    # Print annotated lines.
    address = lines[0][0]
    total_count = 0
    for i in xrange(len(lines)):
      start_offset = lines[i][0] - address
      if i == len(lines) - 1:
        end_offset = self.end_address - self.start_address
      else:
        end_offset = lines[i + 1][0] - address
      # Ticks (reported pc values) are not always precise, i.e. they do
      # not necessarily point at instruction starts. So we have to search
      # for ticks that touch the current instruction line.
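      # Illustrative numbers: with ticks_offsets = [2, 6, 9] and
      # ticks_counts = [1, 4, 2], an instruction line covering offsets
      # [4, 8) is credited with the 4 ticks at offset 6: bisect_left finds
      # the first offset >= 8, and the reversed scan stops once an offset
      # drops below 4.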
      j = bisect.bisect_left(ticks_offsets, end_offset)
      count = 0
      for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
        if offset < start_offset:
          break
        count += cnt
      total_count += count
      count = 100.0 * count / self.self_ticks
      if count >= 0.01:
        print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
      else:
        print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
    print
    assert total_count == self.self_ticks, \
        "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)

  def __str__(self):
    return "%s [0x%x, 0x%x) size: %d origin: %s" % (
      self.name,
      self.start_address,
      self.end_address,
      self.end_address - self.start_address,
      self.origin)

  def _GetDisasmLines(self, arch, options):
    if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
      inplace = False
      filename = options.log + ".ll"
    else:
      inplace = True
      filename = self.origin
    return disasm.GetDisasmLines(filename,
                                 self.origin_offset,
                                 self.end_address - self.start_address,
                                 arch,
                                 inplace)


class CodePage(object):
  """Group of adjacent code objects."""

  SHIFT = 12  # 4K pages
  SIZE = (1 << SHIFT)
  MASK = ~(SIZE - 1)
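  # With SHIFT = 12 the page arithmetic works out as follows, e.g.:
  #   PageAddress(0x12345)    == 0x12000
  #   PageId(0x12345)         == 0x12
  #   PageAddressFromId(0x12) == 0x12000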

  @staticmethod
  def PageAddress(address):
    return address & CodePage.MASK

  @staticmethod
  def PageId(address):
    return address >> CodePage.SHIFT

  @staticmethod
  def PageAddressFromId(id):
    return id << CodePage.SHIFT

  def __init__(self, address):
    self.address = address
    self.code_objects = []

  def Add(self, code):
    self.code_objects.append(code)

  def Remove(self, code):
    self.code_objects.remove(code)

  def Find(self, pc):
    code_objects = self.code_objects
    for i, code in enumerate(code_objects):
      if code.start_address <= pc < code.end_address:
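        # Move the hit to the front so that repeated lookups of hot code
        # on this page stay cheap (a simple move-to-front heuristic).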
        code_objects[0], code_objects[i] = code, code_objects[0]
        return code
    return None

  def __iter__(self):
    return self.code_objects.__iter__()


class CodeMap(object):
  """Code object map."""

  def __init__(self):
    self.pages = {}
    self.min_address = 1 << 64
    self.max_address = -1
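  # A code object is registered with every 4K page it overlaps, so Find()
  # needs only one dictionary lookup to reach the right page and then scans
  # the handful of code objects stored there.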

  def Add(self, code, max_pages=-1):
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    pages = 0
    while page_id < limit_id:
      if max_pages >= 0 and pages > max_pages:
        print >>sys.stderr, \
            "Warning: page limit (%d) reached for %s [%s]" % (
            max_pages, code.name, code.origin)
        break
      if page_id in self.pages:
        page = self.pages[page_id]
      else:
        page = CodePage(CodePage.PageAddressFromId(page_id))
        self.pages[page_id] = page
      page.Add(code)
      page_id += 1
      pages += 1
    self.min_address = min(self.min_address, code.start_address)
    self.max_address = max(self.max_address, code.end_address)

  def Remove(self, code):
    page_id = CodePage.PageId(code.start_address)
    limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    removed = False
    while page_id < limit_id:
      if page_id not in self.pages:
        page_id += 1
        continue
      page = self.pages[page_id]
      page.Remove(code)
      removed = True
      page_id += 1
    return removed

  def AllCode(self):
    for page in self.pages.itervalues():
      for code in page:
        if CodePage.PageAddress(code.start_address) == page.address:
          yield code

  def UsedCode(self):
    for code in self.AllCode():
      if code.IsUsed():
        yield code

  def Print(self):
    for code in self.AllCode():
      print code

  def Find(self, pc):
    if pc < self.min_address or pc >= self.max_address:
      return None
    page_id = CodePage.PageId(pc)
    if page_id not in self.pages:
      return None
    return self.pages[page_id].Find(pc)


class CodeInfo(object):
  """Generic info about generated code objects."""

  def __init__(self, arch, header_size):
    self.arch = arch
    self.header_size = header_size


class SnapshotLogReader(object):
  """V8 snapshot log reader."""

  _SNAPSHOT_CODE_NAME_RE = re.compile(
    r"snapshot-code-name,(\d+),\"(.*)\"")
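  # A matching line looks like (illustrative example):
  #   snapshot-code-name,42,"LazyCompile: foo script.js:1"
  # i.e. the position of a serialized code object and its readable name.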

  def __init__(self, log_name):
    self.log_name = log_name

  def ReadNameMap(self):
    log = open(self.log_name, "r")
    try:
      snapshot_pos_to_name = {}
      for line in log:
        match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
        if match:
          pos = int(match.group(1))
          name = match.group(2)
          snapshot_pos_to_name[pos] = name
    finally:
      log.close()
    return snapshot_pos_to_name


class LogReader(object):
  """V8 low-level (binary) log reader."""

  _ARCH_TO_POINTER_TYPE_MAP = {
    "ia32": ctypes.c_uint32,
    "arm": ctypes.c_uint32,
    "mips": ctypes.c_uint32,
    "x64": ctypes.c_uint64
  }

  _CODE_CREATE_TAG = "C"
  _CODE_MOVE_TAG = "M"
  _CODE_DELETE_TAG = "D"
  _SNAPSHOT_POSITION_TAG = "P"
  _CODE_MOVING_GC_TAG = "G"
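  # The .ll log is a flat binary stream: an architecture name terminated by
  # a NUL byte, followed by tagged records.  A 'C' record is the
  # code_create_struct below, then the name bytes, then a verbatim copy of
  # the generated machine code (read back later via origin_offset for
  # disassembly).  'M', 'D' and 'P' records are just their fixed structs,
  # and 'G' marks a code-moving GC.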

  def __init__(self, log_name, code_map, snapshot_pos_to_name):
    self.log_file = open(log_name, "r")
    self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.log_pos = 0
    self.code_map = code_map
    self.snapshot_pos_to_name = snapshot_pos_to_name
    self.address_to_snapshot_name = {}

    self.arch = self.log[:self.log.find("\0")]
    self.log_pos += len(self.arch) + 1
    assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
        "Unsupported architecture %s" % self.arch
    pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]

    self.code_create_struct = LogReader._DefineStruct([
        ("name_size", ctypes.c_int32),
        ("code_address", pointer_type),
        ("code_size", ctypes.c_int32)])

    self.code_move_struct = LogReader._DefineStruct([
        ("from_address", pointer_type),
        ("to_address", pointer_type)])

    self.code_delete_struct = LogReader._DefineStruct([
        ("address", pointer_type)])

    self.snapshot_position_struct = LogReader._DefineStruct([
        ("address", pointer_type),
        ("position", ctypes.c_int32)])

  def ReadUpToGC(self):
    while self.log_pos < self.log.size():
      tag = self.log[self.log_pos]
      self.log_pos += 1

      if tag == LogReader._CODE_MOVING_GC_TAG:
        self.address_to_snapshot_name.clear()
        return

      if tag == LogReader._CODE_CREATE_TAG:
        event = self.code_create_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        start_address = event.code_address
        end_address = start_address + event.code_size
        if start_address in self.address_to_snapshot_name:
          name = self.address_to_snapshot_name[start_address]
          origin = JS_SNAPSHOT_ORIGIN
        else:
          name = self.log[self.log_pos:self.log_pos + event.name_size]
          origin = JS_ORIGIN
        self.log_pos += event.name_size
        origin_offset = self.log_pos
        self.log_pos += event.code_size
        code = Code(name, start_address, end_address, origin, origin_offset)
        conflicting_code = self.code_map.Find(start_address)
        if conflicting_code:
          if not (conflicting_code.start_address == code.start_address and
                  conflicting_code.end_address == code.end_address):
            self.code_map.Remove(conflicting_code)
          else:
            LogReader._HandleCodeConflict(conflicting_code, code)
            # TODO(vitalyr): this warning is too noisy because of our
            # attempts to reconstruct the code log from the snapshot.
            # print >>sys.stderr, \
            #     "Warning: Skipping duplicate code log entry %s" % code
            continue
        self.code_map.Add(code)
        continue

      if tag == LogReader._CODE_MOVE_TAG:
        event = self.code_move_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        old_start_address = event.from_address
        new_start_address = event.to_address
        if old_start_address == new_start_address:
          # Skip useless code move entries.
          continue
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact move address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        size = code.end_address - code.start_address
        code.start_address = new_start_address
        code.end_address = new_start_address + size
        self.code_map.Add(code)
        continue

      if tag == LogReader._CODE_DELETE_TAG:
        event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        old_start_address = event.address
        code = self.code_map.Find(old_start_address)
        if not code:
          print >>sys.stderr, "Warning: Not found %x" % old_start_address
          continue
        assert code.start_address == old_start_address, \
            "Inexact delete address %x for %s" % (old_start_address, code)
        self.code_map.Remove(code)
        continue

      if tag == LogReader._SNAPSHOT_POSITION_TAG:
        event = self.snapshot_position_struct.from_buffer(self.log,
                                                          self.log_pos)
        self.log_pos += ctypes.sizeof(event)
        start_address = event.address
        snapshot_pos = event.position
        if snapshot_pos in self.snapshot_pos_to_name:
          self.address_to_snapshot_name[start_address] = \
              self.snapshot_pos_to_name[snapshot_pos]
        continue

      assert False, "Unknown tag %s" % tag

  def Dispose(self):
    self.log.close()
    self.log_file.close()

  @staticmethod
  def _DefineStruct(fields):
    class Struct(ctypes.Structure):
      _fields_ = fields
    return Struct

  @staticmethod
  def _HandleCodeConflict(old_code, new_code):
    assert (old_code.start_address == new_code.start_address and
            old_code.end_address == new_code.end_address), \
        "Conflicting code log entries %s and %s" % (old_code, new_code)
    if old_code.name == new_code.name:
      return
    # A code object may be shared by a few functions. Collect the full
    # set of names.
    old_code.AddName(new_code.name)


class Descriptor(object):
  """Descriptor of a structure in the binary trace log."""

  CTYPE_MAP = {
    "u16": ctypes.c_uint16,
    "u32": ctypes.c_uint32,
    "u64": ctypes.c_uint64
  }

  def __init__(self, fields):
    class TraceItem(ctypes.Structure):
      _fields_ = Descriptor.CtypesFields(fields)

      def __str__(self):
        return ", ".join("%s: %s" % (field, self.__getattribute__(field))
                         for field, _ in TraceItem._fields_)

    self.ctype = TraceItem

  def Read(self, trace, offset):
    return self.ctype.from_buffer(trace, offset)

  @staticmethod
  def CtypesFields(fields):
    return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]


# Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
# for the gory details.
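# In short, as parsed below: perf.data starts with a file header
# (TRACE_HEADER_DESC) whose attrs_offset and data_offset point at an array of
# perf_event_attr structures and at the data section, which is a sequence of
# records that each start with a perf_event_header.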


TRACE_HEADER_DESC = Descriptor([
  ("magic", "u64"),
  ("size", "u64"),
  ("attr_size", "u64"),
  ("attrs_offset", "u64"),
  ("attrs_size", "u64"),
  ("data_offset", "u64"),
  ("data_size", "u64"),
  ("event_types_offset", "u64"),
  ("event_types_size", "u64")
])


PERF_EVENT_ATTR_DESC = Descriptor([
  ("type", "u32"),
  ("size", "u32"),
  ("config", "u64"),
  ("sample_period_or_freq", "u64"),
  ("sample_type", "u64"),
  ("read_format", "u64"),
  ("flags", "u64"),
  ("wakeup_events_or_watermark", "u32"),
  ("bp_type", "u32"),
  ("bp_addr", "u64"),
  ("bp_len", "u64"),
])


PERF_EVENT_HEADER_DESC = Descriptor([
  ("type", "u32"),
  ("misc", "u16"),
  ("size", "u16")
])


PERF_MMAP_EVENT_BODY_DESC = Descriptor([
  ("pid", "u32"),
  ("tid", "u32"),
  ("addr", "u64"),
  ("len", "u64"),
  ("pgoff", "u64")
])


# perf_event_attr.sample_type bits control the set of
# perf_sample_event fields.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10


PERF_SAMPLE_EVENT_BODY_FIELDS = [
  ("ip", "u64", PERF_SAMPLE_IP),
  ("pid", "u32", PERF_SAMPLE_TID),
  ("tid", "u32", PERF_SAMPLE_TID),
  ("time", "u64", PERF_SAMPLE_TIME),
  ("addr", "u64", PERF_SAMPLE_ADDR),
  ("id", "u64", PERF_SAMPLE_ID),
  ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
  ("cpu", "u32", PERF_SAMPLE_CPU),
  ("res", "u32", PERF_SAMPLE_CPU),
  ("period", "u64", PERF_SAMPLE_PERIOD),
  # We don't handle the read format, which comes after the period,
  # precedes the callchain, and has a variable size.
  ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
  # Raw data follows the callchain and is ignored.
]
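# For example, with sample_type == PERF_SAMPLE_IP | PERF_SAMPLE_TID |
# PERF_SAMPLE_TIME the per-sample body built by _SampleEventBodyDesc below is
# just (ip: u64, pid: u32, tid: u32, time: u64).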


PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


PERF_RECORD_MMAP = 1
PERF_RECORD_SAMPLE = 9


class TraceReader(object):
  """Perf (linux-2.6/tools/perf) trace file reader."""

  _TRACE_HEADER_MAGIC = 4993446653023372624
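  # 0x454c494646524550, i.e. the bytes "PERFFILE" read as a little-endian
  # u64: the magic at the start of a perf.data file of this vintage.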

  def __init__(self, trace_name):
    self.trace_file = open(trace_name, "r")
    self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
      print >>sys.stderr, "Warning: unsupported trace header magic"
    self.offset = self.trace_header.data_offset
    self.limit = self.trace_header.data_offset + self.trace_header.data_size
    assert self.limit <= self.trace.size(), \
        "Trace data limit exceeds trace file size"
    self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    assert self.trace_header.attrs_size != 0, \
        "No perf event attributes found in the trace"
    perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                self.trace_header.attrs_offset)
    self.sample_event_body_desc = self._SampleEventBodyDesc(
        perf_event_attr.sample_type)
    self.callchain_supported = \
        (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    if self.callchain_supported:
      self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
      self.ip_size = ctypes.sizeof(self.ip_struct)

  def ReadEventHeader(self):
    if self.offset >= self.limit:
      return None, 0
    offset = self.offset
    header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    self.offset += header.size
    return header, offset

  def ReadMmap(self, header, offset):
    mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
                                               offset + self.header_size)
    # Read null-terminated filename.
    filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
                          offset + header.size]
    mmap_info.filename = filename[:filename.find(chr(0))]
    return mmap_info

  def ReadSample(self, header, offset):
    sample = self.sample_event_body_desc.Read(self.trace,
                                              offset + self.header_size)
    if not self.callchain_supported:
      return sample
    sample.ips = []
    offset += self.header_size + ctypes.sizeof(sample)
    for _ in xrange(sample.nr):
      sample.ips.append(
        self.ip_struct.from_buffer(self.trace, offset).value)
      offset += self.ip_size
    return sample

  def Dispose(self):
    self.trace.close()
    self.trace_file.close()

  def _SampleEventBodyDesc(self, sample_type):
    assert (sample_type & PERF_SAMPLE_READ) == 0, \
           "Can't handle read format in samples"
    fields = [(field, format)
              for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
              if (bit & sample_type) != 0]
    return Descriptor(fields)


OBJDUMP_SECTION_HEADER_RE = re.compile(
  r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
OBJDUMP_SYMBOL_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
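# A symbol table line from objdump typically looks like (illustrative):
#   00000000004004d0 g     F .text  000000000000002a  main
# i.e. address, a seven-character flag field, section, size and name.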
OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
  r"^DYNAMIC SYMBOL TABLE")
OBJDUMP_SKIP_RE = re.compile(
  r"^.*ld\.so\.cache$")
KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
PERF_KERNEL_ALLSYMS_RE = re.compile(
  r".*kallsyms.*")
KERNEL_ALLSYMS_LINE_RE = re.compile(
  r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
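# /proc/kallsyms lines look like (illustrative):
#   ffffffff810002b8 T do_one_initcall
# i.e. a start address, a t/T (text symbol) type and the symbol name.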


class LibraryRepo(object):
  def __init__(self):
    self.infos = []
    self.names = set()
    self.ticks = {}

  def Load(self, mmap_info, code_map, options):
    # Kernel mmaps are identified by a tid of 0; skip them unless
    # --kernel was requested.
    if mmap_info.tid == 0 and not options.kernel:
      return True
    if OBJDUMP_SKIP_RE.match(mmap_info.filename):
      return True
    if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
      return self._LoadKernelSymbols(code_map)
    self.infos.append(mmap_info)
    mmap_info.ticks = 0
    mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    if not os.path.exists(mmap_info.filename):
      return True
    # Request section headers (-h), symbols (-t), and dynamic symbols
    # (-T) from objdump.
    # Unfortunately, section headers span two lines, so we have to
    # keep the just seen section name (from the first line in each
    # section header) in the after_section variable.
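    # Illustrative objdump -h output; the section header spans two lines:
    #   11 .text         0004f0c4  0000000000404bc0  0000000000404bc0  ...
    #                    CONTENTS, ALLOC, LOAD, READONLY, CODE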
    process = subprocess.Popen(
      "%s -h -t -T -C %s" % (OBJDUMP_BIN, mmap_info.filename),
      shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pipe = process.stdout
    after_section = None
    code_sections = set()
    reloc_sections = set()
    dynamic = False
    try:
      for line in pipe:
        if after_section:
          if line.find("CODE") != -1:
            code_sections.add(after_section)
          if line.find("RELOC") != -1:
            reloc_sections.add(after_section)
          after_section = None
          continue

        match = OBJDUMP_SECTION_HEADER_RE.match(line)
        if match:
          after_section = match.group(1)
          continue

        if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
          dynamic = True
          continue

        match = OBJDUMP_SYMBOL_LINE_RE.match(line)
        if match:
          start_address = int(match.group(1), 16)
          origin_offset = start_address
          flags = match.group(2)
          section = match.group(3)
          if section in code_sections:
            if dynamic or section in reloc_sections:
              start_address += mmap_info.addr
            size = int(match.group(4), 16)
            name = match.group(5)
            origin = mmap_info.filename
            code_map.Add(Code(name, start_address, start_address + size,
                              origin, origin_offset))
    finally:
      pipe.close()
    assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename

  def Tick(self, pc):
    for i, mmap_info in enumerate(self.infos):
      if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
        mmap_info.ticks += 1
        self.infos[0], self.infos[i] = mmap_info, self.infos[0]
        return True
    return False

  def _UniqueMmapName(self, mmap_info):
    name = mmap_info.filename
    index = 1
    while name in self.names:
      name = "%s-%d" % (mmap_info.filename, index)
      index += 1
    self.names.add(name)
    return name

  def _LoadKernelSymbols(self, code_map):
    if not os.path.exists(KERNEL_ALLSYMS_FILE):
      print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
      return False
    kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    code = None
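    # kallsyms only provides start addresses, so each symbol's end address
    # is filled in from the next symbol's start; the max_pages limit of 16
    # passed to code_map.Add() guards against huge gaps between symbols.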
    for line in kallsyms:
      match = KERNEL_ALLSYMS_LINE_RE.match(line)
      if match:
        start_address = int(match.group(1), 16)
        end_address = start_address
        name = match.group(2)
        if code:
          code.end_address = start_address
          code_map.Add(code, 16)
        code = Code(name, start_address, end_address, "kernel", 0)
    return True


def PrintReport(code_map, library_repo, arch, ticks, options):
  print "Ticks per symbol:"
  used_code = [code for code in code_map.UsedCode()]
  used_code.sort(key=lambda x: x.self_ticks, reverse=True)
  for i, code in enumerate(used_code):
    code_ticks = code.self_ticks
    print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
                                    code.FullName(), code.origin)
    if options.disasm_all or i < options.disasm_top:
      code.PrintAnnotated(arch, options)
  print
  print "Ticks per library:"
  mmap_infos = [m for m in library_repo.infos]
  mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
  for mmap_info in mmap_infos:
    mmap_ticks = mmap_info.ticks
    print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
                               mmap_info.unique_name)


def PrintDot(code_map, options):
  print "digraph G {"
  for code in code_map.UsedCode():
    if code.self_ticks < 10:
      continue
    print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    if code.callee_ticks:
      for callee, ticks in code.callee_ticks.iteritems():
        print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
  print "}"


if __name__ == "__main__":
  parser = optparse.OptionParser(USAGE)
  parser.add_option("--snapshot-log",
                    default="obj/release/snapshot.log",
                    help="V8 snapshot log file name [default: %default]")
  parser.add_option("--log",
                    default="v8.log",
                    help="V8 log file name [default: %default]")
  parser.add_option("--snapshot",
                    default=False,
                    action="store_true",
                    help="process V8 snapshot log [default: %default]")
  parser.add_option("--trace",
                    default="perf.data",
                    help="perf trace file name [default: %default]")
  parser.add_option("--kernel",
                    default=False,
                    action="store_true",
                    help="process kernel entries [default: %default]")
  parser.add_option("--disasm-top",
                    default=0,
                    type="int",
                    help=("number of top symbols to disassemble and annotate "
                          "[default: %default]"))
  parser.add_option("--disasm-all",
                    default=False,
                    action="store_true",
                    help=("disassemble and annotate all used symbols "
                          "[default: %default]"))
  parser.add_option("--dot",
                    default=False,
                    action="store_true",
                    help="produce dot output (WIP) [default: %default]")
  parser.add_option("--quiet", "-q",
                    default=False,
                    action="store_true",
                    help="no auxiliary messages [default: %default]")
  options, args = parser.parse_args()

  if not options.quiet:
    if options.snapshot:
      print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
                                        options.log,
                                        options.log)
    else:
      print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
    print "Perf trace file: %s" % options.trace

  # Stats.
  events = 0
  ticks = 0
  missed_ticks = 0
  really_missed_ticks = 0
  mmap_time = 0
  sample_time = 0

  # Process the snapshot log to fill the snapshot name map.
  snapshot_name_map = {}
  if options.snapshot:
    snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
    snapshot_name_map = snapshot_log_reader.ReadNameMap()

  # Initialize the log reader.
  code_map = CodeMap()
  log_reader = LogReader(log_name=options.log + ".ll",
                         code_map=code_map,
                         snapshot_pos_to_name=snapshot_name_map)
  if not options.quiet:
    print "Generated code architecture: %s" % log_reader.arch
    print
    sys.stdout.flush()

  # Process the code and trace logs.
  library_repo = LibraryRepo()
  log_reader.ReadUpToGC()
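  # The V8 code log is replayed in chunks delimited by code-moving GCs:
  # every mmap of V8_GC_FAKE_MMAP in the trace advances the code map to the
  # next GC, keeping it consistent with the samples that follow.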
  trace_reader = TraceReader(options.trace)
  while True:
    header, offset = trace_reader.ReadEventHeader()
    if not header:
      break
    events += 1
    if header.type == PERF_RECORD_MMAP:
      start = time.time()
      mmap_info = trace_reader.ReadMmap(header, offset)
      if mmap_info.filename == V8_GC_FAKE_MMAP:
        log_reader.ReadUpToGC()
      else:
        library_repo.Load(mmap_info, code_map, options)
      mmap_time += time.time() - start
    elif header.type == PERF_RECORD_SAMPLE:
      ticks += 1
      start = time.time()
      sample = trace_reader.ReadSample(header, offset)
      code = code_map.Find(sample.ip)
      if code:
        code.Tick(sample.ip)
      else:
        missed_ticks += 1
      if not library_repo.Tick(sample.ip) and not code:
        really_missed_ticks += 1
      if trace_reader.callchain_supported:
        for ip in sample.ips:
          caller_code = code_map.Find(ip)
          if caller_code:
            if code:
              caller_code.CalleeTick(code)
            code = caller_code
      sample_time += time.time() - start

  if options.dot:
    PrintDot(code_map, options)
  else:
    PrintReport(code_map, library_repo, log_reader.arch, ticks, options)

    if not options.quiet:
      print
      print "Stats:"
      print "%10d total trace events" % events
      print "%10d total ticks" % ticks
      print "%10d ticks not in symbols" % missed_ticks
      print "%10d unaccounted ticks" % really_missed_ticks
      print "%10d total symbols" % len([c for c in code_map.AllCode()])
      print "%10d used symbols" % len([c for c in code_map.UsedCode()])
      print "%9.2fs library processing time" % mmap_time
      print "%9.2fs tick processing time" % sample_time

  log_reader.Dispose()
  trace_reader.Dispose()