Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2012 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 import bisect
     31 import collections
     32 import ctypes
     33 import disasm
     34 import mmap
     35 import optparse
     36 import os
     37 import re
     38 import subprocess
     39 import sys
     40 import time
     41 
     42 
     43 USAGE="""usage: %prog [OPTION]...
     44 
     45 Analyses V8 and perf logs to produce profiles.
     46 
     47 Perf logs can be collected using a command like:
     48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
     49   # -R: collect all data
     50   # -e cycles: use cpu-cycles event (run "perf list" for details)
     51   # -c 10000: write a sample after each 10000 events
     52   # -f: force output file overwrite
     53   # -i: limit profiling to our process and the kernel
     54   # --ll-prof shell flag enables the right V8 logs
     55 This will produce a binary trace file (perf.data) that %prog can analyse.
     56 
     57 IMPORTANT:
     58   The kernel has an internal maximum for events per second, it is 100K by
     59   default. That's not enough for "-c 10000". Set it to some higher value:
     60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
     61   You can also make the warning about kernel address maps go away:
     62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
     63 
     64 We have a convenience script that handles all of the above for you:
     65   $ tools/run-llprof.sh ./d8 bench.js
     66 
     67 Examples:
     68   # Print flat profile with annotated disassembly for the 10 top
     69   # symbols. Use default log names and include the snapshot log.
     70   $ %prog --snapshot --disasm-top=10
     71 
     72   # Print flat profile with annotated disassembly for all used symbols.
     73   # Use default log names and include kernel symbols into analysis.
     74   $ %prog --disasm-all --kernel
     75 
     76   # Print flat profile. Use custom log names.
     77   $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
     78 """
     79 
     80 
     81 JS_ORIGIN = "js"
     82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
     83 
     84 class Code(object):
     85   """Code object."""
     86 
     87   _id = 0
     88   UNKNOWN = 0
     89   V8INTERNAL = 1
     90   FULL_CODEGEN = 2
     91   OPTIMIZED = 3
     92 
     93   def __init__(self, name, start_address, end_address, origin, origin_offset):
     94     self.id = Code._id
     95     Code._id += 1
     96     self.name = name
     97     self.other_names = None
     98     self.start_address = start_address
     99     self.end_address = end_address
    100     self.origin = origin
    101     self.origin_offset = origin_offset
    102     self.self_ticks = 0
    103     self.self_ticks_map = None
    104     self.callee_ticks = None
    105     if name.startswith("LazyCompile:*"):
    106       self.codetype = Code.OPTIMIZED
    107     elif name.startswith("LazyCompile:"):
    108       self.codetype = Code.FULL_CODEGEN
    109     elif name.startswith("v8::internal::"):
    110       self.codetype = Code.V8INTERNAL
    111     else:
    112       self.codetype = Code.UNKNOWN
    113 
    114   def AddName(self, name):
    115     assert self.name != name
    116     if self.other_names is None:
    117       self.other_names = [name]
    118       return
    119     if not name in self.other_names:
    120       self.other_names.append(name)
    121 
    122   def FullName(self):
    123     if self.other_names is None:
    124       return self.name
    125     self.other_names.sort()
    126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
    127 
    128   def IsUsed(self):
    129     return self.self_ticks > 0 or self.callee_ticks is not None
    130 
    131   def Tick(self, pc):
    132     self.self_ticks += 1
    133     if self.self_ticks_map is None:
    134       self.self_ticks_map = collections.defaultdict(lambda: 0)
    135     offset = pc - self.start_address
    136     self.self_ticks_map[offset] += 1
    137 
    138   def CalleeTick(self, callee):
    139     if self.callee_ticks is None:
    140       self.callee_ticks = collections.defaultdict(lambda: 0)
    141     self.callee_ticks[callee] += 1
    142 
    143   def PrintAnnotated(self, arch, options):
    144     if self.self_ticks_map is None:
    145       ticks_map = []
    146     else:
    147       ticks_map = self.self_ticks_map.items()
    148     # Convert the ticks map to offsets and counts arrays so that later
    149     # we can do binary search in the offsets array.
    150     ticks_map.sort(key=lambda t: t[0])
    151     ticks_offsets = [t[0] for t in ticks_map]
    152     ticks_counts = [t[1] for t in ticks_map]
    153     # Get a list of disassembled lines and their addresses.
    154     lines = self._GetDisasmLines(arch, options)
    155     if len(lines) == 0:
    156       return
    157     # Print annotated lines.
    158     address = lines[0][0]
    159     total_count = 0
    160     for i in xrange(len(lines)):
    161       start_offset = lines[i][0] - address
    162       if i == len(lines) - 1:
    163         end_offset = self.end_address - self.start_address
    164       else:
    165         end_offset = lines[i + 1][0] - address
    166       # Ticks (reported pc values) are not always precise, i.e. not
    167       # necessarily point at instruction starts. So we have to search
    168       # for ticks that touch the current instruction line.
    169       j = bisect.bisect_left(ticks_offsets, end_offset)
    170       count = 0
    171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
    172         if offset < start_offset:
    173           break
    174         count += cnt
    175       total_count += count
    176       count = 100.0 * count / self.self_ticks
    177       if count >= 0.01:
    178         print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
    179       else:
    180         print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
    181     print
    182     assert total_count == self.self_ticks, \
    183         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
    184 
    185   def __str__(self):
    186     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
    187       self.name,
    188       self.start_address,
    189       self.end_address,
    190       self.end_address - self.start_address,
    191       self.origin)
    192 
    193   def _GetDisasmLines(self, arch, options):
    194     if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
    195       inplace = False
    196       filename = options.log + ".ll"
    197     else:
    198       inplace = True
    199       filename = self.origin
    200     return disasm.GetDisasmLines(filename,
    201                                  self.origin_offset,
    202                                  self.end_address - self.start_address,
    203                                  arch,
    204                                  inplace)
    205 
    206 
    207 class CodePage(object):
    208   """Group of adjacent code objects."""
    209 
    210   SHIFT = 20  # 1M pages
    211   SIZE = (1 << SHIFT)
    212   MASK = ~(SIZE - 1)
    213 
    214   @staticmethod
    215   def PageAddress(address):
    216     return address & CodePage.MASK
    217 
    218   @staticmethod
    219   def PageId(address):
    220     return address >> CodePage.SHIFT
    221 
    222   @staticmethod
    223   def PageAddressFromId(id):
    224     return id << CodePage.SHIFT
    225 
    226   def __init__(self, address):
    227     self.address = address
    228     self.code_objects = []
    229 
    230   def Add(self, code):
    231     self.code_objects.append(code)
    232 
    233   def Remove(self, code):
    234     self.code_objects.remove(code)
    235 
    236   def Find(self, pc):
    237     code_objects = self.code_objects
    238     for i, code in enumerate(code_objects):
    239       if code.start_address <= pc < code.end_address:
    240         code_objects[0], code_objects[i] = code, code_objects[0]
    241         return code
    242     return None
    243 
    244   def __iter__(self):
    245     return self.code_objects.__iter__()
    246 
    247 
    248 class CodeMap(object):
    249   """Code object map."""
    250 
    251   def __init__(self):
    252     self.pages = {}
    253     self.min_address = 1 << 64
    254     self.max_address = -1
    255 
    256   def Add(self, code, max_pages=-1):
    257     page_id = CodePage.PageId(code.start_address)
    258     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    259     pages = 0
    260     while page_id < limit_id:
    261       if max_pages >= 0 and pages > max_pages:
    262         print >>sys.stderr, \
    263             "Warning: page limit (%d) reached for %s [%s]" % (
    264             max_pages, code.name, code.origin)
    265         break
    266       if page_id in self.pages:
    267         page = self.pages[page_id]
    268       else:
    269         page = CodePage(CodePage.PageAddressFromId(page_id))
    270         self.pages[page_id] = page
    271       page.Add(code)
    272       page_id += 1
    273       pages += 1
    274     self.min_address = min(self.min_address, code.start_address)
    275     self.max_address = max(self.max_address, code.end_address)
    276 
    277   def Remove(self, code):
    278     page_id = CodePage.PageId(code.start_address)
    279     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    280     removed = False
    281     while page_id < limit_id:
    282       if page_id not in self.pages:
    283         page_id += 1
    284         continue
    285       page = self.pages[page_id]
    286       page.Remove(code)
    287       removed = True
    288       page_id += 1
    289     return removed
    290 
    291   def AllCode(self):
    292     for page in self.pages.itervalues():
    293       for code in page:
    294         if CodePage.PageAddress(code.start_address) == page.address:
    295           yield code
    296 
    297   def UsedCode(self):
    298     for code in self.AllCode():
    299       if code.IsUsed():
    300         yield code
    301 
    302   def Print(self):
    303     for code in self.AllCode():
    304       print code
    305 
    306   def Find(self, pc):
    307     if pc < self.min_address or pc >= self.max_address:
    308       return None
    309     page_id = CodePage.PageId(pc)
    310     if page_id not in self.pages:
    311       return None
    312     return self.pages[page_id].Find(pc)
    313 
    314 
    315 class CodeInfo(object):
    316   """Generic info about generated code objects."""
    317 
    318   def __init__(self, arch, header_size):
    319     self.arch = arch
    320     self.header_size = header_size
    321 
    322 
    323 class SnapshotLogReader(object):
    324   """V8 snapshot log reader."""
    325 
    326   _SNAPSHOT_CODE_NAME_RE = re.compile(
    327     r"snapshot-code-name,(\d+),\"(.*)\"")
    328 
    329   def __init__(self, log_name):
    330     self.log_name = log_name
    331 
    332   def ReadNameMap(self):
    333     log = open(self.log_name, "r")
    334     try:
    335       snapshot_pos_to_name = {}
    336       for line in log:
    337         match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
    338         if match:
    339           pos = int(match.group(1))
    340           name = match.group(2)
    341           snapshot_pos_to_name[pos] = name
    342     finally:
    343       log.close()
    344     return snapshot_pos_to_name
    345 
    346 
    347 class LogReader(object):
    348   """V8 low-level (binary) log reader."""
    349 
    350   _ARCH_TO_POINTER_TYPE_MAP = {
    351     "ia32": ctypes.c_uint32,
    352     "arm": ctypes.c_uint32,
    353     "mips": ctypes.c_uint32,
    354     "x64": ctypes.c_uint64
    355   }
    356 
    357   _CODE_CREATE_TAG = "C"
    358   _CODE_MOVE_TAG = "M"
    359   _CODE_DELETE_TAG = "D"
    360   _SNAPSHOT_POSITION_TAG = "P"
    361   _CODE_MOVING_GC_TAG = "G"
    362 
    363   def __init__(self, log_name, code_map, snapshot_pos_to_name):
    364     self.log_file = open(log_name, "r")
    365     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
    366     self.log_pos = 0
    367     self.code_map = code_map
    368     self.snapshot_pos_to_name = snapshot_pos_to_name
    369     self.address_to_snapshot_name = {}
    370 
    371     self.arch = self.log[:self.log.find("\0")]
    372     self.log_pos += len(self.arch) + 1
    373     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
    374         "Unsupported architecture %s" % self.arch
    375     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
    376 
    377     self.code_create_struct = LogReader._DefineStruct([
    378         ("name_size", ctypes.c_int32),
    379         ("code_address", pointer_type),
    380         ("code_size", ctypes.c_int32)])
    381 
    382     self.code_move_struct = LogReader._DefineStruct([
    383         ("from_address", pointer_type),
    384         ("to_address", pointer_type)])
    385 
    386     self.code_delete_struct = LogReader._DefineStruct([
    387         ("address", pointer_type)])
    388 
    389     self.snapshot_position_struct = LogReader._DefineStruct([
    390         ("address", pointer_type),
    391         ("position", ctypes.c_int32)])
    392 
    393   def ReadUpToGC(self):
    394     while self.log_pos < self.log.size():
    395       tag = self.log[self.log_pos]
    396       self.log_pos += 1
    397 
    398       if tag == LogReader._CODE_MOVING_GC_TAG:
    399         self.address_to_snapshot_name.clear()
    400         return
    401 
    402       if tag == LogReader._CODE_CREATE_TAG:
    403         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
    404         self.log_pos += ctypes.sizeof(event)
    405         start_address = event.code_address
    406         end_address = start_address + event.code_size
    407         if start_address in self.address_to_snapshot_name:
    408           name = self.address_to_snapshot_name[start_address]
    409           origin = JS_SNAPSHOT_ORIGIN
    410         else:
    411           name = self.log[self.log_pos:self.log_pos + event.name_size]
    412           origin = JS_ORIGIN
    413         self.log_pos += event.name_size
    414         origin_offset = self.log_pos
    415         self.log_pos += event.code_size
    416         code = Code(name, start_address, end_address, origin, origin_offset)
    417         conficting_code = self.code_map.Find(start_address)
    418         if conficting_code:
    419           if not (conficting_code.start_address == code.start_address and
    420             conficting_code.end_address == code.end_address):
    421             self.code_map.Remove(conficting_code)
    422           else:
    423             LogReader._HandleCodeConflict(conficting_code, code)
    424             # TODO(vitalyr): this warning is too noisy because of our
    425             # attempts to reconstruct code log from the snapshot.
    426             # print >>sys.stderr, \
    427             #     "Warning: Skipping duplicate code log entry %s" % code
    428             continue
    429         self.code_map.Add(code)
    430         continue
    431 
    432       if tag == LogReader._CODE_MOVE_TAG:
    433         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
    434         self.log_pos += ctypes.sizeof(event)
    435         old_start_address = event.from_address
    436         new_start_address = event.to_address
    437         if old_start_address == new_start_address:
    438           # Skip useless code move entries.
    439           continue
    440         code = self.code_map.Find(old_start_address)
    441         if not code:
    442           print >>sys.stderr, "Warning: Not found %x" % old_start_address
    443           continue
    444         assert code.start_address == old_start_address, \
    445             "Inexact move address %x for %s" % (old_start_address, code)
    446         self.code_map.Remove(code)
    447         size = code.end_address - code.start_address
    448         code.start_address = new_start_address
    449         code.end_address = new_start_address + size
    450         self.code_map.Add(code)
    451         continue
    452 
    453       if tag == LogReader._CODE_DELETE_TAG:
    454         event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
    455         self.log_pos += ctypes.sizeof(event)
    456         old_start_address = event.address
    457         code = self.code_map.Find(old_start_address)
    458         if not code:
    459           print >>sys.stderr, "Warning: Not found %x" % old_start_address
    460           continue
    461         assert code.start_address == old_start_address, \
    462             "Inexact delete address %x for %s" % (old_start_address, code)
    463         self.code_map.Remove(code)
    464         continue
    465 
    466       if tag == LogReader._SNAPSHOT_POSITION_TAG:
    467         event = self.snapshot_position_struct.from_buffer(self.log,
    468                                                           self.log_pos)
    469         self.log_pos += ctypes.sizeof(event)
    470         start_address = event.address
    471         snapshot_pos = event.position
    472         if snapshot_pos in self.snapshot_pos_to_name:
    473           self.address_to_snapshot_name[start_address] = \
    474               self.snapshot_pos_to_name[snapshot_pos]
    475         continue
    476 
    477       assert False, "Unknown tag %s" % tag
    478 
    479   def Dispose(self):
    480     self.log.close()
    481     self.log_file.close()
    482 
    483   @staticmethod
    484   def _DefineStruct(fields):
    485     class Struct(ctypes.Structure):
    486       _fields_ = fields
    487     return Struct
    488 
    489   @staticmethod
    490   def _HandleCodeConflict(old_code, new_code):
    491     assert (old_code.start_address == new_code.start_address and
    492             old_code.end_address == new_code.end_address), \
    493         "Conficting code log entries %s and %s" % (old_code, new_code)
    494     if old_code.name == new_code.name:
    495       return
    496     # Code object may be shared by a few functions. Collect the full
    497     # set of names.
    498     old_code.AddName(new_code.name)
    499 
    500 
    501 class Descriptor(object):
    502   """Descriptor of a structure in the binary trace log."""
    503 
    504   CTYPE_MAP = {
    505     "u16": ctypes.c_uint16,
    506     "u32": ctypes.c_uint32,
    507     "u64": ctypes.c_uint64
    508   }
    509 
    510   def __init__(self, fields):
    511     class TraceItem(ctypes.Structure):
    512       _fields_ = Descriptor.CtypesFields(fields)
    513 
    514       def __str__(self):
    515         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
    516                          for field, _ in TraceItem._fields_)
    517 
    518     self.ctype = TraceItem
    519 
    520   def Read(self, trace, offset):
    521     return self.ctype.from_buffer(trace, offset)
    522 
    523   @staticmethod
    524   def CtypesFields(fields):
    525     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
    526 
    527 
    528 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
    529 # for the gory details.
    530 
    531 
    532 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
    533 TRACE_HEADER_DESC = Descriptor([
    534   ("magic", "u64"),
    535   ("size", "u64"),
    536   ("attr_size", "u64"),
    537   ("attrs_offset", "u64"),
    538   ("attrs_size", "u64"),
    539   ("data_offset", "u64"),
    540   ("data_size", "u64"),
    541   ("event_types_offset", "u64"),
    542   ("event_types_size", "u64")
    543 ])
    544 
    545 
    546 # Reference: /usr/include/linux/perf_event.h
    547 PERF_EVENT_ATTR_DESC = Descriptor([
    548   ("type", "u32"),
    549   ("size", "u32"),
    550   ("config", "u64"),
    551   ("sample_period_or_freq", "u64"),
    552   ("sample_type", "u64"),
    553   ("read_format", "u64"),
    554   ("flags", "u64"),
    555   ("wakeup_events_or_watermark", "u32"),
    556   ("bp_type", "u32"),
    557   ("bp_addr", "u64"),
    558   ("bp_len", "u64")
    559 ])
    560 
    561 
    562 # Reference: /usr/include/linux/perf_event.h
    563 PERF_EVENT_HEADER_DESC = Descriptor([
    564   ("type", "u32"),
    565   ("misc", "u16"),
    566   ("size", "u16")
    567 ])
    568 
    569 
    570 # Reference: kernel/events/core.c
    571 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
    572   ("pid", "u32"),
    573   ("tid", "u32"),
    574   ("addr", "u64"),
    575   ("len", "u64"),
    576   ("pgoff", "u64")
    577 ])
    578 
    579 
    580 # perf_event_attr.sample_type bits control the set of
    581 # perf_sample_event fields.
    582 PERF_SAMPLE_IP = 1 << 0
    583 PERF_SAMPLE_TID = 1 << 1
    584 PERF_SAMPLE_TIME = 1 << 2
    585 PERF_SAMPLE_ADDR = 1 << 3
    586 PERF_SAMPLE_READ = 1 << 4
    587 PERF_SAMPLE_CALLCHAIN = 1 << 5
    588 PERF_SAMPLE_ID = 1 << 6
    589 PERF_SAMPLE_CPU = 1 << 7
    590 PERF_SAMPLE_PERIOD = 1 << 8
    591 PERF_SAMPLE_STREAM_ID = 1 << 9
    592 PERF_SAMPLE_RAW = 1 << 10
    593 
    594 
    595 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
    596 PERF_SAMPLE_EVENT_BODY_FIELDS = [
    597   ("ip", "u64", PERF_SAMPLE_IP),
    598   ("pid", "u32", PERF_SAMPLE_TID),
    599   ("tid", "u32", PERF_SAMPLE_TID),
    600   ("time", "u64", PERF_SAMPLE_TIME),
    601   ("addr", "u64", PERF_SAMPLE_ADDR),
    602   ("id", "u64", PERF_SAMPLE_ID),
    603   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
    604   ("cpu", "u32", PERF_SAMPLE_CPU),
    605   ("res", "u32", PERF_SAMPLE_CPU),
    606   ("period", "u64", PERF_SAMPLE_PERIOD),
    607   # Don't want to handle read format that comes after the period and
    608   # before the callchain and has variable size.
    609   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
    610   # Raw data follows the callchain and is ignored.
    611 ]
    612 
    613 
    614 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
    615 
    616 
    617 PERF_RECORD_MMAP = 1
    618 PERF_RECORD_SAMPLE = 9
    619 
    620 
    621 class TraceReader(object):
    622   """Perf (linux-2.6/tools/perf) trace file reader."""
    623 
    624   _TRACE_HEADER_MAGIC = 4993446653023372624
    625 
    626   def __init__(self, trace_name):
    627     self.trace_file = open(trace_name, "r")
    628     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    629     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    630     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
    631       print >>sys.stderr, "Warning: unsupported trace header magic"
    632     self.offset = self.trace_header.data_offset
    633     self.limit = self.trace_header.data_offset + self.trace_header.data_size
    634     assert self.limit <= self.trace.size(), \
    635         "Trace data limit exceeds trace file size"
    636     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    637     assert self.trace_header.attrs_size != 0, \
    638         "No perf event attributes found in the trace"
    639     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
    640                                                 self.trace_header.attrs_offset)
    641     self.sample_event_body_desc = self._SampleEventBodyDesc(
    642         perf_event_attr.sample_type)
    643     self.callchain_supported = \
    644         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    645     if self.callchain_supported:
    646       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
    647       self.ip_size = ctypes.sizeof(self.ip_struct)
    648 
    649   def ReadEventHeader(self):
    650     if self.offset >= self.limit:
    651       return None, 0
    652     offset = self.offset
    653     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    654     self.offset += header.size
    655     return header, offset
    656 
    657   def ReadMmap(self, header, offset):
    658     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
    659                                                offset + self.header_size)
    660     # Read null-terminated filename.
    661     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
    662                           offset + header.size]
    663     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
    664     return mmap_info
    665 
    666   def ReadSample(self, header, offset):
    667     sample = self.sample_event_body_desc.Read(self.trace,
    668                                               offset + self.header_size)
    669     if not self.callchain_supported:
    670       return sample
    671     sample.ips = []
    672     offset += self.header_size + ctypes.sizeof(sample)
    673     for _ in xrange(sample.nr):
    674       sample.ips.append(
    675         self.ip_struct.from_buffer(self.trace, offset).value)
    676       offset += self.ip_size
    677     return sample
    678 
    679   def Dispose(self):
    680     self.trace.close()
    681     self.trace_file.close()
    682 
    683   def _SampleEventBodyDesc(self, sample_type):
    684     assert (sample_type & PERF_SAMPLE_READ) == 0, \
    685            "Can't hande read format in samples"
    686     fields = [(field, format)
    687               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
    688               if (bit & sample_type) != 0]
    689     return Descriptor(fields)
    690 
    691 
    692 OBJDUMP_SECTION_HEADER_RE = re.compile(
    693   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
    694 OBJDUMP_SYMBOL_LINE_RE = re.compile(
    695   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
    696 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
    697   r"^DYNAMIC SYMBOL TABLE")
    698 OBJDUMP_SKIP_RE = re.compile(
    699   r"^.*ld\.so\.cache$")
    700 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
    701 PERF_KERNEL_ALLSYMS_RE = re.compile(
    702   r".*kallsyms.*")
    703 KERNEL_ALLSYMS_LINE_RE = re.compile(
    704   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
    705 
    706 
    707 class LibraryRepo(object):
    708   def __init__(self):
    709     self.infos = []
    710     self.names = set()
    711     self.ticks = {}
    712 
    713   def Load(self, mmap_info, code_map, options):
    714     # Skip kernel mmaps when requested using the fact that their tid
    715     # is 0.
    716     if mmap_info.tid == 0 and not options.kernel:
    717       return True
    718     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
    719       return True
    720     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
    721       return self._LoadKernelSymbols(code_map)
    722     self.infos.append(mmap_info)
    723     mmap_info.ticks = 0
    724     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    725     if not os.path.exists(mmap_info.filename):
    726       return True
    727     # Request section headers (-h), symbols (-t), and dynamic symbols
    728     # (-T) from objdump.
    729     # Unfortunately, section headers span two lines, so we have to
    730     # keep the just seen section name (from the first line in each
    731     # section header) in the after_section variable.
    732     if mmap_info.filename.endswith(".ko"):
    733       dynamic_symbols = ""
    734     else:
    735       dynamic_symbols = "-T"
    736     process = subprocess.Popen(
    737       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
    738       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    739     pipe = process.stdout
    740     after_section = None
    741     code_sections = set()
    742     reloc_sections = set()
    743     dynamic = False
    744     try:
    745       for line in pipe:
    746         if after_section:
    747           if line.find("CODE") != -1:
    748             code_sections.add(after_section)
    749           if line.find("RELOC") != -1:
    750             reloc_sections.add(after_section)
    751           after_section = None
    752           continue
    753 
    754         match = OBJDUMP_SECTION_HEADER_RE.match(line)
    755         if match:
    756           after_section = match.group(1)
    757           continue
    758 
    759         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
    760           dynamic = True
    761           continue
    762 
    763         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
    764         if match:
    765           start_address = int(match.group(1), 16)
    766           origin_offset = start_address
    767           flags = match.group(2)
    768           section = match.group(3)
    769           if section in code_sections:
    770             if dynamic or section in reloc_sections:
    771               start_address += mmap_info.addr
    772             size = int(match.group(4), 16)
    773             name = match.group(5)
    774             origin = mmap_info.filename
    775             code_map.Add(Code(name, start_address, start_address + size,
    776                               origin, origin_offset))
    777     finally:
    778       pipe.close()
    779     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
    780 
    781   def Tick(self, pc):
    782     for i, mmap_info in enumerate(self.infos):
    783       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
    784         mmap_info.ticks += 1
    785         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
    786         return True
    787     return False
    788 
    789   def _UniqueMmapName(self, mmap_info):
    790     name = mmap_info.filename
    791     index = 1
    792     while name in self.names:
    793       name = "%s-%d" % (mmap_info.filename, index)
    794       index += 1
    795     self.names.add(name)
    796     return name
    797 
    798   def _LoadKernelSymbols(self, code_map):
    799     if not os.path.exists(KERNEL_ALLSYMS_FILE):
    800       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
    801       return False
    802     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    803     code = None
    804     for line in kallsyms:
    805       match = KERNEL_ALLSYMS_LINE_RE.match(line)
    806       if match:
    807         start_address = int(match.group(1), 16)
    808         end_address = start_address
    809         name = match.group(2)
    810         if code:
    811           code.end_address = start_address
    812           code_map.Add(code, 16)
    813         code = Code(name, start_address, end_address, "kernel", 0)
    814     return True
    815 
    816 
    817 def PrintReport(code_map, library_repo, arch, ticks, options):
    818   print "Ticks per symbol:"
    819   used_code = [code for code in code_map.UsedCode()]
    820   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
    821   for i, code in enumerate(used_code):
    822     code_ticks = code.self_ticks
    823     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
    824                                     code.FullName(), code.origin)
    825     if options.disasm_all or i < options.disasm_top:
    826       code.PrintAnnotated(arch, options)
    827   print
    828   print "Ticks per library:"
    829   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
    830   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
    831   for mmap_info in mmap_infos:
    832     mmap_ticks = mmap_info.ticks
    833     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
    834                                mmap_info.unique_name)
    835 
    836 
    837 def PrintDot(code_map, options):
    838   print "digraph G {"
    839   for code in code_map.UsedCode():
    840     if code.self_ticks < 10:
    841       continue
    842     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    843     if code.callee_ticks:
    844       for callee, ticks in code.callee_ticks.iteritems():
    845         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
    846   print "}"
    847 
    848 
    849 if __name__ == "__main__":
    850   parser = optparse.OptionParser(USAGE)
    851   parser.add_option("--snapshot-log",
    852                     default="obj/release/snapshot.log",
    853                     help="V8 snapshot log file name [default: %default]")
    854   parser.add_option("--log",
    855                     default="v8.log",
    856                     help="V8 log file name [default: %default]")
    857   parser.add_option("--snapshot",
    858                     default=False,
    859                     action="store_true",
    860                     help="process V8 snapshot log [default: %default]")
    861   parser.add_option("--trace",
    862                     default="perf.data",
    863                     help="perf trace file name [default: %default]")
    864   parser.add_option("--kernel",
    865                     default=False,
    866                     action="store_true",
    867                     help="process kernel entries [default: %default]")
    868   parser.add_option("--disasm-top",
    869                     default=0,
    870                     type="int",
    871                     help=("number of top symbols to disassemble and annotate "
    872                           "[default: %default]"))
    873   parser.add_option("--disasm-all",
    874                     default=False,
    875                     action="store_true",
    876                     help=("disassemble and annotate all used symbols "
    877                           "[default: %default]"))
    878   parser.add_option("--dot",
    879                     default=False,
    880                     action="store_true",
    881                     help="produce dot output (WIP) [default: %default]")
    882   parser.add_option("--quiet", "-q",
    883                     default=False,
    884                     action="store_true",
    885                     help="no auxiliary messages [default: %default]")
    886   parser.add_option("--gc-fake-mmap",
    887                     default="/tmp/__v8_gc__",
    888                     help="gc fake mmap file [default: %default]")
    889   parser.add_option("--objdump",
    890                     default="/usr/bin/objdump",
    891                     help="objdump tool to use [default: %default]")
    892   parser.add_option("--host-root",
    893                     default="",
    894                     help="Path to the host root [default: %default]")
    895   options, args = parser.parse_args()
    896 
    897   if not options.quiet:
    898     if options.snapshot:
    899       print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
    900                                         options.log,
    901                                         options.log)
    902     else:
    903       print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
    904     print "Perf trace file: %s" % options.trace
    905 
    906   V8_GC_FAKE_MMAP = options.gc_fake_mmap
    907   HOST_ROOT = options.host_root
    908   if os.path.exists(options.objdump):
    909     disasm.OBJDUMP_BIN = options.objdump
    910     OBJDUMP_BIN = options.objdump
    911   else:
    912     print "Cannot find %s, falling back to default objdump" % options.objdump
    913 
    914   # Stats.
    915   events = 0
    916   ticks = 0
    917   missed_ticks = 0
    918   really_missed_ticks = 0
    919   optimized_ticks = 0
    920   generated_ticks = 0
    921   v8_internal_ticks = 0
    922   mmap_time = 0
    923   sample_time = 0
    924 
    925   # Process the snapshot log to fill the snapshot name map.
    926   snapshot_name_map = {}
    927   if options.snapshot:
    928     snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
    929     snapshot_name_map = snapshot_log_reader.ReadNameMap()
    930 
    931   # Initialize the log reader.
    932   code_map = CodeMap()
    933   log_reader = LogReader(log_name=options.log + ".ll",
    934                          code_map=code_map,
    935                          snapshot_pos_to_name=snapshot_name_map)
    936   if not options.quiet:
    937     print "Generated code architecture: %s" % log_reader.arch
    938     print
    939     sys.stdout.flush()
    940 
    941   # Process the code and trace logs.
    942   library_repo = LibraryRepo()
    943   log_reader.ReadUpToGC()
    944   trace_reader = TraceReader(options.trace)
    945   while True:
    946     header, offset = trace_reader.ReadEventHeader()
    947     if not header:
    948       break
    949     events += 1
    950     if header.type == PERF_RECORD_MMAP:
    951       start = time.time()
    952       mmap_info = trace_reader.ReadMmap(header, offset)
    953       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
    954         log_reader.ReadUpToGC()
    955       else:
    956         library_repo.Load(mmap_info, code_map, options)
    957       mmap_time += time.time() - start
    958     elif header.type == PERF_RECORD_SAMPLE:
    959       ticks += 1
    960       start = time.time()
    961       sample = trace_reader.ReadSample(header, offset)
    962       code = code_map.Find(sample.ip)
    963       if code:
    964         code.Tick(sample.ip)
    965         if code.codetype == Code.OPTIMIZED:
    966           optimized_ticks += 1
    967         elif code.codetype == Code.FULL_CODEGEN:
    968           generated_ticks += 1
    969         elif code.codetype == Code.V8INTERNAL:
    970           v8_internal_ticks += 1
    971       else:
    972         missed_ticks += 1
    973       if not library_repo.Tick(sample.ip) and not code:
    974         really_missed_ticks += 1
    975       if trace_reader.callchain_supported:
    976         for ip in sample.ips:
    977           caller_code = code_map.Find(ip)
    978           if caller_code:
    979             if code:
    980               caller_code.CalleeTick(code)
    981             code = caller_code
    982       sample_time += time.time() - start
    983 
    984   if options.dot:
    985     PrintDot(code_map, options)
    986   else:
    987     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
    988 
    989     if not options.quiet:
    990       def PrintTicks(number, total, description):
    991         print("%10d %5.1f%% ticks in %s" %
    992               (number, 100.0*number/total, description))
    993       print
    994       print "Stats:"
    995       print "%10d total trace events" % events
    996       print "%10d total ticks" % ticks
    997       print "%10d ticks not in symbols" % missed_ticks
    998       unaccounted = "unaccounted ticks"
    999       if really_missed_ticks > 0:
   1000         unaccounted += " (probably in the kernel, try --kernel)"
   1001       PrintTicks(really_missed_ticks, ticks, unaccounted)
   1002       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
   1003       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
   1004       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
   1005       print "%10d total symbols" % len([c for c in code_map.AllCode()])
   1006       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
   1007       print "%9.2fs library processing time" % mmap_time
   1008       print "%9.2fs tick processing time" % sample_time
   1009 
   1010   log_reader.Dispose()
   1011   trace_reader.Dispose()
   1012