Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2012 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 import bisect
     31 import collections
     32 import ctypes
     33 import disasm
     34 import mmap
     35 import optparse
     36 import os
     37 import re
     38 import subprocess
     39 import sys
     40 import time
     41 
     42 
     43 USAGE="""usage: %prog [OPTION]...
     44 
     45 Analyses V8 and perf logs to produce profiles.
     46 
     47 Perf logs can be collected using a command like:
     48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
     49   # -R: collect all data
     50   # -e cycles: use cpu-cycles event (run "perf list" for details)
     51   # -c 10000: write a sample after each 10000 events
     52   # -f: force output file overwrite
     53   # -i: limit profiling to our process and the kernel
     54   # --ll-prof shell flag enables the right V8 logs
     55 This will produce a binary trace file (perf.data) that %prog can analyse.
     56 
     57 IMPORTANT:
     58   The kernel has an internal maximum for events per second, it is 100K by
     59   default. That's not enough for "-c 10000". Set it to some higher value:
     60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
     61   You can also make the warning about kernel address maps go away:
     62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
     63 
     64 We have a convenience script that handles all of the above for you:
     65   $ tools/run-llprof.sh ./d8 bench.js
     66 
     67 Examples:
     68   # Print flat profile with annotated disassembly for the 10 top
     69   # symbols. Use default log names and include the snapshot log.
     70   $ %prog --snapshot --disasm-top=10
     71 
     72   # Print flat profile with annotated disassembly for all used symbols.
     73   # Use default log names and include kernel symbols into analysis.
     74   $ %prog --disasm-all --kernel
     75 
     76   # Print flat profile. Use custom log names.
     77   $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
     78 """
     79 
     80 
     81 JS_ORIGIN = "js"
     82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
     83 
     84 class Code(object):
     85   """Code object."""
     86 
     87   _id = 0
     88   UNKNOWN = 0
     89   V8INTERNAL = 1
     90   FULL_CODEGEN = 2
     91   OPTIMIZED = 3
     92 
     93   def __init__(self, name, start_address, end_address, origin, origin_offset):
     94     self.id = Code._id
     95     Code._id += 1
     96     self.name = name
     97     self.other_names = None
     98     self.start_address = start_address
     99     self.end_address = end_address
    100     self.origin = origin
    101     self.origin_offset = origin_offset
    102     self.self_ticks = 0
    103     self.self_ticks_map = None
    104     self.callee_ticks = None
    105     if name.startswith("LazyCompile:*"):
    106       self.codetype = Code.OPTIMIZED
    107     elif name.startswith("LazyCompile:"):
    108       self.codetype = Code.FULL_CODEGEN
    109     elif name.startswith("v8::internal::"):
    110       self.codetype = Code.V8INTERNAL
    111     else:
    112       self.codetype = Code.UNKNOWN
    113 
    114   def AddName(self, name):
    115     assert self.name != name
    116     if self.other_names is None:
    117       self.other_names = [name]
    118       return
    119     if not name in self.other_names:
    120       self.other_names.append(name)
    121 
    122   def FullName(self):
    123     if self.other_names is None:
    124       return self.name
    125     self.other_names.sort()
    126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
    127 
    128   def IsUsed(self):
    129     return self.self_ticks > 0 or self.callee_ticks is not None
    130 
    131   def Tick(self, pc):
    132     self.self_ticks += 1
    133     if self.self_ticks_map is None:
    134       self.self_ticks_map = collections.defaultdict(lambda: 0)
    135     offset = pc - self.start_address
    136     self.self_ticks_map[offset] += 1
    137 
    138   def CalleeTick(self, callee):
    139     if self.callee_ticks is None:
    140       self.callee_ticks = collections.defaultdict(lambda: 0)
    141     self.callee_ticks[callee] += 1
    142 
    143   def PrintAnnotated(self, arch, options):
    144     if self.self_ticks_map is None:
    145       ticks_map = []
    146     else:
    147       ticks_map = self.self_ticks_map.items()
    148     # Convert the ticks map to offsets and counts arrays so that later
    149     # we can do binary search in the offsets array.
    150     ticks_map.sort(key=lambda t: t[0])
    151     ticks_offsets = [t[0] for t in ticks_map]
    152     ticks_counts = [t[1] for t in ticks_map]
    153     # Get a list of disassembled lines and their addresses.
    154     lines = self._GetDisasmLines(arch, options)
    155     if len(lines) == 0:
    156       return
    157     # Print annotated lines.
    158     address = lines[0][0]
    159     total_count = 0
    160     for i in xrange(len(lines)):
    161       start_offset = lines[i][0] - address
    162       if i == len(lines) - 1:
    163         end_offset = self.end_address - self.start_address
    164       else:
    165         end_offset = lines[i + 1][0] - address
    166       # Ticks (reported pc values) are not always precise, i.e. not
    167       # necessarily point at instruction starts. So we have to search
    168       # for ticks that touch the current instruction line.
    169       j = bisect.bisect_left(ticks_offsets, end_offset)
    170       count = 0
    171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
    172         if offset < start_offset:
    173           break
    174         count += cnt
    175       total_count += count
    176       count = 100.0 * count / self.self_ticks
    177       if count >= 0.01:
    178         print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
    179       else:
    180         print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
    181     print
    182     assert total_count == self.self_ticks, \
    183         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
    184 
    185   def __str__(self):
    186     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
    187       self.name,
    188       self.start_address,
    189       self.end_address,
    190       self.end_address - self.start_address,
    191       self.origin)
    192 
    193   def _GetDisasmLines(self, arch, options):
    194     if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
    195       inplace = False
    196       filename = options.log + ".ll"
    197     else:
    198       inplace = True
    199       filename = self.origin
    200     return disasm.GetDisasmLines(filename,
    201                                  self.origin_offset,
    202                                  self.end_address - self.start_address,
    203                                  arch,
    204                                  inplace)
    205 
    206 
    207 class CodePage(object):
    208   """Group of adjacent code objects."""
    209 
    210   SHIFT = 20  # 1M pages
    211   SIZE = (1 << SHIFT)
    212   MASK = ~(SIZE - 1)
    213 
    214   @staticmethod
    215   def PageAddress(address):
    216     return address & CodePage.MASK
    217 
    218   @staticmethod
    219   def PageId(address):
    220     return address >> CodePage.SHIFT
    221 
    222   @staticmethod
    223   def PageAddressFromId(id):
    224     return id << CodePage.SHIFT
    225 
    226   def __init__(self, address):
    227     self.address = address
    228     self.code_objects = []
    229 
    230   def Add(self, code):
    231     self.code_objects.append(code)
    232 
    233   def Remove(self, code):
    234     self.code_objects.remove(code)
    235 
    236   def Find(self, pc):
    237     code_objects = self.code_objects
    238     for i, code in enumerate(code_objects):
    239       if code.start_address <= pc < code.end_address:
    240         code_objects[0], code_objects[i] = code, code_objects[0]
    241         return code
    242     return None
    243 
    244   def __iter__(self):
    245     return self.code_objects.__iter__()
    246 
    247 
    248 class CodeMap(object):
    249   """Code object map."""
    250 
    251   def __init__(self):
    252     self.pages = {}
    253     self.min_address = 1 << 64
    254     self.max_address = -1
    255 
    256   def Add(self, code, max_pages=-1):
    257     page_id = CodePage.PageId(code.start_address)
    258     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    259     pages = 0
    260     while page_id < limit_id:
    261       if max_pages >= 0 and pages > max_pages:
    262         print >>sys.stderr, \
    263             "Warning: page limit (%d) reached for %s [%s]" % (
    264             max_pages, code.name, code.origin)
    265         break
    266       if page_id in self.pages:
    267         page = self.pages[page_id]
    268       else:
    269         page = CodePage(CodePage.PageAddressFromId(page_id))
    270         self.pages[page_id] = page
    271       page.Add(code)
    272       page_id += 1
    273       pages += 1
    274     self.min_address = min(self.min_address, code.start_address)
    275     self.max_address = max(self.max_address, code.end_address)
    276 
    277   def Remove(self, code):
    278     page_id = CodePage.PageId(code.start_address)
    279     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
    280     removed = False
    281     while page_id < limit_id:
    282       if page_id not in self.pages:
    283         page_id += 1
    284         continue
    285       page = self.pages[page_id]
    286       page.Remove(code)
    287       removed = True
    288       page_id += 1
    289     return removed
    290 
    291   def AllCode(self):
    292     for page in self.pages.itervalues():
    293       for code in page:
    294         if CodePage.PageAddress(code.start_address) == page.address:
    295           yield code
    296 
    297   def UsedCode(self):
    298     for code in self.AllCode():
    299       if code.IsUsed():
    300         yield code
    301 
    302   def Print(self):
    303     for code in self.AllCode():
    304       print code
    305 
    306   def Find(self, pc):
    307     if pc < self.min_address or pc >= self.max_address:
    308       return None
    309     page_id = CodePage.PageId(pc)
    310     if page_id not in self.pages:
    311       return None
    312     return self.pages[page_id].Find(pc)
    313 
    314 
    315 class CodeInfo(object):
    316   """Generic info about generated code objects."""
    317 
    318   def __init__(self, arch, header_size):
    319     self.arch = arch
    320     self.header_size = header_size
    321 
    322 
    323 class SnapshotLogReader(object):
    324   """V8 snapshot log reader."""
    325 
    326   _SNAPSHOT_CODE_NAME_RE = re.compile(
    327     r"snapshot-code-name,(\d+),\"(.*)\"")
    328 
    329   def __init__(self, log_name):
    330     self.log_name = log_name
    331 
    332   def ReadNameMap(self):
    333     log = open(self.log_name, "r")
    334     try:
    335       snapshot_pos_to_name = {}
    336       for line in log:
    337         match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
    338         if match:
    339           pos = int(match.group(1))
    340           name = match.group(2)
    341           snapshot_pos_to_name[pos] = name
    342     finally:
    343       log.close()
    344     return snapshot_pos_to_name
    345 
    346 
    347 class LogReader(object):
    348   """V8 low-level (binary) log reader."""
    349 
    350   _ARCH_TO_POINTER_TYPE_MAP = {
    351     "ia32": ctypes.c_uint32,
    352     "arm": ctypes.c_uint32,
    353     "mips": ctypes.c_uint32,
    354     "x64": ctypes.c_uint64,
    355     "arm64": ctypes.c_uint64
    356   }
    357 
    358   _CODE_CREATE_TAG = "C"
    359   _CODE_MOVE_TAG = "M"
    360   _CODE_DELETE_TAG = "D"
    361   _SNAPSHOT_POSITION_TAG = "P"
    362   _CODE_MOVING_GC_TAG = "G"
    363 
    364   def __init__(self, log_name, code_map, snapshot_pos_to_name):
    365     self.log_file = open(log_name, "r")
    366     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
    367     self.log_pos = 0
    368     self.code_map = code_map
    369     self.snapshot_pos_to_name = snapshot_pos_to_name
    370     self.address_to_snapshot_name = {}
    371 
    372     self.arch = self.log[:self.log.find("\0")]
    373     self.log_pos += len(self.arch) + 1
    374     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
    375         "Unsupported architecture %s" % self.arch
    376     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
    377 
    378     self.code_create_struct = LogReader._DefineStruct([
    379         ("name_size", ctypes.c_int32),
    380         ("code_address", pointer_type),
    381         ("code_size", ctypes.c_int32)])
    382 
    383     self.code_move_struct = LogReader._DefineStruct([
    384         ("from_address", pointer_type),
    385         ("to_address", pointer_type)])
    386 
    387     self.code_delete_struct = LogReader._DefineStruct([
    388         ("address", pointer_type)])
    389 
    390     self.snapshot_position_struct = LogReader._DefineStruct([
    391         ("address", pointer_type),
    392         ("position", ctypes.c_int32)])
    393 
    394   def ReadUpToGC(self):
    395     while self.log_pos < self.log.size():
    396       tag = self.log[self.log_pos]
    397       self.log_pos += 1
    398 
    399       if tag == LogReader._CODE_MOVING_GC_TAG:
    400         self.address_to_snapshot_name.clear()
    401         return
    402 
    403       if tag == LogReader._CODE_CREATE_TAG:
    404         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
    405         self.log_pos += ctypes.sizeof(event)
    406         start_address = event.code_address
    407         end_address = start_address + event.code_size
    408         if start_address in self.address_to_snapshot_name:
    409           name = self.address_to_snapshot_name[start_address]
    410           origin = JS_SNAPSHOT_ORIGIN
    411         else:
    412           name = self.log[self.log_pos:self.log_pos + event.name_size]
    413           origin = JS_ORIGIN
    414         self.log_pos += event.name_size
    415         origin_offset = self.log_pos
    416         self.log_pos += event.code_size
    417         code = Code(name, start_address, end_address, origin, origin_offset)
    418         conficting_code = self.code_map.Find(start_address)
    419         if conficting_code:
    420           if not (conficting_code.start_address == code.start_address and
    421             conficting_code.end_address == code.end_address):
    422             self.code_map.Remove(conficting_code)
    423           else:
    424             LogReader._HandleCodeConflict(conficting_code, code)
    425             # TODO(vitalyr): this warning is too noisy because of our
    426             # attempts to reconstruct code log from the snapshot.
    427             # print >>sys.stderr, \
    428             #     "Warning: Skipping duplicate code log entry %s" % code
    429             continue
    430         self.code_map.Add(code)
    431         continue
    432 
    433       if tag == LogReader._CODE_MOVE_TAG:
    434         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
    435         self.log_pos += ctypes.sizeof(event)
    436         old_start_address = event.from_address
    437         new_start_address = event.to_address
    438         if old_start_address == new_start_address:
    439           # Skip useless code move entries.
    440           continue
    441         code = self.code_map.Find(old_start_address)
    442         if not code:
    443           print >>sys.stderr, "Warning: Not found %x" % old_start_address
    444           continue
    445         assert code.start_address == old_start_address, \
    446             "Inexact move address %x for %s" % (old_start_address, code)
    447         self.code_map.Remove(code)
    448         size = code.end_address - code.start_address
    449         code.start_address = new_start_address
    450         code.end_address = new_start_address + size
    451         self.code_map.Add(code)
    452         continue
    453 
    454       if tag == LogReader._CODE_DELETE_TAG:
    455         event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
    456         self.log_pos += ctypes.sizeof(event)
    457         old_start_address = event.address
    458         code = self.code_map.Find(old_start_address)
    459         if not code:
    460           print >>sys.stderr, "Warning: Not found %x" % old_start_address
    461           continue
    462         assert code.start_address == old_start_address, \
    463             "Inexact delete address %x for %s" % (old_start_address, code)
    464         self.code_map.Remove(code)
    465         continue
    466 
    467       if tag == LogReader._SNAPSHOT_POSITION_TAG:
    468         event = self.snapshot_position_struct.from_buffer(self.log,
    469                                                           self.log_pos)
    470         self.log_pos += ctypes.sizeof(event)
    471         start_address = event.address
    472         snapshot_pos = event.position
    473         if snapshot_pos in self.snapshot_pos_to_name:
    474           self.address_to_snapshot_name[start_address] = \
    475               self.snapshot_pos_to_name[snapshot_pos]
    476         continue
    477 
    478       assert False, "Unknown tag %s" % tag
    479 
    480   def Dispose(self):
    481     self.log.close()
    482     self.log_file.close()
    483 
    484   @staticmethod
    485   def _DefineStruct(fields):
    486     class Struct(ctypes.Structure):
    487       _fields_ = fields
    488     return Struct
    489 
    490   @staticmethod
    491   def _HandleCodeConflict(old_code, new_code):
    492     assert (old_code.start_address == new_code.start_address and
    493             old_code.end_address == new_code.end_address), \
    494         "Conficting code log entries %s and %s" % (old_code, new_code)
    495     if old_code.name == new_code.name:
    496       return
    497     # Code object may be shared by a few functions. Collect the full
    498     # set of names.
    499     old_code.AddName(new_code.name)
    500 
    501 
    502 class Descriptor(object):
    503   """Descriptor of a structure in the binary trace log."""
    504 
    505   CTYPE_MAP = {
    506     "u16": ctypes.c_uint16,
    507     "u32": ctypes.c_uint32,
    508     "u64": ctypes.c_uint64
    509   }
    510 
    511   def __init__(self, fields):
    512     class TraceItem(ctypes.Structure):
    513       _fields_ = Descriptor.CtypesFields(fields)
    514 
    515       def __str__(self):
    516         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
    517                          for field, _ in TraceItem._fields_)
    518 
    519     self.ctype = TraceItem
    520 
    521   def Read(self, trace, offset):
    522     return self.ctype.from_buffer(trace, offset)
    523 
    524   @staticmethod
    525   def CtypesFields(fields):
    526     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
    527 
    528 
    529 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
    530 # for the gory details.
    531 
    532 
    533 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
    534 TRACE_HEADER_DESC = Descriptor([
    535   ("magic", "u64"),
    536   ("size", "u64"),
    537   ("attr_size", "u64"),
    538   ("attrs_offset", "u64"),
    539   ("attrs_size", "u64"),
    540   ("data_offset", "u64"),
    541   ("data_size", "u64"),
    542   ("event_types_offset", "u64"),
    543   ("event_types_size", "u64")
    544 ])
    545 
    546 
    547 # Reference: /usr/include/linux/perf_event.h
    548 PERF_EVENT_ATTR_DESC = Descriptor([
    549   ("type", "u32"),
    550   ("size", "u32"),
    551   ("config", "u64"),
    552   ("sample_period_or_freq", "u64"),
    553   ("sample_type", "u64"),
    554   ("read_format", "u64"),
    555   ("flags", "u64"),
    556   ("wakeup_events_or_watermark", "u32"),
    557   ("bp_type", "u32"),
    558   ("bp_addr", "u64"),
    559   ("bp_len", "u64")
    560 ])
    561 
    562 
    563 # Reference: /usr/include/linux/perf_event.h
    564 PERF_EVENT_HEADER_DESC = Descriptor([
    565   ("type", "u32"),
    566   ("misc", "u16"),
    567   ("size", "u16")
    568 ])
    569 
    570 
    571 # Reference: kernel/events/core.c
    572 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
    573   ("pid", "u32"),
    574   ("tid", "u32"),
    575   ("addr", "u64"),
    576   ("len", "u64"),
    577   ("pgoff", "u64")
    578 ])
    579 
    580 
    581 # perf_event_attr.sample_type bits control the set of
    582 # perf_sample_event fields.
    583 PERF_SAMPLE_IP = 1 << 0
    584 PERF_SAMPLE_TID = 1 << 1
    585 PERF_SAMPLE_TIME = 1 << 2
    586 PERF_SAMPLE_ADDR = 1 << 3
    587 PERF_SAMPLE_READ = 1 << 4
    588 PERF_SAMPLE_CALLCHAIN = 1 << 5
    589 PERF_SAMPLE_ID = 1 << 6
    590 PERF_SAMPLE_CPU = 1 << 7
    591 PERF_SAMPLE_PERIOD = 1 << 8
    592 PERF_SAMPLE_STREAM_ID = 1 << 9
    593 PERF_SAMPLE_RAW = 1 << 10
    594 
    595 
    596 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
    597 PERF_SAMPLE_EVENT_BODY_FIELDS = [
    598   ("ip", "u64", PERF_SAMPLE_IP),
    599   ("pid", "u32", PERF_SAMPLE_TID),
    600   ("tid", "u32", PERF_SAMPLE_TID),
    601   ("time", "u64", PERF_SAMPLE_TIME),
    602   ("addr", "u64", PERF_SAMPLE_ADDR),
    603   ("id", "u64", PERF_SAMPLE_ID),
    604   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
    605   ("cpu", "u32", PERF_SAMPLE_CPU),
    606   ("res", "u32", PERF_SAMPLE_CPU),
    607   ("period", "u64", PERF_SAMPLE_PERIOD),
    608   # Don't want to handle read format that comes after the period and
    609   # before the callchain and has variable size.
    610   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
    611   # Raw data follows the callchain and is ignored.
    612 ]
    613 
    614 
    615 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
    616 
    617 
    618 PERF_RECORD_MMAP = 1
    619 PERF_RECORD_SAMPLE = 9
    620 
    621 
    622 class TraceReader(object):
    623   """Perf (linux-2.6/tools/perf) trace file reader."""
    624 
    625   _TRACE_HEADER_MAGIC = 4993446653023372624
    626 
    627   def __init__(self, trace_name):
    628     self.trace_file = open(trace_name, "r")
    629     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
    630     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
    631     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
    632       print >>sys.stderr, "Warning: unsupported trace header magic"
    633     self.offset = self.trace_header.data_offset
    634     self.limit = self.trace_header.data_offset + self.trace_header.data_size
    635     assert self.limit <= self.trace.size(), \
    636         "Trace data limit exceeds trace file size"
    637     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
    638     assert self.trace_header.attrs_size != 0, \
    639         "No perf event attributes found in the trace"
    640     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
    641                                                 self.trace_header.attrs_offset)
    642     self.sample_event_body_desc = self._SampleEventBodyDesc(
    643         perf_event_attr.sample_type)
    644     self.callchain_supported = \
    645         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
    646     if self.callchain_supported:
    647       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
    648       self.ip_size = ctypes.sizeof(self.ip_struct)
    649 
    650   def ReadEventHeader(self):
    651     if self.offset >= self.limit:
    652       return None, 0
    653     offset = self.offset
    654     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
    655     self.offset += header.size
    656     return header, offset
    657 
    658   def ReadMmap(self, header, offset):
    659     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
    660                                                offset + self.header_size)
    661     # Read null-terminated filename.
    662     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
    663                           offset + header.size]
    664     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
    665     return mmap_info
    666 
    667   def ReadSample(self, header, offset):
    668     sample = self.sample_event_body_desc.Read(self.trace,
    669                                               offset + self.header_size)
    670     if not self.callchain_supported:
    671       return sample
    672     sample.ips = []
    673     offset += self.header_size + ctypes.sizeof(sample)
    674     for _ in xrange(sample.nr):
    675       sample.ips.append(
    676         self.ip_struct.from_buffer(self.trace, offset).value)
    677       offset += self.ip_size
    678     return sample
    679 
    680   def Dispose(self):
    681     self.trace.close()
    682     self.trace_file.close()
    683 
    684   def _SampleEventBodyDesc(self, sample_type):
    685     assert (sample_type & PERF_SAMPLE_READ) == 0, \
    686            "Can't hande read format in samples"
    687     fields = [(field, format)
    688               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
    689               if (bit & sample_type) != 0]
    690     return Descriptor(fields)
    691 
    692 
    693 OBJDUMP_SECTION_HEADER_RE = re.compile(
    694   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
    695 OBJDUMP_SYMBOL_LINE_RE = re.compile(
    696   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
    697 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
    698   r"^DYNAMIC SYMBOL TABLE")
    699 OBJDUMP_SKIP_RE = re.compile(
    700   r"^.*ld\.so\.cache$")
    701 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
    702 PERF_KERNEL_ALLSYMS_RE = re.compile(
    703   r".*kallsyms.*")
    704 KERNEL_ALLSYMS_LINE_RE = re.compile(
    705   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
    706 
    707 
    708 class LibraryRepo(object):
    709   def __init__(self):
    710     self.infos = []
    711     self.names = set()
    712     self.ticks = {}
    713 
    714   def Load(self, mmap_info, code_map, options):
    715     # Skip kernel mmaps when requested using the fact that their tid
    716     # is 0.
    717     if mmap_info.tid == 0 and not options.kernel:
    718       return True
    719     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
    720       return True
    721     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
    722       return self._LoadKernelSymbols(code_map)
    723     self.infos.append(mmap_info)
    724     mmap_info.ticks = 0
    725     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
    726     if not os.path.exists(mmap_info.filename):
    727       return True
    728     # Request section headers (-h), symbols (-t), and dynamic symbols
    729     # (-T) from objdump.
    730     # Unfortunately, section headers span two lines, so we have to
    731     # keep the just seen section name (from the first line in each
    732     # section header) in the after_section variable.
    733     if mmap_info.filename.endswith(".ko"):
    734       dynamic_symbols = ""
    735     else:
    736       dynamic_symbols = "-T"
    737     process = subprocess.Popen(
    738       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
    739       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    740     pipe = process.stdout
    741     after_section = None
    742     code_sections = set()
    743     reloc_sections = set()
    744     dynamic = False
    745     try:
    746       for line in pipe:
    747         if after_section:
    748           if line.find("CODE") != -1:
    749             code_sections.add(after_section)
    750           if line.find("RELOC") != -1:
    751             reloc_sections.add(after_section)
    752           after_section = None
    753           continue
    754 
    755         match = OBJDUMP_SECTION_HEADER_RE.match(line)
    756         if match:
    757           after_section = match.group(1)
    758           continue
    759 
    760         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
    761           dynamic = True
    762           continue
    763 
    764         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
    765         if match:
    766           start_address = int(match.group(1), 16)
    767           origin_offset = start_address
    768           flags = match.group(2)
    769           section = match.group(3)
    770           if section in code_sections:
    771             if dynamic or section in reloc_sections:
    772               start_address += mmap_info.addr
    773             size = int(match.group(4), 16)
    774             name = match.group(5)
    775             origin = mmap_info.filename
    776             code_map.Add(Code(name, start_address, start_address + size,
    777                               origin, origin_offset))
    778     finally:
    779       pipe.close()
    780     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
    781 
    782   def Tick(self, pc):
    783     for i, mmap_info in enumerate(self.infos):
    784       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
    785         mmap_info.ticks += 1
    786         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
    787         return True
    788     return False
    789 
    790   def _UniqueMmapName(self, mmap_info):
    791     name = mmap_info.filename
    792     index = 1
    793     while name in self.names:
    794       name = "%s-%d" % (mmap_info.filename, index)
    795       index += 1
    796     self.names.add(name)
    797     return name
    798 
    799   def _LoadKernelSymbols(self, code_map):
    800     if not os.path.exists(KERNEL_ALLSYMS_FILE):
    801       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
    802       return False
    803     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
    804     code = None
    805     for line in kallsyms:
    806       match = KERNEL_ALLSYMS_LINE_RE.match(line)
    807       if match:
    808         start_address = int(match.group(1), 16)
    809         end_address = start_address
    810         name = match.group(2)
    811         if code:
    812           code.end_address = start_address
    813           code_map.Add(code, 16)
    814         code = Code(name, start_address, end_address, "kernel", 0)
    815     return True
    816 
    817 
    818 def PrintReport(code_map, library_repo, arch, ticks, options):
    819   print "Ticks per symbol:"
    820   used_code = [code for code in code_map.UsedCode()]
    821   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
    822   for i, code in enumerate(used_code):
    823     code_ticks = code.self_ticks
    824     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
    825                                     code.FullName(), code.origin)
    826     if options.disasm_all or i < options.disasm_top:
    827       code.PrintAnnotated(arch, options)
    828   print
    829   print "Ticks per library:"
    830   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
    831   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
    832   for mmap_info in mmap_infos:
    833     mmap_ticks = mmap_info.ticks
    834     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
    835                                mmap_info.unique_name)
    836 
    837 
    838 def PrintDot(code_map, options):
    839   print "digraph G {"
    840   for code in code_map.UsedCode():
    841     if code.self_ticks < 10:
    842       continue
    843     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
    844     if code.callee_ticks:
    845       for callee, ticks in code.callee_ticks.iteritems():
    846         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
    847   print "}"
    848 
    849 
    850 if __name__ == "__main__":
    851   parser = optparse.OptionParser(USAGE)
    852   parser.add_option("--snapshot-log",
    853                     default="obj/release/snapshot.log",
    854                     help="V8 snapshot log file name [default: %default]")
    855   parser.add_option("--log",
    856                     default="v8.log",
    857                     help="V8 log file name [default: %default]")
    858   parser.add_option("--snapshot",
    859                     default=False,
    860                     action="store_true",
    861                     help="process V8 snapshot log [default: %default]")
    862   parser.add_option("--trace",
    863                     default="perf.data",
    864                     help="perf trace file name [default: %default]")
    865   parser.add_option("--kernel",
    866                     default=False,
    867                     action="store_true",
    868                     help="process kernel entries [default: %default]")
    869   parser.add_option("--disasm-top",
    870                     default=0,
    871                     type="int",
    872                     help=("number of top symbols to disassemble and annotate "
    873                           "[default: %default]"))
    874   parser.add_option("--disasm-all",
    875                     default=False,
    876                     action="store_true",
    877                     help=("disassemble and annotate all used symbols "
    878                           "[default: %default]"))
    879   parser.add_option("--dot",
    880                     default=False,
    881                     action="store_true",
    882                     help="produce dot output (WIP) [default: %default]")
    883   parser.add_option("--quiet", "-q",
    884                     default=False,
    885                     action="store_true",
    886                     help="no auxiliary messages [default: %default]")
    887   parser.add_option("--gc-fake-mmap",
    888                     default="/tmp/__v8_gc__",
    889                     help="gc fake mmap file [default: %default]")
    890   parser.add_option("--objdump",
    891                     default="/usr/bin/objdump",
    892                     help="objdump tool to use [default: %default]")
    893   parser.add_option("--host-root",
    894                     default="",
    895                     help="Path to the host root [default: %default]")
    896   options, args = parser.parse_args()
    897 
    898   if not options.quiet:
    899     if options.snapshot:
    900       print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
    901                                         options.log,
    902                                         options.log)
    903     else:
    904       print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
    905     print "Perf trace file: %s" % options.trace
    906 
    907   V8_GC_FAKE_MMAP = options.gc_fake_mmap
    908   HOST_ROOT = options.host_root
    909   if os.path.exists(options.objdump):
    910     disasm.OBJDUMP_BIN = options.objdump
    911     OBJDUMP_BIN = options.objdump
    912   else:
    913     print "Cannot find %s, falling back to default objdump" % options.objdump
    914 
    915   # Stats.
    916   events = 0
    917   ticks = 0
    918   missed_ticks = 0
    919   really_missed_ticks = 0
    920   optimized_ticks = 0
    921   generated_ticks = 0
    922   v8_internal_ticks = 0
    923   mmap_time = 0
    924   sample_time = 0
    925 
    926   # Process the snapshot log to fill the snapshot name map.
    927   snapshot_name_map = {}
    928   if options.snapshot:
    929     snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
    930     snapshot_name_map = snapshot_log_reader.ReadNameMap()
    931 
    932   # Initialize the log reader.
    933   code_map = CodeMap()
    934   log_reader = LogReader(log_name=options.log + ".ll",
    935                          code_map=code_map,
    936                          snapshot_pos_to_name=snapshot_name_map)
    937   if not options.quiet:
    938     print "Generated code architecture: %s" % log_reader.arch
    939     print
    940     sys.stdout.flush()
    941 
    942   # Process the code and trace logs.
    943   library_repo = LibraryRepo()
    944   log_reader.ReadUpToGC()
    945   trace_reader = TraceReader(options.trace)
    946   while True:
    947     header, offset = trace_reader.ReadEventHeader()
    948     if not header:
    949       break
    950     events += 1
    951     if header.type == PERF_RECORD_MMAP:
    952       start = time.time()
    953       mmap_info = trace_reader.ReadMmap(header, offset)
    954       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
    955         log_reader.ReadUpToGC()
    956       else:
    957         library_repo.Load(mmap_info, code_map, options)
    958       mmap_time += time.time() - start
    959     elif header.type == PERF_RECORD_SAMPLE:
    960       ticks += 1
    961       start = time.time()
    962       sample = trace_reader.ReadSample(header, offset)
    963       code = code_map.Find(sample.ip)
    964       if code:
    965         code.Tick(sample.ip)
    966         if code.codetype == Code.OPTIMIZED:
    967           optimized_ticks += 1
    968         elif code.codetype == Code.FULL_CODEGEN:
    969           generated_ticks += 1
    970         elif code.codetype == Code.V8INTERNAL:
    971           v8_internal_ticks += 1
    972       else:
    973         missed_ticks += 1
    974       if not library_repo.Tick(sample.ip) and not code:
    975         really_missed_ticks += 1
    976       if trace_reader.callchain_supported:
    977         for ip in sample.ips:
    978           caller_code = code_map.Find(ip)
    979           if caller_code:
    980             if code:
    981               caller_code.CalleeTick(code)
    982             code = caller_code
    983       sample_time += time.time() - start
    984 
    985   if options.dot:
    986     PrintDot(code_map, options)
    987   else:
    988     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
    989 
    990     if not options.quiet:
    991       def PrintTicks(number, total, description):
    992         print("%10d %5.1f%% ticks in %s" %
    993               (number, 100.0*number/total, description))
    994       print
    995       print "Stats:"
    996       print "%10d total trace events" % events
    997       print "%10d total ticks" % ticks
    998       print "%10d ticks not in symbols" % missed_ticks
    999       unaccounted = "unaccounted ticks"
   1000       if really_missed_ticks > 0:
   1001         unaccounted += " (probably in the kernel, try --kernel)"
   1002       PrintTicks(really_missed_ticks, ticks, unaccounted)
   1003       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
   1004       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
   1005       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
   1006       print "%10d total symbols" % len([c for c in code_map.AllCode()])
   1007       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
   1008       print "%9.2fs library processing time" % mmap_time
   1009       print "%9.2fs tick processing time" % sample_time
   1010 
   1011   log_reader.Dispose()
   1012   trace_reader.Dispose()
   1013