Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2010 the V8 project authors. All rights reserved.
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #     * Redistributions of source code must retain the above copyright
      9 #       notice, this list of conditions and the following disclaimer.
     10 #     * Redistributions in binary form must reproduce the above
     11 #       copyright notice, this list of conditions and the following
     12 #       disclaimer in the documentation and/or other materials provided
     13 #       with the distribution.
     14 #     * Neither the name of Google Inc. nor the names of its
     15 #       contributors may be used to endorse or promote products derived
     16 #       from this software without specific prior written permission.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 #
     30 
     31 #
     32 # This is a utility for plotting charts based on GC traces produced by V8 when
     33 # run with flags --trace-gc --trace-gc-nvp. Relies on gnuplot for actual
     34 # plotting.
     35 #
     36 # Usage: gc-nvp-trace-processor.py <GC-trace-filename>
     37 #
     38 
     39 
     40 from __future__ import with_statement
     41 import sys, types, re, subprocess, math
     42 
     43 def flatten(l):
     44   flat = []
     45   for i in l: flat.extend(i)
     46   return flat
     47 
     48 def split_nvp(s):
     49   t = {}
     50   for (name, value) in re.findall(r"(\w+)=([-\w]+)", s):
     51     try:
     52       t[name] = int(value)
     53     except ValueError:
     54       t[name] = value
     55 
     56   return t
     57 
     58 def parse_gc_trace(input):
     59   trace = []
     60   with open(input) as f:
     61     for line in f:
     62       info = split_nvp(line)
     63       if info and 'pause' in info and info['pause'] > 0:
     64         info['i'] = len(trace)
     65         trace.append(info)
     66   return trace
     67 
     68 def extract_field_names(script):
     69   fields = { 'data': true, 'in': true }
     70 
     71   for m in re.finditer(r"$(\w+)", script):
     72     field_name = m.group(1)
     73     if field_name not in fields:
     74       fields[field] = field_count
     75       field_count = field_count + 1
     76 
     77   return fields
     78 
     79 def gnuplot(script):
     80   gnuplot = subprocess.Popen(["gnuplot"], stdin=subprocess.PIPE)
     81   gnuplot.stdin.write(script)
     82   gnuplot.stdin.close()
     83   gnuplot.wait()
     84 
     85 x1y1 = 'x1y1'
     86 x1y2 = 'x1y2'
     87 x2y1 = 'x2y1'
     88 x2y2 = 'x2y2'
     89 
     90 class Item(object):
     91   def __init__(self, title, field, axis = x1y1, **keywords):
     92     self.title = title
     93     self.axis = axis
     94     self.props = keywords
     95     if type(field) is types.ListType:
     96       self.field = field
     97     else:
     98       self.field = [field]
     99 
    100   def fieldrefs(self):
    101     return self.field
    102 
    103   def to_gnuplot(self, context):
    104     args = ['"%s"' % context.datafile,
    105             'using %s' % context.format_fieldref(self.field),
    106             'title "%s"' % self.title,
    107             'axis %s' % self.axis]
    108     if 'style' in self.props:
    109       args.append('with %s' % self.props['style'])
    110     if 'lc' in self.props:
    111       args.append('lc rgb "%s"' % self.props['lc'])
    112     if 'fs' in self.props:
    113       args.append('fs %s' % self.props['fs'])
    114     return ' '.join(args)
    115 
    116 class Plot(object):
    117   def __init__(self, *items):
    118     self.items = items
    119 
    120   def fieldrefs(self):
    121     return flatten([item.fieldrefs() for item in self.items])
    122 
    123   def to_gnuplot(self, ctx):
    124     return 'plot ' + ', '.join([item.to_gnuplot(ctx) for item in self.items])
    125 
    126 class Set(object):
    127   def __init__(self, value):
    128     self.value = value
    129 
    130   def to_gnuplot(self, ctx):
    131     return 'set ' + self.value
    132 
    133   def fieldrefs(self):
    134     return []
    135 
    136 class Context(object):
    137   def __init__(self, datafile, field_to_index):
    138     self.datafile = datafile
    139     self.field_to_index = field_to_index
    140 
    141   def format_fieldref(self, fieldref):
    142     return ':'.join([str(self.field_to_index[field]) for field in fieldref])
    143 
    144 def collect_fields(plot):
    145   field_to_index = {}
    146   fields = []
    147 
    148   def add_field(field):
    149     if field not in field_to_index:
    150       fields.append(field)
    151       field_to_index[field] = len(fields)
    152 
    153   for field in flatten([item.fieldrefs() for item in plot]):
    154     add_field(field)
    155 
    156   return (fields, field_to_index)
    157 
    158 def is_y2_used(plot):
    159   for subplot in plot:
    160     if isinstance(subplot, Plot):
    161       for item in subplot.items:
    162         if item.axis == x1y2 or item.axis == x2y2:
    163           return True
    164   return False
    165 
    166 def get_field(trace_line, field):
    167   t = type(field)
    168   if t is types.StringType:
    169     return trace_line[field]
    170   elif t is types.FunctionType:
    171     return field(trace_line)
    172 
    173 def generate_datafile(datafile_name, trace, fields):
    174   with open(datafile_name, 'w') as datafile:
    175     for line in trace:
    176       data_line = [str(get_field(line, field)) for field in fields]
    177       datafile.write('\t'.join(data_line))
    178       datafile.write('\n')
    179 
    180 def generate_script_and_datafile(plot, trace, datafile, output):
    181   (fields, field_to_index) = collect_fields(plot)
    182   generate_datafile(datafile, trace, fields)
    183   script = [
    184       'set terminal png',
    185       'set output "%s"' % output,
    186       'set autoscale',
    187       'set ytics nomirror',
    188       'set xtics nomirror',
    189       'set key below'
    190   ]
    191 
    192   if is_y2_used(plot):
    193     script.append('set autoscale y2')
    194     script.append('set y2tics')
    195 
    196   context = Context(datafile, field_to_index)
    197 
    198   for item in plot:
    199     script.append(item.to_gnuplot(context))
    200 
    201   return '\n'.join(script)
    202 
    203 def plot_all(plots, trace, prefix):
    204   charts = []
    205 
    206   for plot in plots:
    207     outfilename = "%s_%d.png" % (prefix, len(charts))
    208     charts.append(outfilename)
    209     script = generate_script_and_datafile(plot, trace, '~datafile', outfilename)
    210     print 'Plotting %s...' % outfilename
    211     gnuplot(script)
    212 
    213   return charts
    214 
    215 def reclaimed_bytes(row):
    216   return row['total_size_before'] - row['total_size_after']
    217 
    218 def other_scope(r):
    219   if r['gc'] == 's':
    220     # there is no 'other' scope for scavenging collections.
    221     return 0
    222   return r['pause'] - r['mark'] - r['sweep'] - r['external']
    223 
    224 def scavenge_scope(r):
    225   if r['gc'] == 's':
    226     return r['pause'] - r['external']
    227   return 0
    228 
    229 
    230 def real_mutator(r):
    231   return r['mutator'] - r['stepstook']
    232 
    233 plots = [
    234   [
    235     Set('style fill solid 0.5 noborder'),
    236     Set('style histogram rowstacked'),
    237     Set('style data histograms'),
    238     Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
    239          Item('Marking', 'mark', lc = 'purple'),
    240          Item('Sweep', 'sweep', lc = 'blue'),
    241          Item('External', 'external', lc = '#489D43'),
    242          Item('Other', other_scope, lc = 'grey'),
    243          Item('IGC Steps', 'stepstook', lc = '#FF6347'))
    244   ],
    245   [
    246     Set('style fill solid 0.5 noborder'),
    247     Set('style histogram rowstacked'),
    248     Set('style data histograms'),
    249     Plot(Item('Scavenge', scavenge_scope, lc = 'green'),
    250          Item('Marking', 'mark', lc = 'purple'),
    251          Item('Sweep', 'sweep', lc = 'blue'),
    252          Item('External', 'external', lc = '#489D43'),
    253          Item('Other', other_scope, lc = '#ADD8E6'),
    254          Item('External', 'external', lc = '#D3D3D3'))
    255   ],
    256 
    257   [
    258     Plot(Item('Mutator', real_mutator, lc = 'black', style = 'lines'))
    259   ],
    260   [
    261     Set('style histogram rowstacked'),
    262     Set('style data histograms'),
    263     Plot(Item('Heap Size (before GC)', 'total_size_before', x1y2,
    264               fs = 'solid 0.4 noborder',
    265               lc = 'green'),
    266          Item('Total holes (after GC)', 'holes_size_before', x1y2,
    267               fs = 'solid 0.4 noborder',
    268               lc = 'red'),
    269          Item('GC Time', ['i', 'pause'], style = 'lines', lc = 'red'))
    270   ],
    271   [
    272     Set('style histogram rowstacked'),
    273     Set('style data histograms'),
    274     Plot(Item('Heap Size (after GC)', 'total_size_after', x1y2,
    275               fs = 'solid 0.4 noborder',
    276               lc = 'green'),
    277          Item('Total holes (after GC)', 'holes_size_after', x1y2,
    278               fs = 'solid 0.4 noborder',
    279               lc = 'red'),
    280          Item('GC Time', ['i', 'pause'],
    281               style = 'lines',
    282               lc = 'red'))
    283   ],
    284   [
    285     Set('style fill solid 0.5 noborder'),
    286     Set('style data histograms'),
    287     Plot(Item('Allocated', 'allocated'),
    288          Item('Reclaimed', reclaimed_bytes),
    289          Item('Promoted', 'promoted', style = 'lines', lc = 'black'))
    290   ],
    291 ]
    292 
    293 def freduce(f, field, trace, init):
    294   return reduce(lambda t,r: f(t, r[field]), trace, init)
    295 
    296 def calc_total(trace, field):
    297   return freduce(lambda t,v: t + long(v), field, trace, long(0))
    298 
    299 def calc_max(trace, field):
    300   return freduce(lambda t,r: max(t, r), field, trace, 0)
    301 
    302 def count_nonzero(trace, field):
    303   return freduce(lambda t,r: t if r == 0 else t + 1, field, trace, 0)
    304 
    305 
    306 def process_trace(filename):
    307   trace = parse_gc_trace(filename)
    308 
    309   marksweeps = filter(lambda r: r['gc'] == 'ms', trace)
    310   scavenges = filter(lambda r: r['gc'] == 's', trace)
    311   globalgcs = filter(lambda r: r['gc'] != 's', trace)
    312 
    313 
    314   charts = plot_all(plots, trace, filename)
    315 
    316   def stats(out, prefix, trace, field):
    317     n = len(trace)
    318     total = calc_total(trace, field)
    319     max = calc_max(trace, field)
    320     if n > 0:
    321       avg = total / n
    322     else:
    323       avg = 0
    324     if n > 1:
    325       dev = math.sqrt(freduce(lambda t,r: t + (r - avg) ** 2, field, trace, 0) /
    326                       (n - 1))
    327     else:
    328       dev = 0
    329 
    330     out.write('<tr><td>%s</td><td>%d</td><td>%d</td>'
    331               '<td>%d</td><td>%d [dev %f]</td></tr>' %
    332               (prefix, n, total, max, avg, dev))
    333 
    334   def HumanReadable(size):
    335     suffixes = ['bytes', 'kB', 'MB', 'GB']
    336     power = 1
    337     for i in range(len(suffixes)):
    338       if size < power*1024:
    339         return "%.1f" % (float(size) / power) + " " + suffixes[i]
    340       power *= 1024
    341 
    342   def throughput(name, trace):
    343     total_live_after = calc_total(trace, 'total_size_after')
    344     total_live_before = calc_total(trace, 'total_size_before')
    345     total_gc = calc_total(trace, 'pause')
    346     if total_gc == 0:
    347       return
    348     out.write('GC %s Throughput (after): %s / %s ms = %s/ms<br/>' %
    349               (name,
    350                HumanReadable(total_live_after),
    351                total_gc,
    352                HumanReadable(total_live_after / total_gc)))
    353     out.write('GC %s Throughput (before): %s / %s ms = %s/ms<br/>' %
    354               (name,
    355                HumanReadable(total_live_before),
    356                total_gc,
    357                HumanReadable(total_live_before / total_gc)))
    358 
    359 
    360   with open(filename + '.html', 'w') as out:
    361     out.write('<html><body>')
    362     out.write('<table>')
    363     out.write('<tr><td>Phase</td><td>Count</td><td>Time (ms)</td>')
    364     out.write('<td>Max</td><td>Avg</td></tr>')
    365     stats(out, 'Total in GC', trace, 'pause')
    366     stats(out, 'Scavenge', scavenges, 'pause')
    367     stats(out, 'MarkSweep', marksweeps, 'pause')
    368     stats(out, 'Mark', filter(lambda r: r['mark'] != 0, trace), 'mark')
    369     stats(out, 'Sweep', filter(lambda r: r['sweep'] != 0, trace), 'sweep')
    370     stats(out,
    371           'External',
    372           filter(lambda r: r['external'] != 0, trace),
    373           'external')
    374     out.write('</table>')
    375     throughput('TOTAL', trace)
    376     throughput('MS', marksweeps)
    377     throughput('OLDSPACE', globalgcs)
    378     out.write('<br/>')
    379     for chart in charts:
    380       out.write('<img src="%s">' % chart)
    381       out.write('</body></html>')
    382 
    383   print "%s generated." % (filename + '.html')
    384 
    385 if len(sys.argv) != 2:
    386   print "Usage: %s <GC-trace-filename>" % sys.argv[0]
    387   sys.exit(1)
    388 
    389 process_trace(sys.argv[1])
    390