Home | History | Annotate | Download | only in measurements
      1 #!/usr/bin/env python
      2 # Copyright 2013 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Parses CSV output from the loading_measurement and outputs interesting stats.
      7 
      8 Example usage:
      9 $ tools/perf/run_measurement --browser=release \
     10     --output-format=csv --output=/path/to/loading_measurement_output.csv \
     11     loading_measurement tools/perf/page_sets/top_1m.json
     12 $ tools/perf/measurements/loading_measurement_analyzer.py \
     13     --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
     14     /path/to/loading_measurement_output.csv
     15 """
     16 
     17 import collections
     18 import csv
     19 import heapq
     20 import optparse
     21 import os
     22 import re
     23 import sys
     24 
     25 
     26 class LoadingMeasurementAnalyzer(object):
     27 
     28   def __init__(self, input_file, options):
     29     self.ranks = {}
     30     self.totals = collections.defaultdict(list)
     31     self.maxes = collections.defaultdict(list)
     32     self.avgs = collections.defaultdict(list)
     33     self.load_times = []
     34     self.cpu_times = []
     35     self.network_percents = []
     36     self.num_rows_parsed = 0
     37     self.num_slowest_urls = options.num_slowest_urls
     38     if options.rank_csv_file:
     39       self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
     40     self._ParseInputFile(input_file, options)
     41     self._display_zeros = options.display_zeros
     42 
     43   def _ParseInputFile(self, input_file, options):
     44     with open(input_file, 'r') as csvfile:
     45       row_dict = csv.DictReader(csvfile)
     46       for row in row_dict:
     47         if (options.rank_limit and
     48             self._GetRank(row['url']) > options.rank_limit):
     49           continue
     50         cpu_time = 0
     51         load_time = float(row['load_time (ms)'])
     52         if load_time < 0:
     53           print 'Skipping %s due to negative load time' % row['url']
     54           continue
     55         for key, value in row.iteritems():
     56           if key in ('url', 'load_time (ms)', 'dom_content_loaded_time (ms)'):
     57             continue
     58           if not value or value == '-':
     59             continue
     60           value = float(value)
     61           if not value:
     62             continue
     63           if '_avg' in key:
     64             self.avgs[key].append((value, row['url']))
     65           elif '_max' in key:
     66             self.maxes[key].append((value, row['url']))
     67           else:
     68             self.totals[key].append((value, row['url']))
     69             cpu_time += value
     70         self.load_times.append((load_time, row['url']))
     71         self.cpu_times.append((cpu_time, row['url']))
     72         if options.show_network:
     73           network_time = load_time - cpu_time
     74           self.totals['Network (ms)'].append((network_time, row['url']))
     75           self.network_percents.append((network_time / load_time, row['url']))
     76         self.num_rows_parsed += 1
     77         if options.max_rows and self.num_rows_parsed == int(options.max_rows):
     78           break
     79 
     80   def _ParseRankCsvFile(self, input_file):
     81     with open(input_file, 'r') as csvfile:
     82       for row in csv.reader(csvfile):
     83         assert len(row) == 2
     84         self.ranks[row[1]] = int(row[0])
     85 
     86   def _GetRank(self, url):
     87     url = url.replace('http://', '')
     88     if url in self.ranks:
     89       return self.ranks[url]
     90     return len(self.ranks)
     91 
     92   def PrintSummary(self, stdout):
     93     sum_totals = {}
     94     units = None
     95     for key, values in self.totals.iteritems():
     96       m = re.match('.* [(](.*)[)]', key)
     97       assert m, 'All keys should have units.'
     98       assert not units or units == m.group(1), 'All units should be the same.'
     99       units = m.group(1)
    100       sum_totals[key] = sum([v[0] for v in values])
    101     total_cpu_time = sum([v[0] for v in self.cpu_times])
    102     total_page_load_time = sum([v[0] for v in self.load_times])
    103 
    104     print >> stdout
    105     print >> stdout, 'Total URLs:', self.num_rows_parsed
    106     print >> stdout, 'Total page load time: %ds' % int(round(
    107         total_page_load_time / 1000))
    108     print >> stdout, 'Average page load time: %dms' % int(round(
    109         total_page_load_time / self.num_rows_parsed))
    110     if units == 'ms':
    111       print >> stdout, 'Total CPU time: %ds' % int(round(total_cpu_time / 1000))
    112       print >> stdout, 'Average CPU time: %dms' % int(round(
    113           total_cpu_time / self.num_rows_parsed))
    114     print >> stdout
    115     for key, value in sorted(sum_totals.iteritems(), reverse=True,
    116                              key=lambda i: i[1]):
    117       if not self._display_zeros and not int(value / 100.):
    118         break
    119       output_key = '%60s: ' % re.sub(' [(].*[)]', '', key)
    120       if units == 'ms':
    121         output_value = '%10ds ' % (value / 1000)
    122         output_percent = '%.1f%%' % (100 * value / total_page_load_time)
    123       else:
    124         output_value = '%10d%s ' % (value, units)
    125         output_percent = '%.1f%%' % (100 * value / total_cpu_time)
    126       print >> stdout, output_key, output_value, output_percent
    127 
    128     if not self.num_slowest_urls:
    129       return
    130 
    131     for key, values in sorted(self.totals.iteritems(), reverse=True,
    132                               key=lambda i: sum_totals[i[0]]):
    133       if not self._display_zeros and not int(sum_totals[key] / 100.):
    134         break
    135       print >> stdout
    136       print >> stdout, 'Top %d slowest %s:' % (self.num_slowest_urls,
    137                                                re.sub(' [(].*[)]', '', key))
    138       slowest = heapq.nlargest(self.num_slowest_urls, values)
    139       for value, url in slowest:
    140         print >> stdout, '%10d%s\t%s (#%s)' % (value, units, url,
    141                                                self._GetRank(url))
    142 
    143     if self.network_percents:
    144       print >> stdout
    145       print >> stdout, 'Top %d highest network to CPU time ratios:' % (
    146           self.num_slowest_urls)
    147       for percent, url in sorted(
    148           self.network_percents, reverse=True)[:self.num_slowest_urls]:
    149         percent *= 100
    150         print >> stdout, '\t', '%.1f%%' % percent, url, '(#%s)' % (
    151             self._GetRank(url))
    152 
    153 
    154 def main(arguments, stdout=sys.stdout):
    155   prog_desc = 'Parses CSV output from the loading_measurement'
    156   parser = optparse.OptionParser(usage=('%prog [options]' + '\n\n' + prog_desc))
    157 
    158   parser.add_option('--max-rows', type='int',
    159                     help='Only process this many rows')
    160   parser.add_option('--num-slowest-urls', type='int',
    161                     help='Output this many slowest URLs for each category')
    162   parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
    163   parser.add_option('--rank-limit', type='int',
    164                     help='Only process pages higher than this rank')
    165   parser.add_option('--show-network', action='store_true',
    166                     help='Whether to display Network as a category')
    167   parser.add_option('--display-zeros', action='store_true',
    168                     help='Whether to display categories with zero time')
    169 
    170   options, args = parser.parse_args(arguments)
    171 
    172   assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
    173   if options.rank_limit and not options.rank_csv_file:
    174     print 'Must pass --rank-csv-file with --rank-limit'
    175     return 1
    176 
    177   LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)
    178 
    179   return 0
    180 
    181 
    182 if __name__ == '__main__':
    183   sys.exit(main(sys.argv[1:]))
    184