#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Parses CSV output from the loading_measurement and outputs interesting stats.

Example usage:
$ tools/perf/run_measurement --browser=release \
    --output-format=csv --output=/path/to/loading_measurement_output.csv \
    loading_measurement tools/perf/page_sets/top_1m.json
$ tools/perf/measurements/loading_measurement_analyzer.py \
    --num-slowest-urls=100 --rank-csv-file=/path/to/top-1m.csv \
    /path/to/loading_measurement_output.csv
"""

import collections
import csv
import heapq
import optparse
import os
import re
import sys


# Columns that describe the page itself; they are never treated as a timing
# category when a row is parsed.
_NON_CATEGORY_COLUMNS = (
    'url', 'load_time (ms)', 'dom_content_loaded_time (ms)')

# Matches a column name of the form "<name> (<units>)", capturing the units.
_UNITS_RE = re.compile('.* [(](.*)[)]')

# Matches the trailing " (<units>)" part so it can be stripped for display.
_UNITS_SUFFIX_RE = re.compile(' [(].*[)]')


def _Ratio(numerator, denominator):
    """Returns numerator / denominator as a float, or 0.0 for a zero divisor.

    Used for averages and percentages so that an empty (or all-zero) data set
    is reported as 0 instead of raising ZeroDivisionError.
    """
    if not denominator:
        return 0.0
    return float(numerator) / denominator


class LoadingMeasurementAnalyzer(object):
    """Loads a loading_measurement CSV and prints aggregate statistics.

    Every per-category list holds (value, url) tuples:
      totals: columns whose name contains neither '_avg' nor '_max'; these
          are also summed into the per-page CPU time.
      avgs / maxes: columns whose name contains '_avg' / '_max'. They are
          collected here but not reported by PrintSummary.
      load_times / cpu_times: one entry per parsed row.
      network_percents: (load_time - cpu_time) / load_time per row, only
          filled in when options.show_network is set.
    """

    def __init__(self, input_file, options):
        """Parses the optional rank file and then the measurement CSV.

        Args:
          input_file: Path of the CSV file to analyze.
          options: Object providing num_slowest_urls, rank_csv_file,
              rank_limit, show_network, max_rows and display_zeros (as
              produced by the option parser in main()).
        """
        self.ranks = {}
        self.totals = collections.defaultdict(list)
        self.maxes = collections.defaultdict(list)
        self.avgs = collections.defaultdict(list)
        self.load_times = []
        self.cpu_times = []
        self.network_percents = []
        self.num_rows_parsed = 0
        self.num_slowest_urls = options.num_slowest_urls
        # Ranks must be loaded first: row filtering by rank depends on them.
        if options.rank_csv_file:
            self._ParseRankCsvFile(os.path.expanduser(options.rank_csv_file))
        self._ParseInputFile(input_file, options)
        self._display_zeros = options.display_zeros

    def _ParseInputFile(self, input_file, options):
        """Reads the measurement CSV and accumulates per-category values.

        Rows ranked beyond options.rank_limit and rows with a negative load
        time are skipped. Empty, '-' and zero cells are ignored. Parsing
        stops once options.max_rows rows have been accepted.

        Args:
          input_file: Path of the CSV file; it needs 'url' and
              'load_time (ms)' columns.
          options: See __init__.
        """
        with open(input_file, 'r') as csvfile:
            for row in csv.DictReader(csvfile):
                url = row['url']
                if (options.rank_limit and
                        self._GetRank(url) > options.rank_limit):
                    continue
                load_time = float(row['load_time (ms)'])
                if load_time < 0:
                    sys.stdout.write(
                        'Skipping %s due to negative load time\n' % url)
                    continue

                cpu_time = 0.0
                for key, value in row.items():
                    if key in _NON_CATEGORY_COLUMNS:
                        continue
                    if not value or value == '-':
                        continue
                    value = float(value)
                    if not value:
                        continue
                    if '_avg' in key:
                        self.avgs[key].append((value, url))
                    elif '_max' in key:
                        self.maxes[key].append((value, url))
                    else:
                        self.totals[key].append((value, url))
                        cpu_time += value

                self.load_times.append((load_time, url))
                self.cpu_times.append((cpu_time, url))
                if options.show_network:
                    # Whatever part of the load time is not CPU time is
                    # attributed to the network.
                    network_time = load_time - cpu_time
                    self.totals['Network (ms)'].append((network_time, url))
                    # A load time of exactly 0 has no meaningful ratio; leave
                    # the page out of the ratio ranking instead of dividing
                    # by zero.
                    if load_time:
                        self.network_percents.append(
                            (network_time / load_time, url))
                self.num_rows_parsed += 1
                if (options.max_rows and
                        self.num_rows_parsed == int(options.max_rows)):
                    break

    def _ParseRankCsvFile(self, input_file):
        """Fills self.ranks from a two-column <rank,url> CSV file."""
        with open(input_file, 'r') as csvfile:
            for row in csv.reader(csvfile):
                assert len(row) == 2
                self.ranks[row[1]] = int(row[0])

    def _GetRank(self, url):
        """Returns the rank of |url| with 'http://' removed.

        URLs missing from the rank table get len(self.ranks), which is 0 when
        no rank file was loaded.
        """
        url = url.replace('http://', '')
        if url in self.ranks:
            return self.ranks[url]
        return len(self.ranks)

    def PrintSummary(self, stdout):
        """Writes the aggregate report to |stdout|.

        The report lists overall totals and averages, then one line per
        category ordered by total time. If num_slowest_urls was requested it
        also lists the slowest URLs per category and, when network ratios
        were collected, the URLs with the highest ratios.

        Args:
          stdout: File-like object with a write() method. None means
              sys.stdout.
        """
        out = sys.stdout if stdout is None else stdout

        sum_totals = {}
        units = None
        for key, values in self.totals.items():
            m = _UNITS_RE.match(key)
            assert m, 'All keys should have units.'
            assert not units or units == m.group(1), (
                'All units should be the same.')
            units = m.group(1)
            sum_totals[key] = sum([v[0] for v in values])
        total_cpu_time = sum([v[0] for v in self.cpu_times])
        total_page_load_time = sum([v[0] for v in self.load_times])

        out.write('\n')
        out.write('Total URLs: %s\n' % self.num_rows_parsed)
        out.write('Total page load time: %ds\n' % int(round(
            total_page_load_time / 1000.0)))
        # _Ratio keeps the report working when no rows were parsed.
        out.write('Average page load time: %dms\n' % int(round(
            _Ratio(total_page_load_time, self.num_rows_parsed))))
        if units == 'ms':
            out.write('Total CPU time: %ds\n' % int(round(
                total_cpu_time / 1000.0)))
            out.write('Average CPU time: %dms\n' % int(round(
                _Ratio(total_cpu_time, self.num_rows_parsed))))
        out.write('\n')

        for key, value in sorted(sum_totals.items(), reverse=True,
                                 key=lambda i: i[1]):
            # Entries are in descending order, so the first category that
            # truncates to zero ends the listing.
            if not self._display_zeros and not int(value / 100.):
                break
            output_key = '%60s: ' % _UNITS_SUFFIX_RE.sub('', key)
            if units == 'ms':
                output_value = '%10ds ' % (value / 1000.0)
                output_percent = '%.1f%%' % _Ratio(
                    100 * value, total_page_load_time)
            else:
                output_value = '%10d%s ' % (value, units)
                output_percent = '%.1f%%' % _Ratio(
                    100 * value, total_cpu_time)
            out.write(
                ' '.join((output_key, output_value, output_percent)) + '\n')

        if not self.num_slowest_urls:
            return

        for key, values in sorted(self.totals.items(), reverse=True,
                                  key=lambda i: sum_totals[i[0]]):
            if not self._display_zeros and not int(sum_totals[key] / 100.):
                break
            out.write('\n')
            out.write('Top %d slowest %s:\n' % (
                self.num_slowest_urls, _UNITS_SUFFIX_RE.sub('', key)))
            for value, url in heapq.nlargest(self.num_slowest_urls, values):
                out.write('%10d%s\t%s (#%s)\n' % (
                    value, units, url, self._GetRank(url)))

        if self.network_percents:
            out.write('\n')
            out.write('Top %d highest network to CPU time ratios:\n' % (
                self.num_slowest_urls))
            highest = sorted(self.network_percents, reverse=True)
            for percent, url in highest[:self.num_slowest_urls]:
                out.write('\t%.1f%% %s (#%s)\n' % (
                    percent * 100, url, self._GetRank(url)))


def main(arguments, stdout=sys.stdout):
    """Parses |arguments|, analyzes the given CSV and prints the summary.

    Args:
      arguments: Command-line arguments without the program name.
      stdout: File-like object that receives the summary.

    Returns:
      0 on success, 1 if --rank-limit was passed without --rank-csv-file.
    """
    prog_desc = 'Parses CSV output from the loading_measurement'
    parser = optparse.OptionParser(
        usage=('%prog [options]' + '\n\n' + prog_desc))

    parser.add_option('--max-rows', type='int',
                      help='Only process this many rows')
    parser.add_option('--num-slowest-urls', type='int',
                      help='Output this many slowest URLs for each category')
    parser.add_option('--rank-csv-file', help='A CSV file of <rank,url>')
    parser.add_option('--rank-limit', type='int',
                      help='Only process pages higher than this rank')
    parser.add_option('--show-network', action='store_true',
                      help='Whether to display Network as a category')
    parser.add_option('--display-zeros', action='store_true',
                      help='Whether to display categories with zero time')

    options, args = parser.parse_args(arguments)

    assert len(args) == 1, 'Must pass exactly one CSV file to analyze'
    if options.rank_limit and not options.rank_csv_file:
        sys.stdout.write('Must pass --rank-csv-file with --rank-limit\n')
        return 1

    LoadingMeasurementAnalyzer(args[0], options).PrintSummary(stdout)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))