Home | History | Annotate | Download | only in bench
      1 #!/usr/bin/env python
      2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """ Generate bench_expectations file from a given set of bench data files. """
      7 
      8 import argparse
      9 import bench_util
     10 import json
     11 import os
     12 import re
     13 import sys
     14 import urllib2
     15 
     16 # Parameters for calculating bench ranges.
     17 RANGE_RATIO_UPPER = 1.5  # Ratio of range for upper bounds.
     18 RANGE_RATIO_LOWER = 2.0  # Ratio of range for lower bounds.
     19 ERR_RATIO = 0.08  # Further widens the range by the ratio of average value.
     20 ERR_UB = 1.0  # Adds an absolute upper error to cope with small benches.
     21 ERR_LB = 1.5
     22 
     23 # List of bench configs to monitor. Ignore all other configs.
     24 CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
     25                       'simple_viewport_1000x1000_angle',
     26                       'simple_viewport_1000x1000_gpu',
     27                       'simple_viewport_1000x1000_scalar_1.100000',
     28                       'simple_viewport_1000x1000_scalar_1.100000_gpu',
     29                      ]
     30 
     31 # List of flaky entries that should be excluded. Each entry is defined by a list
     32 # of 3 strings, corresponding to the substrings of [bench, config, builder] to
     33 # search for. A bench expectations line is excluded when each of the 3 strings
     34 # in the list is a substring of the corresponding element of the given line. For
     35 # instance, ['desk_yahooanswers', 'gpu', 'Ubuntu'] will skip expectation entries
     36 # of SKP benchs whose name contains 'desk_yahooanswers' on all gpu-related
     37 # configs of all Ubuntu builders.
     38 ENTRIES_TO_EXCLUDE = [
     39                      ]
     40 
     41 _GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'
     42 
     43 def compute_ranges(benches, more_benches=None):
     44   """Given a list of bench numbers, calculate the alert range.
     45 
     46   Args:
     47     benches: a list of float bench values.
     48     more_benches: a tuple of lists of additional bench values.
     49       The first value of each tuple is the number of commits before the current
     50       one that set of values is at, and the second value is a list of
     51       bench results.
     52 
     53   Returns:
     54     a list of float [lower_bound, upper_bound].
     55   """
     56   avg = sum(benches)/len(benches)
     57   minimum = min(benches)
     58   maximum = max(benches)
     59   diff = maximum - minimum
     60 
     61   return [minimum - diff*RANGE_RATIO_LOWER - avg*ERR_RATIO - ERR_LB,
     62           maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]
     63 
     64 
     65 def create_expectations_dict(revision_data_points, builder, extra_data=None):
     66   """Convert list of bench data points into a dictionary of expectations data.
     67 
     68   Args:
     69     revision_data_points: a list of BenchDataPoint objects.
     70     builder: string of the corresponding buildbot builder name.
     71 
     72   Returns:
     73     a dictionary of this form:
     74         keys = tuple of (config, bench) strings.
     75         values = list of float [expected, lower_bound, upper_bound] for the key.
     76   """
     77   bench_dict = {}
     78   for point in revision_data_points:
     79     if (point.time_type or  # Not walltime which has time_type ''
     80         not point.config in CONFIGS_TO_INCLUDE):
     81       continue
     82     to_skip = False
     83     for bench_substr, config_substr, builder_substr in ENTRIES_TO_EXCLUDE:
     84       if (bench_substr in point.bench and config_substr in point.config and
     85           builder_substr in builder):
     86         to_skip = True
     87         break
     88     if to_skip:
     89       continue
     90     key = (point.config, point.bench)
     91 
     92     extras = []
     93     for idx, dataset in extra_data:
     94       for data in dataset:
     95         if (data.bench == point.bench and data.config == point.config and
     96               data.time_type == point.time_type and data.per_iter_time):
     97           extras.append((idx, data.per_iter_time))
     98 
     99     if key in bench_dict:
    100       raise Exception('Duplicate bench entry: ' + str(key))
    101     bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)
    102 
    103   return bench_dict
    104 
    105 
    106 def get_parent_commits(start_hash, num_back):
    107   """Returns a list of commits that are the parent of the commit passed in."""
    108   list_commits = urllib2.urlopen(
    109       'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
    110       (start_hash, num_back))
    111   # NOTE: Very brittle. Removes the four extraneous characters
    112   # so json can be read successfully
    113   trunc_list = list_commits.read()[4:]
    114   json_data = json.loads(trunc_list)
    115   return [revision['commit'] for revision in json_data['log']]
    116 
    117 
    118 def get_file_suffixes(commit_hash, directory):
    119   """Gets all the suffixes available in the directory"""
    120   possible_files = os.listdir(directory)
    121   prefix = 'bench_' + commit_hash + '_data_'
    122   return [name[len(prefix):] for name in possible_files
    123       if name.startswith(prefix)]
    124 
    125 
    126 def download_bench_data(builder, commit_hash, suffixes, directory):
    127   """Downloads data, returns the number successfully downloaded"""
    128   cur_files = os.listdir(directory)
    129   count = 0
    130   for suffix in suffixes:
    131     file_name = 'bench_'+commit_hash+'_data_'+suffix
    132     if file_name in cur_files:
    133       continue
    134     try:
    135       src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
    136       with open(os.path.join(directory, file_name), 'w') as dest:
    137         dest.writelines(src)
    138         count += 1
    139     except urllib2.HTTPError:
    140       pass
    141   return count
    142 
    143 
    144 def main():
    145     """Reads bench data points, then calculate and export expectations.
    146     """
    147     parser = argparse.ArgumentParser()
    148     parser.add_argument(
    149         '-a', '--representation_alg', default='25th',
    150         help='bench representation algorithm to use, see bench_util.py.')
    151     parser.add_argument(
    152         '-b', '--builder', required=True,
    153         help='name of the builder whose bench ranges we are computing.')
    154     parser.add_argument(
    155         '-d', '--input_dir', required=True,
    156         help='a directory containing bench data files.')
    157     parser.add_argument(
    158         '-o', '--output_file', required=True,
    159         help='file path and name for storing the output bench expectations.')
    160     parser.add_argument(
    161         '-r', '--git_revision', required=True,
    162         help='the git hash to indicate the revision of input data to use.')
    163     parser.add_argument(
    164         '-t', '--back_track', required=False, default=10,
    165         help='the number of commit hashes backwards to look to include' +
    166              'in the calculations.')
    167     parser.add_argument(
    168         '-m', '--max_commits', required=False, default=1,
    169         help='the number of commit hashes to include in the calculations.')
    170     args = parser.parse_args()
    171 
    172     builder = args.builder
    173 
    174     data_points = bench_util.parse_skp_bench_data(
    175         args.input_dir, args.git_revision, args.representation_alg)
    176 
    177     parent_commits = get_parent_commits(args.git_revision, args.back_track)
    178     print "Using commits: {}".format(parent_commits)
    179     suffixes = get_file_suffixes(args.git_revision, args.input_dir)
    180     print "Using suffixes: {}".format(suffixes)
    181 
    182     # TODO(kelvinly): Find a better approach to than directly copying from
    183     # the GS server?
    184     downloaded_commits = []
    185     for idx, commit in enumerate(parent_commits):
    186       num_downloaded = download_bench_data(
    187           builder, commit, suffixes, args.input_dir)
    188       if num_downloaded > 0:
    189         downloaded_commits.append((num_downloaded, idx, commit))
    190 
    191     if len(downloaded_commits) < args.max_commits:
    192       print ('Less than desired number of commits found. Please increase'
    193             '--back_track in later runs')
    194     trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
    195     extra_data = []
    196     for _, idx, commit in trunc_commits:
    197       extra_data.append((idx, bench_util.parse_skp_bench_data(
    198           args.input_dir, commit, args.representation_alg)))
    199 
    200     expectations_dict = create_expectations_dict(data_points, builder,
    201                                                  extra_data)
    202 
    203     out_lines = []
    204     keys = expectations_dict.keys()
    205     keys.sort()
    206     for (config, bench) in keys:
    207       (expected, lower_bound, upper_bound) = expectations_dict[(config, bench)]
    208       out_lines.append('%(bench)s_%(config)s_,%(builder)s-%(representation)s,'
    209           '%(expected)s,%(lower_bound)s,%(upper_bound)s' % {
    210               'bench': bench,
    211               'config': config,
    212               'builder': builder,
    213               'representation': args.representation_alg,
    214               'expected': expected,
    215               'lower_bound': lower_bound,
    216               'upper_bound': upper_bound})
    217 
    218     with open(args.output_file, 'w') as file_handle:
    219       file_handle.write('\n'.join(out_lines))
    220 
    221 
    222 if __name__ == "__main__":
    223     main()
    224