#!/usr/bin/env python
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to compare the performance of two different chromeOS builds.

This script is meant to be used when the performance impact of a change in
chromeOS needs to be analyzed. It requires that you have already created two
chromeOS test images (one with the change, and one without), and that you have
at least one device available on which to run performance tests.

This script is actually a light-weight wrapper around crosperf, a tool for
automatically imaging one or more chromeOS devices with particular builds,
running a set of tests on those builds, and then notifying the user of test
results (along with some statistical analysis of perf keyvals). This wrapper
script performs the following tasks:

1) Creates a crosperf "experiment" file to be consumed by crosperf.
2) Invokes crosperf using the created experiment file. Crosperf produces two
outputs: an e-mail sent to the user who invoked it, and an output folder (named
after the given --experiment-name) created in the directory from which this
script was run.
3) Parses the results of crosperf and outputs a summary of relevant data, both
to a CSV file and to stdout.

Before running this script for the first time, you should set up your system to
run sudo without prompting for a password (otherwise, crosperf prompts for a
sudo password). You should only have to do that once per host machine.

Once you're set up with passwordless sudo, you can run the script (preferably
from an empty directory, since several output files are produced):

> python perf_compare.py --crosperf=CROSPERF_EXE --image-1=IMAGE_1 \
  --image-2=IMAGE_2 --board-1=BOARD_1 --board-2=BOARD_2 --remote-1=REMOTE_1 \
  --remote-2=REMOTE_2

You'll need to specify the following inputs: the full path to the crosperf
executable; the absolute paths to 2 locally-built chromeOS images (which must
reside in the "typical location" relative to the chroot, as required by
crosperf); the names of the boards associated with the 2 images (if both images
have the same board, you can specify that single board with --board=BOARD); and
the IP addresses of the 2 remote devices on which to run crosperf (if you have
only a single device available, specify it with --remote=REMOTE). Run with -h to
see the full set of accepted command-line arguments.

Notes:

1) When you run this script, it will delete any previously-created crosperf
output directories and CSV files that are based on the specified
--experiment-name.  If you don't want to lose any old crosperf/CSV data, either
move it to another location, or run this script with a different
--experiment-name.
2) This script will only run the benchmarks and process the perf keys specified
in the file "perf_benchmarks.json".  Some benchmarks output more perf keys than
are specified in perf_benchmarks.json; these will appear in the crosperf
outputs, but not in the outputs produced specifically by this script.
"""


import json
import logging
import math
import optparse
import os
import re
import shutil
import subprocess
import sys


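# Default number of iterations to run each benchmark, if not overridden via the
# --iterations command-line option.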
_ITERATIONS = 5
_IMAGE_1_NAME = 'Image1'
_IMAGE_2_NAME = 'Image2'
_DEFAULT_EXPERIMENT_NAME = 'perf_comparison'
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_BENCHMARK_INFO_FILE_NAME = os.path.join(_ROOT_DIR, 'perf_benchmarks.json')
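# Note: each entry in perf_benchmarks.json is assumed to be a JSON object with
# the fields consumed by identify_benchmarks_to_run() and
# output_benchmarks_info() below, roughly (illustrative values only):
#   {"benchmark": "<name>", "autotest_name": "<autotest test name>",
#    "autotest_args": "<extra args>", "perf_keys": ["<key1>", "<key2>"]}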
_CROSPERF_REPORT_LINE_DELIMITER = '\t'
_EXPERIMENT_FILE_NAME = 'experiment.txt'

_BENCHMARK_INFO_TEMPLATE = """
benchmark: {benchmark} {{
  autotest_name: {autotest_name}
  autotest_args: --use_emerged {autotest_args}
  iterations: {iterations}
}}
"""

_IMAGE_INFO_TEMPLATE = """
label: {label} {{
  chromeos_image: {image}
  board: {board}
  remote: {remote}
}}
"""


def prompt_for_input(prompt_message):
    """Prompts for user input and returns the entered text as a string."""
    return raw_input('%s:> ' % prompt_message)


def identify_benchmarks_to_run(benchmark_info, iteration_nums, perf_keys):
    """Identifies which benchmarks to run, and for how many iterations.

    @param benchmark_info: A list of dictionaries containing information about
        the complete set of default perf benchmarks to run.
    @param iteration_nums: See output_benchmarks_info().
    @param perf_keys: See output_benchmarks_info().

    @return A tuple (X, Y), where X is a list of dictionaries containing
        information about the set of benchmarks to run, and Y is the set of
        perf keys requested to be run.
    """
    perf_keys_requested = set()
    benchmarks_to_run = []
    if not perf_keys:
        # Run every benchmark for the specified number of iterations.
        benchmarks_to_run = benchmark_info
        for benchmark in benchmarks_to_run:
            benchmark['iterations'] = iteration_nums[0]
            for perf_key in benchmark['perf_keys']:
                perf_keys_requested.add(perf_key)
    else:
        # Identify which benchmarks to run, and for how many iterations.
        identified_benchmarks = {}
        for i, perf_key in enumerate(perf_keys):
            perf_keys_requested.add(perf_key)
            benchmarks = [benchmark for benchmark in benchmark_info
                          if perf_key in benchmark['perf_keys']]
            if not benchmarks:
                logging.error('Perf key "%s" isn\'t associated with a known '
                              'benchmark.', perf_key)
                sys.exit(1)
            elif len(benchmarks) > 1:
                logging.error('Perf key "%s" is associated with more than one '
                              'benchmark, but should be unique.', perf_key)
                sys.exit(1)
            benchmark_to_add = benchmarks[0]
            benchmark_to_add = identified_benchmarks.setdefault(
                benchmark_to_add['benchmark'], benchmark_to_add)
            if len(iteration_nums) == 1:
                # If only a single iteration number is specified, we assume
                # that applies to every benchmark.
                benchmark_to_add['iterations'] = iteration_nums[0]
            else:
                # The user must have specified a separate iteration number for
                # each perf key.  If the benchmark associated with the current
                # perf key already has an iteration number associated with it,
                # choose the maximum of the two.
                iter_num = iteration_nums[i]
                if 'iterations' in benchmark_to_add:
                    benchmark_to_add['iterations'] = (
                        iter_num if iter_num > benchmark_to_add['iterations']
                        else benchmark_to_add['iterations'])
                else:
                    benchmark_to_add['iterations'] = iter_num
        benchmarks_to_run = identified_benchmarks.values()

    return benchmarks_to_run, perf_keys_requested


def output_benchmarks_info(f, iteration_nums, perf_keys):
    """Identifies details of benchmarks to run, and writes that info to a file.

    @param f: A file object that is writeable.
    @param iteration_nums: A list of one or more integers representing the
        number of iterations to run for one or more benchmarks.
    @param perf_keys: A list of one or more string perf keys we need to
        run, or None if we should use the complete set of default perf keys.

    @return Set of perf keys actually requested to be run in the output file.
    """
    benchmark_info = []
    with open(_BENCHMARK_INFO_FILE_NAME, 'r') as f_bench:
        benchmark_info = json.load(f_bench)

    benchmarks_to_run, perf_keys_requested = identify_benchmarks_to_run(
        benchmark_info, iteration_nums, perf_keys)

    for benchmark in benchmarks_to_run:
        f.write(_BENCHMARK_INFO_TEMPLATE.format(
                    benchmark=benchmark['benchmark'],
                    autotest_name=benchmark['autotest_name'],
                    autotest_args=benchmark.get('autotest_args', ''),
                    iterations=benchmark['iterations']))

    return perf_keys_requested


def output_image_info(f, label, image, board, remote):
    """Writes information about a given image to an output file.

    @param f: A file object that is writeable.
    @param label: A string label for the given image.
    @param image: The string path to the image on disk.
    @param board: The string board associated with the image.
    @param remote: The string IP address on which to install the image.
    """
    f.write(_IMAGE_INFO_TEMPLATE.format(
                label=label, image=image, board=board, remote=remote))


def invoke_crosperf(crosperf_exe, result_dir, experiment_name, board_1, board_2,
                    remote_1, remote_2, iteration_nums, perf_keys, image_1,
                    image_2, image_1_name, image_2_name):
    """Invokes crosperf with a set of benchmarks and waits for it to complete.

    @param crosperf_exe: The string path to a crosperf executable.
    @param result_dir: The string name of the directory in which crosperf is
        expected to write its output.
    @param experiment_name: A string name to give the crosperf invocation.
    @param board_1: The string board associated with the first image.
    @param board_2: The string board associated with the second image.
    @param remote_1: The string IP address/name of the first remote device.
    @param remote_2: The string IP address/name of the second remote device.
    @param iteration_nums: A list of integers representing the number of
        iterations to run for the different benchmarks.
    @param perf_keys: A list of perf keys to run, or None to run the full set
        of default perf benchmarks.
    @param image_1: The string path to the first image.
    @param image_2: The string path to the second image.
    @param image_1_name: A string label to give the first image.
    @param image_2_name: A string label to give the second image.

    @return A tuple (X, Y), where X is the path to the created crosperf report
        file, and Y is the set of perf keys actually requested to be run.
    """
    # Create experiment file for crosperf.
    with open(_EXPERIMENT_FILE_NAME, 'w') as f:
        f.write('name: {name}\n'.format(name=experiment_name))
        perf_keys_requested = output_benchmarks_info(
            f, iteration_nums, perf_keys)
        output_image_info(f, image_1_name, image_1, board_1, remote_1)
        output_image_info(f, image_2_name, image_2, board_2, remote_2)

    # Invoke crosperf with the experiment file.
    logging.info('Invoking crosperf with created experiment file...')
    p = subprocess.Popen([crosperf_exe, _EXPERIMENT_FILE_NAME],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Pass through crosperf output as debug messages until crosperf run is
    # complete.
    while True:
        next_line = p.stdout.readline().strip()
        if not next_line and p.poll() is not None:
            break
        logging.debug(next_line)
        sys.stdout.flush()
    p.communicate()
    exit_code = p.returncode

    if exit_code:
        logging.error('Crosperf returned exit code %s', exit_code)
        sys.exit(1)

    report_file = os.path.join(result_dir, 'results.html')
    if not os.path.exists(report_file):
        logging.error('Crosperf report file missing, cannot proceed.')
        sys.exit(1)

    logging.info('Crosperf run complete.')
    logging.info('Crosperf results available in "%s"', result_dir)
    return report_file, perf_keys_requested


def parse_crosperf_report_file(report_file, perf_keys_requested):
    """Reads in and parses a crosperf report file for relevant perf data.

    @param report_file: See generate_results().
    @param perf_keys_requested: See generate_results().

    @return A dictionary containing perf information extracted from the crosperf
        report file.
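
        As populated below, the returned structure looks roughly like:
            {benchmark_name: {'iterations': <string>,
                              'p_values': [(perf_key, p_value_string), ...]}}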
    """
    results = {}
    with open(report_file, 'r') as f:
        contents = f.read()

        match = re.search(r'summary-tsv.+?/pre', contents, flags=re.DOTALL)
        if not match:
            logging.error('Could not find summary data in the crosperf report.')
            sys.exit(1)
        contents = match.group(0)

        curr_benchmark = None
        for line in contents.splitlines():
            delimiter = r'\s+?'
            match = re.search(
                r'Benchmark:%s(?P<benchmark>\w+?);%sIterations:%s'
                r'(?P<iterations>\w+?)\s' % (delimiter, delimiter, delimiter),
                line)
            if match:
                curr_benchmark = match.group('benchmark')
                iterations = match.group('iterations')
                results[curr_benchmark] = {'iterations': iterations,
                                           'p_values': []}
                continue
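            # Per the check below, a data row of interest is tab-separated into
            # 12 fields, has '--' in its second-to-last field, and carries the
            # p-value in its last field.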
            split = line.strip().split(_CROSPERF_REPORT_LINE_DELIMITER)
            if (len(split) == 12 and split[-2] == '--' and
                split[0] not in ['retval', 'iterations'] and
                split[0] in perf_keys_requested):
                results[curr_benchmark]['p_values'].append(
                    (split[0], split[-1]))

    return results


def generate_results(report_file, result_file, perf_keys_requested):
    """Output relevant crosperf results to a CSV file, and to stdout.

    This code parses the "results.html" output file of crosperf. It then creates
    a CSV file that has the following format per line:

    benchmark_name,num_iterations,perf_key,p_value[,perf_key,p_value]
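
    For example, a line might look like this (illustrative values only):

    benchmark_foo,5,key_a,0.0123,key_b,0.4567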

    @param report_file: The string name of the report file created by crosperf.
    @param result_file: A string name for the CSV file to output.
    @param perf_keys_requested: The set of perf keys originally requested to be
        run.
    """
    results = parse_crosperf_report_file(report_file, perf_keys_requested)

    # Output p-value data to a CSV file.
    with open(result_file, 'w') as f:
        for bench in results:
            perf_key_substring = ','.join(
                ['%s,%s' % (x[0], x[1]) for x in results[bench]['p_values']])
            f.write('%s,%s,%s\n' % (
                bench, results[bench]['iterations'], perf_key_substring))

    logging.info('P-value results available in "%s"', result_file)

    # Collect and output some additional summary results to stdout.
    small_p_value = []
    nan_p_value = []
    perf_keys_obtained = set()
    for benchmark in results:
        p_values = results[benchmark]['p_values']
        for key, p_val in p_values:
            perf_keys_obtained.add(key)
            p_val_float = float(p_val)
            if math.isnan(p_val_float):
                nan_p_value.append((benchmark, key, p_val))
            elif p_val_float <= 0.05:
                small_p_value.append((benchmark, key, p_val))

    if small_p_value:
        logging.info('The following perf keys showed statistically significant '
                     'result differences (p-value <= 0.05):')
        for item in small_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
    else:
        logging.info('No perf keys showed statistically significant result '
                     'differences (p-value <= 0.05)')

    if nan_p_value:
        logging.info('The following perf keys had "NaN" p-values:')
        for item in nan_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])

    # Check if any perf keys are missing from what was requested, and notify
    # the user if so.
    for key_requested in perf_keys_requested:
        if key_requested not in perf_keys_obtained:
            logging.warning('Could not find results for requested perf key '
                            '"%s".', key_requested)


def parse_options():
    """Parses command-line arguments."""
    parser = optparse.OptionParser()

    parser.add_option('--crosperf', metavar='PATH', type='string', default=None,
                      help='Absolute path to the crosperf executable '
                           '(required).')
    parser.add_option('--image-1', metavar='PATH', type='string', default=None,
                      help='Absolute path to the first image .bin file '
                           '(required).')
    parser.add_option('--image-2', metavar='PATH', type='string', default=None,
                      help='Absolute path to the second image .bin file '
                           '(required).')

    board_group = optparse.OptionGroup(
        parser, 'Specifying the boards (required)')
    board_group.add_option('--board', metavar='BOARD', type='string',
                           default=None,
                           help='Name of the board associated with the images, '
                                'if both images have the same board. If each '
                                'image has a different board, use '
                                'options --board-1 and --board-2 instead.')
    board_group.add_option('--board-1', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the first image.')
    board_group.add_option('--board-2', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the second image.')
    parser.add_option_group(board_group)

    remote_group = optparse.OptionGroup(
        parser, 'Specifying the remote devices (required)')
    remote_group.add_option('--remote', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of remote device to use, if '
                                 'only one physical device is to be used. If '
                                 'using two devices, use options --remote-1 '
                                 'and --remote-2 instead.')
    remote_group.add_option('--remote-1', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of first device to use.')
    remote_group.add_option('--remote-2', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of second device to use.')
    parser.add_option_group(remote_group)

    optional_group = optparse.OptionGroup(parser, 'Optional settings')
    optional_group.add_option('--image-1-name', metavar='NAME', type='string',
                              default=_IMAGE_1_NAME,
                              help='Descriptive name for the first image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--image-2-name', metavar='NAME', type='string',
                              default=_IMAGE_2_NAME,
                              help='Descriptive name for the second image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--experiment-name', metavar='NAME',
                              type='string', default=_DEFAULT_EXPERIMENT_NAME,
                              help='A descriptive name for the performance '
                                   'comparison experiment to run. Defaults to '
                                   '"%default".')
    optional_group.add_option('--perf-keys', metavar='KEY1[,KEY2...]',
                              type='string', default=None,
                              help='Comma-separated list of perf keys to '
                                   'evaluate, if you do not want to run the '
                                   'complete set. By default, will evaluate '
                                   'with the complete set of perf keys.')
    optional_group.add_option('--iterations', metavar='N1[,N2...]',
                              type='string', default=str(_ITERATIONS),
                              help='Number of iterations to use to evaluate '
                                   'each perf key (defaults to %default). If '
                                   'specifying a custom list of perf keys '
                                   '(with --perf-keys) and you want to have a '
                                   'different number of iterations for each '
                                   'perf key, specify a comma-separated list '
                                   'of iteration numbers where N1 corresponds '
                                   'to KEY1, N2 corresponds to KEY2, etc.')
    optional_group.add_option('-v', '--verbose', action='store_true',
                              default=False, help='Use verbose logging.')
    parser.add_option_group(optional_group)

    options, _ = parser.parse_args()
    return options


def verify_command_line_options(options, iteration_nums, perf_keys):
    """Verifies there are no errors in the specified command-line options.

    @param options: An optparse.Options object.
    @param iteration_nums: An array of numbers representing the number of
        iterations to perform to evaluate each perf key.
    @param perf_keys: A list of strings representing perf keys to evaluate, or
        None if no particular perf keys are specified.

    @return True, if there were no errors in the command-line options, or
        False if any error was detected.
    """
    success = True
    if not options.crosperf:
        logging.error('You must specify the path to a crosperf executable.')
        success = False
    if options.crosperf and not os.path.isfile(options.crosperf):
        logging.error('Could not locate crosperf executable "%s".',
                      options.crosperf)
        if options.crosperf.startswith('/google'):
            logging.error('Did you remember to run prodaccess?')
        success = False
    if not options.image_1 or not options.image_2:
        logging.error('You must specify the paths for 2 image .bin files.')
        success = False
    if not options.board and (not options.board_1 or not options.board_2):
        logging.error('You must specify the board name(s): either a single '
                      'board with --board, or else two board names with '
                      '--board-1 and --board-2.')
        success = False
    if options.board and options.board_1 and options.board_2:
        logging.error('Specify either one board with --board, or two boards '
                      'with --board-1 and --board-2, but not both.')
        success = False
    if not options.remote and (not options.remote_1 or not options.remote_2):
        logging.error('You must specify the remote device(s) to use: either a '
                      'single device with --remote, or else two devices with '
                      '--remote-1 and --remote-2.')
        success = False
    if options.remote and options.remote_1 and options.remote_2:
        logging.error('Specify either one remote device with --remote, or two '
                      'devices with --remote-1 and --remote-2, but not both.')
        success = False
    if len(iteration_nums) > 1 and not perf_keys:
        logging.error('You should only specify multiple iteration numbers '
                      'if you\'re specifying a custom list of perf keys to '
                      'evaluate.')
        success = False
    if (options.perf_keys and len(iteration_nums) > 1 and
        len(options.perf_keys.split(',')) > len(iteration_nums)):
        logging.error('You specified %d custom perf keys, but only %d '
                      'iteration numbers.', len(options.perf_keys.split(',')),
                      len(iteration_nums))
        success = False
    return success


def main():
    options = parse_options()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    iteration_nums = [int(i) for i in options.iterations.split(',')]
    perf_keys = options.perf_keys.split(',') if options.perf_keys else None

    # Verify there are no errors in the specified command-line options.
    if not verify_command_line_options(options, iteration_nums, perf_keys):
        return 1

    # Clean up any old results that will be overwritten.
    result_dir = options.experiment_name + '_results'
    if os.path.isdir(result_dir):
        shutil.rmtree(result_dir)
    result_file = options.experiment_name + '_results.csv'
    if os.path.isfile(result_file):
        os.remove(result_file)

    if options.remote:
        remote_1, remote_2 = options.remote, options.remote
    else:
        remote_1, remote_2 = options.remote_1, options.remote_2

    if options.board:
        board_1, board_2 = options.board, options.board
    else:
        board_1, board_2 = options.board_1, options.board_2

    report_file, perf_keys_requested = invoke_crosperf(
        options.crosperf, result_dir, options.experiment_name, board_1, board_2,
        remote_1, remote_2, iteration_nums, perf_keys, options.image_1,
        options.image_2, options.image_1_name, options.image_2_name)
    generate_results(report_file, result_file, perf_keys_requested)

    return 0


if __name__ == '__main__':
    sys.exit(main())