#!/usr/bin/env python
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to compare the performance of two different chromeOS builds.

This script is meant to be used when the performance impact of a change in
chromeOS needs to be analyzed. It requires that you have already created two
chromeOS test images (one with the change, and one without), and that you have
at least one device available on which to run performance tests.

This script is actually a lightweight wrapper around crosperf, a tool for
automatically imaging one or more chromeOS devices with particular builds,
running a set of tests on those builds, and then notifying the user of test
results (along with some statistical analysis of perf keyvals). This wrapper
script performs the following tasks:

1) Creates a crosperf "experiment" file to be consumed by crosperf.
2) Invokes crosperf using the created experiment file. Crosperf produces two
outputs: an e-mail sent to the user who invoked it, and an output folder named
based on the given --experiment-name, created in the directory from which this
script was run.
3) Parses the results of crosperf and outputs a summary of relevant data. This
script produces output in a CSV file, as well as on stdout.

Before running this script for the first time, you should set up your system to
run sudo without prompting for a password (otherwise, crosperf prompts for a
sudo password). You should only have to do that once per host machine.

Once you're set up with passwordless sudo, you can run the script (preferably
from an empty directory, since several output files are produced):

> python perf_compare.py --crosperf=CROSPERF_EXE --image-1=IMAGE_1 \
  --image-2=IMAGE_2 --board-1=BOARD_1 --board-2=BOARD_2 --remote-1=REMOTE_1 \
  --remote-2=REMOTE_2

You'll need to specify the following inputs: the full path to the crosperf
executable; the absolute paths to 2 locally-built chromeOS images (which must
reside in the "typical location" relative to the chroot, as required by
crosperf); the names of the boards associated with the 2 images (if both images
have the same board, you can specify that single board with --board=BOARD); and
the IP addresses of the 2 remote devices on which to run crosperf (if you have
only a single device available, specify it with --remote=REMOTE). Run with -h
to see the full set of accepted command-line arguments.

Notes:

1) When you run this script, it will delete any previously-created crosperf
output directories and CSV files that are based on the specified
--experiment-name. If you don't want to lose any old crosperf/CSV data, either
move it to another location, or run this script with a different
--experiment-name.
2) This script will only run the benchmarks and process the perf keys specified
in the file "perf_benchmarks.json". Some benchmarks output more perf keys than
are specified in perf_benchmarks.json; these will appear in the crosperf
outputs, but not in the outputs produced specifically by this script.
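
For reference, each entry in perf_benchmarks.json is expected to supply the
fields this script reads: "benchmark", "autotest_name", an optional
"autotest_args", and a list of "perf_keys". The entry below only illustrates
that shape; the benchmark, autotest, and key names are placeholders:

[
    {
        "benchmark": "example_benchmark",
        "autotest_name": "platform_ExampleTest",
        "autotest_args": "",
        "perf_keys": ["example_key_ms"]
    }
]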
58 """ 59 60 61 import json 62 import logging 63 import math 64 import optparse 65 import os 66 import re 67 import shutil 68 import subprocess 69 import sys 70 71 72 _ITERATIONS = 5 73 _IMAGE_1_NAME = 'Image1' 74 _IMAGE_2_NAME = 'Image2' 75 _DEFAULT_EXPERIMENT_NAME = 'perf_comparison' 76 _ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 77 _BENCHMARK_INFO_FILE_NAME = os.path.join(_ROOT_DIR, 'perf_benchmarks.json') 78 _CROSPERF_REPORT_LINE_DELIMITER = '\t' 79 _EXPERIMENT_FILE_NAME = 'experiment.txt' 80 81 _BENCHMARK_INFO_TEMPLATE = """ 82 benchmark: {benchmark} {{ 83 autotest_name: {autotest_name} 84 autotest_args: --use_emerged {autotest_args} 85 iterations: {iterations} 86 }} 87 """ 88 89 _IMAGE_INFO_TEMPLATE = """ 90 label: {label} {{ 91 chromeos_image: {image} 92 board: {board} 93 remote: {remote} 94 }} 95 """ 96 97 98 def prompt_for_input(prompt_message): 99 """Prompts for user input and returns the inputted text as a string.""" 100 return raw_input('%s:> ' % prompt_message) 101 102 103 def identify_benchmarks_to_run(benchmark_info, iteration_nums, perf_keys): 104 """Identifies which benchmarks to run, and for how many iterations. 105 106 @param benchmark_info: A list of dictionaries containing information about 107 the complete set of default perf benchmarks to run. 108 @param iteration_nums: See output_benchmarks_info(). 109 @param perf_keys: See output_benchmarks_info(). 110 111 @return A tuple (X, Y), where X is a list of dictionaries containing 112 information about the set of benchmarks to run, and Y is the set of 113 perf keys requested to be run. 114 """ 115 perf_keys_requested = set() 116 benchmarks_to_run = [] 117 if not perf_keys: 118 # Run every benchmark for the specified number of iterations. 119 benchmarks_to_run = benchmark_info 120 for benchmark in benchmarks_to_run: 121 benchmark['iterations'] = iteration_nums[0] 122 for perf_key in benchmark['perf_keys']: 123 perf_keys_requested.add(perf_key) 124 else: 125 # Identify which benchmarks to run, and for how many iterations. 126 identified_benchmarks = {} 127 for i, perf_key in enumerate(perf_keys): 128 perf_keys_requested.add(perf_key) 129 benchmarks = [benchmark for benchmark in benchmark_info 130 if perf_key in benchmark['perf_keys']] 131 if not benchmarks: 132 logging.error('Perf key "%s" isn\'t associated with a known ' 133 'benchmark.', perf_key) 134 sys.exit(1) 135 elif len(benchmarks) > 1: 136 logging.error('Perf key "%s" is associated with more than one ' 137 'benchmark, but should be unique.', perf_key) 138 sys.exit(1) 139 benchmark_to_add = benchmarks[0] 140 benchmark_to_add = identified_benchmarks.setdefault( 141 benchmark_to_add['benchmark'], benchmark_to_add) 142 if len(iteration_nums) == 1: 143 # If only a single iteration number is specified, we assume 144 # that applies to every benchmark. 145 benchmark_to_add['iterations'] = iteration_nums[0] 146 else: 147 # The user must have specified a separate iteration number for 148 # each perf key. If the benchmark associated with the current 149 # perf key already has an interation number associated with it, 150 # choose the maximum of the two. 
                iter_num = iteration_nums[i]
                if 'iterations' in benchmark_to_add:
                    benchmark_to_add['iterations'] = (
                            iter_num if iter_num > benchmark_to_add['iterations']
                            else benchmark_to_add['iterations'])
                else:
                    benchmark_to_add['iterations'] = iter_num
        benchmarks_to_run = identified_benchmarks.values()

    return benchmarks_to_run, perf_keys_requested


def output_benchmarks_info(f, iteration_nums, perf_keys):
    """Identifies details of benchmarks to run, and writes that info to a file.

    @param f: A file object that is writeable.
    @param iteration_nums: A list of one or more integers representing the
        number of iterations to run for one or more benchmarks.
    @param perf_keys: A list of one or more string perf keys we need to run,
        or None if we should use the complete set of default perf keys.

    @return Set of perf keys actually requested to be run in the output file.
    """
    benchmark_info = []
    with open(_BENCHMARK_INFO_FILE_NAME, 'r') as f_bench:
        benchmark_info = json.load(f_bench)

    benchmarks_to_run, perf_keys_requested = identify_benchmarks_to_run(
            benchmark_info, iteration_nums, perf_keys)

    for benchmark in benchmarks_to_run:
        f.write(_BENCHMARK_INFO_TEMPLATE.format(
                benchmark=benchmark['benchmark'],
                autotest_name=benchmark['autotest_name'],
                autotest_args=benchmark.get('autotest_args', ''),
                iterations=benchmark['iterations']))

    return perf_keys_requested


def output_image_info(f, label, image, board, remote):
    """Writes information about a given image to an output file.

    @param f: A file object that is writeable.
    @param label: A string label for the given image.
    @param image: The string path to the image on disk.
    @param board: The string board associated with the image.
    @param remote: The string IP address on which to install the image.
    """
    f.write(_IMAGE_INFO_TEMPLATE.format(
            label=label, image=image, board=board, remote=remote))


def invoke_crosperf(crosperf_exe, result_dir, experiment_name, board_1, board_2,
                    remote_1, remote_2, iteration_nums, perf_keys, image_1,
                    image_2, image_1_name, image_2_name):
    """Invokes crosperf with a set of benchmarks and waits for it to complete.

    @param crosperf_exe: The string path to a crosperf executable.
    @param result_dir: The string name of the directory in which crosperf is
        expected to write its output.
    @param experiment_name: A string name to give the crosperf invocation.
    @param board_1: The string board associated with the first image.
    @param board_2: The string board associated with the second image.
    @param remote_1: The string IP address/name of the first remote device.
    @param remote_2: The string IP address/name of the second remote device.
    @param iteration_nums: A list of integers representing the number of
        iterations to run for the different benchmarks.
    @param perf_keys: A list of perf keys to run, or None to run the full set
        of default perf benchmarks.
    @param image_1: The string path to the first image.
    @param image_2: The string path to the second image.
    @param image_1_name: A string label to give the first image.
    @param image_2_name: A string label to give the second image.

    @return A tuple (X, Y), where X is the path to the created crosperf report
        file, and Y is the set of perf keys actually requested to be run.
    """
    # Create experiment file for crosperf.
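    # The file uses crosperf's experiment-file syntax: a "name:" line, one
    # benchmark block per entry (see _BENCHMARK_INFO_TEMPLATE), and one
    # "label:" block per image (see _IMAGE_INFO_TEMPLATE). A generated file
    # looks roughly like the following, where the benchmark, board, path, and
    # IP values are placeholders:
    #
    #   name: perf_comparison
    #
    #   benchmark: example_benchmark {
    #     autotest_name: platform_ExampleTest
    #     autotest_args: --use_emerged
    #     iterations: 5
    #   }
    #
    #   label: Image1 {
    #     chromeos_image: /path/to/chromiumos_test_image.bin
    #     board: example-board
    #     remote: 192.168.0.2
    #   }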
    with open(_EXPERIMENT_FILE_NAME, 'w') as f:
        f.write('name: {name}\n'.format(name=experiment_name))
        perf_keys_requested = output_benchmarks_info(
                f, iteration_nums, perf_keys)
        output_image_info(f, image_1_name, image_1, board_1, remote_1)
        output_image_info(f, image_2_name, image_2, board_2, remote_2)

    # Invoke crosperf with the experiment file.
    logging.info('Invoking crosperf with created experiment file...')
    p = subprocess.Popen([crosperf_exe, _EXPERIMENT_FILE_NAME],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Pass through crosperf output as debug messages until crosperf run is
    # complete.
    while True:
        next_line = p.stdout.readline().strip()
        if not next_line and p.poll() is not None:
            break
        logging.debug(next_line)
        sys.stdout.flush()
    p.communicate()
    exit_code = p.returncode

    if exit_code:
        logging.error('Crosperf returned exit code %s', exit_code)
        sys.exit(1)

    report_file = os.path.join(result_dir, 'results.html')
    if not os.path.exists(report_file):
        logging.error('Crosperf report file missing, cannot proceed.')
        sys.exit(1)

    logging.info('Crosperf run complete.')
    logging.info('Crosperf results available in "%s"', result_dir)
    return report_file, perf_keys_requested


def parse_crosperf_report_file(report_file, perf_keys_requested):
    """Reads in and parses a crosperf report file for relevant perf data.

    @param report_file: See generate_results().
    @param perf_keys_requested: See generate_results().

    @return A dictionary containing perf information extracted from the
        crosperf report file.
    """
    results = {}
    with open(report_file, 'r') as f:
        contents = f.read()

    match = re.search(r'summary-tsv.+?/pre', contents, flags=re.DOTALL)
    contents = match.group(0)

    curr_benchmark = None
    for line in contents.splitlines():
        delimiter = r'\s+?'
        match = re.search(
                r'Benchmark:%s(?P<benchmark>\w+?);%sIterations:%s'
                r'(?P<iterations>\w+?)\s' % (delimiter, delimiter, delimiter),
                line)
        if match:
            curr_benchmark = match.group('benchmark')
            iterations = match.group('iterations')
            results[curr_benchmark] = {'iterations': iterations,
                                       'p_values': []}
            continue
        split = line.strip().split(_CROSPERF_REPORT_LINE_DELIMITER)
        if (len(split) == 12 and split[-2] == '--' and
            split[0] not in ['retval', 'iterations'] and
            split[0] in perf_keys_requested):
            results[curr_benchmark]['p_values'].append(
                    (split[0], split[-1]))

    return results


def generate_results(report_file, result_file, perf_keys_requested):
    """Outputs relevant crosperf results to a CSV file, and to stdout.

    This code parses the "results.html" output file of crosperf. It then
    creates a CSV file that has the following format per line:

    benchmark_name,num_iterations,perf_key,p_value[,perf_key,p_value]

    @param report_file: The string name of the report file created by crosperf.
    @param result_file: A string name for the CSV file to output.
    @param perf_keys_requested: The set of perf keys originally requested to be
        run.
    """
    results = parse_crosperf_report_file(report_file, perf_keys_requested)

    # Output p-value data to a CSV file.
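    # Each line follows the format documented above, e.g. (with hypothetical
    # benchmark and perf-key names):
    #
    #   example_benchmark,5,example_key_ms,0.032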
    with open(result_file, 'w') as f:
        for bench in results:
            perf_key_substring = ','.join(
                    ['%s,%s' % (x[0], x[1]) for x in results[bench]['p_values']])
            f.write('%s,%s,%s\n' % (
                    bench, results[bench]['iterations'], perf_key_substring))

    logging.info('P-value results available in "%s"', result_file)

    # Collect and output some additional summary results to stdout.
    small_p_value = []
    nan_p_value = []
    perf_keys_obtained = set()
    for benchmark in results:
        p_values = results[benchmark]['p_values']
        for key, p_val in p_values:
            perf_keys_obtained.add(key)
            if float(p_val) <= 0.05:
                small_p_value.append((benchmark, key, p_val))
            elif math.isnan(float(p_val)):
                nan_p_value.append((benchmark, key, p_val))

    if small_p_value:
        logging.info('The following perf keys showed statistically significant '
                     'result differences (p-value <= 0.05):')
        for item in small_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
    else:
        logging.info('No perf keys showed statistically significant result '
                     'differences (p-value <= 0.05)')

    if nan_p_value:
        logging.info('The following perf keys had "NaN" p-values:')
        for item in nan_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])

    # Check if any perf keys are missing from what was requested, and notify
    # the user if so.
    for key_requested in perf_keys_requested:
        if key_requested not in perf_keys_obtained:
            logging.warning('Could not find results for requested perf key '
                            '"%s".', key_requested)


def parse_options():
    """Parses command-line arguments."""
    parser = optparse.OptionParser()

    parser.add_option('--crosperf', metavar='PATH', type='string', default=None,
                      help='Absolute path to the crosperf executable '
                           '(required).')
    parser.add_option('--image-1', metavar='PATH', type='string', default=None,
                      help='Absolute path to the first image .bin file '
                           '(required).')
    parser.add_option('--image-2', metavar='PATH', type='string', default=None,
                      help='Absolute path to the second image .bin file '
                           '(required).')

    board_group = optparse.OptionGroup(
            parser, 'Specifying the boards (required)')
    board_group.add_option('--board', metavar='BOARD', type='string',
                           default=None,
                           help='Name of the board associated with the images, '
                                'if both images have the same board. If each '
                                'image has a different board, use '
                                'options --board-1 and --board-2 instead.')
    board_group.add_option('--board-1', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the first image.')
    board_group.add_option('--board-2', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the second image.')
    parser.add_option_group(board_group)

    remote_group = optparse.OptionGroup(
            parser, 'Specifying the remote devices (required)')
    remote_group.add_option('--remote', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of remote device to use, if '
If ' 402 'using two devices, use options --remote-1 ' 403 'and --remote-2 instead.') 404 remote_group.add_option('--remote-1', metavar='IP', type='string', 405 default=None, 406 help='IP address/name of first device to use.') 407 remote_group.add_option('--remote-2', metavar='IP', type='string', 408 default=None, 409 help='IP address/name of second device to use.') 410 parser.add_option_group(remote_group) 411 412 optional_group = optparse.OptionGroup(parser, 'Optional settings') 413 optional_group.add_option('--image-1-name', metavar='NAME', type='string', 414 default=_IMAGE_1_NAME, 415 help='Descriptive name for the first image. ' 416 'Defaults to "%default".') 417 optional_group.add_option('--image-2-name', metavar='NAME', type='string', 418 default=_IMAGE_2_NAME, 419 help='Descriptive name for the second image. ' 420 'Defaults to "%default".') 421 optional_group.add_option('--experiment-name', metavar='NAME', 422 type='string', default=_DEFAULT_EXPERIMENT_NAME, 423 help='A descriptive name for the performance ' 424 'comparison experiment to run. Defaults to ' 425 '"%default".') 426 optional_group.add_option('--perf-keys', metavar='KEY1[,KEY2...]', 427 type='string', default=None, 428 help='Comma-separated list of perf keys to ' 429 'evaluate, if you do not want to run the ' 430 'complete set. By default, will evaluate ' 431 'with the complete set of perf keys.') 432 optional_group.add_option('--iterations', metavar='N1[,N2...]', 433 type='string', default=str(_ITERATIONS), 434 help='Number of iterations to use to evaluate ' 435 'each perf key (defaults to %default). If ' 436 'specifying a custom list of perf keys ' 437 '(with --perf-keys) and you want to have a ' 438 'different number of iterations for each ' 439 'perf key, specify a comma-separated list ' 440 'of iteration numbers where N1 corresponds ' 441 'to KEY1, N2 corresponds to KEY2, etc.') 442 optional_group.add_option('-v', '--verbose', action='store_true', 443 default=False, help='Use verbose logging.') 444 parser.add_option_group(optional_group) 445 446 options, _ = parser.parse_args() 447 return options 448 449 450 def verify_command_line_options(options, iteration_nums, perf_keys): 451 """Verifies there are no errors in the specified command-line options. 452 453 @param options: An optparse.Options object. 454 @param iteration_nums: An array of numbers representing the number of 455 iterations to perform to evaluate each perf key. 456 @param perf_keys: A list of strings representing perf keys to evaluate, or 457 None if no particular perf keys are specified. 458 459 @return True, if there were no errors in the command-line options, or 460 False if any error was detected. 
461 """ 462 success = True 463 if not options.crosperf: 464 logging.error('You must specify the path to a crosperf executable.') 465 success = False 466 if options.crosperf and not os.path.isfile(options.crosperf): 467 logging.error('Could not locate crosperf executable "%s".', 468 options.crosperf) 469 if options.crosperf.startswith('/google'): 470 logging.error('Did you remember to run prodaccess?') 471 success = False 472 if not options.image_1 or not options.image_2: 473 logging.error('You must specify the paths for 2 image .bin files.') 474 success = False 475 if not options.board and (not options.board_1 or not options.board_2): 476 logging.error('You must specify the board name(s): either a single ' 477 'board with --board, or else two board names with ' 478 '--board-1 and --board-2.') 479 success = False 480 if options.board and options.board_1 and options.board_2: 481 logging.error('Specify either one board with --board, or two boards ' 482 'with --board-1 and --board-2, but not both.') 483 success = False 484 if not options.remote and (not options.remote_1 or not options.remote_2): 485 logging.error('You must specify the remote device(s) to use: either a ' 486 'single device with --remote, or else two devices with ' 487 '--remote-1 and --remote-2.') 488 success = False 489 if options.remote and options.remote_1 and options.remote_2: 490 logging.error('Specify either one remote device with --remote, or two ' 491 'devices with --remote-1 and --remote-2, but not both.') 492 success = False 493 if len(iteration_nums) > 1 and not perf_keys: 494 logging.error('You should only specify multiple iteration numbers ' 495 'if you\'re specifying a custom list of perf keys to ' 496 'evaluate.') 497 success = False 498 if (options.perf_keys and len(iteration_nums) > 1 and 499 len(options.perf_keys.split(',')) > len(iteration_nums)): 500 logging.error('You specified %d custom perf keys, but only %d ' 501 'iteration numbers.', len(options.perf_keys.split(',')), 502 len(iteration_nums)) 503 success = False 504 return success 505 506 507 def main(): 508 options = parse_options() 509 510 log_level = logging.DEBUG if options.verbose else logging.INFO 511 logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s', 512 level=log_level) 513 514 iteration_nums = [int(i) for i in options.iterations.split(',')] 515 perf_keys = options.perf_keys.split(',') if options.perf_keys else None 516 517 # Verify there are no errors in the specified command-line options. 518 if not verify_command_line_options(options, iteration_nums, perf_keys): 519 return 1 520 521 # Clean up any old results that will be overwritten. 
    result_dir = options.experiment_name + '_results'
    if os.path.isdir(result_dir):
        shutil.rmtree(result_dir)
    result_file = options.experiment_name + '_results.csv'
    if os.path.isfile(result_file):
        os.remove(result_file)

    if options.remote:
        remote_1, remote_2 = options.remote, options.remote
    else:
        remote_1, remote_2 = options.remote_1, options.remote_2

    if options.board:
        board_1, board_2 = options.board, options.board
    else:
        board_1, board_2 = options.board_1, options.board_2

    report_file, perf_keys_requested = invoke_crosperf(
            options.crosperf, result_dir, options.experiment_name, board_1,
            board_2, remote_1, remote_2, iteration_nums, perf_keys,
            options.image_1, options.image_2, options.image_1_name,
            options.image_2_name)
    generate_results(report_file, result_file, perf_keys_requested)

    return 0


if __name__ == '__main__':
    sys.exit(main())