#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''
Usage: runtime-call-stats.py [-h] <command> ...

Optional arguments:
  -h, --help  show this help message and exit

Commands:
  run         run chrome with --runtime-call-stats and generate logs
  stats       process logs and print statistics
  json        process logs from several versions and generate JSON
  help        help information

For each command, you can try ./runtime-call-stats.py help command.
'''

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import operator

import numpy
import scipy
import scipy.stats
from math import sqrt


# Run benchmarks.

def print_command(cmd_args):
  """Print a command line, quoting arguments that contain spaces or whose
  --flag=value payload contains spaces or starts with '-', so the printed
  line can be copy-pasted into a shell."""
  def fix_for_printing(arg):
    m = re.match(r'^--([^=]+)=(.*)$', arg)
    if m and (' ' in m.group(2) or m.group(2).startswith('-')):
      arg = "--{}='{}'".format(m.group(1), m.group(2))
    elif ' ' in arg:
      arg = "'{}'".format(arg)
    return arg
  print " ".join(map(fix_for_printing, cmd_args))


def start_replay_server(args, sites):
  """Start the web-page-replay server as a background subprocess.

  Writes the generated JS injection for `sites` to a temp file (delete=False:
  the replay server reads it after this function returns; it is removed later
  by stop_replay_server). Returns a dict with the Popen object and the
  injection file path.
  """
  with tempfile.NamedTemporaryFile(prefix='callstats-inject-', suffix='.js',
                                   mode='wt', delete=False) as f:
    injection = f.name
    generate_injection(f, sites, args.refresh)
  # Port bases 4080/4443 must match the --host-resolver-rules mapping used in
  # run_site below.
  http_port = 4080 + args.port_offset
  https_port = 4443 + args.port_offset
  cmd_args = [
      args.replay_bin,
      "--port=%s" % http_port,
      "--ssl_port=%s" % https_port,
      "--no-dns_forwarding",
      "--use_closest_match",
      "--no-diff_unknown_requests",
      "--inject_scripts=deterministic.js,{}".format(injection),
      args.replay_wpr,
  ]
  print "=" * 80
  print_command(cmd_args)
  with open(os.devnull, 'w') as null:
    server = subprocess.Popen(cmd_args, stdout=null, stderr=null)
  print "RUNNING REPLAY SERVER: %s with PID=%s" % (args.replay_bin,
                                                   server.pid)
  print "=" * 80
  return {'process': server, 'injection': injection}


def stop_replay_server(server):
  """Terminate the replay server started by start_replay_server and delete
  its temporary injection script."""
  print("SHUTTING DOWN REPLAY SERVER %s" % server['process'].pid)
  server['process'].terminate()
  os.remove(server['injection'])


def generate_injection(f, sites, refreshes=0):
  """Write the JavaScript snippet injected into every replayed page.

  The snippet keeps a refresh counter in sessionStorage, matches the current
  URL against `sites` (by regexp or URL prefix, allowing http->https
  redirects), arms a timeout derived from the site's 'timeline'/'timeout'
  entry, then calls %GetAndResetRuntimeCallStats (a V8 native, hence
  --allow-natives-syntax in run_site) and reloads while refreshes remain.
  NOTE: the string content below is emitted verbatim into pages — do not
  reformat it.
  """
  print >> f, """\
(function() {
  var s = window.sessionStorage.getItem("refreshCounter");
  var refreshTotal = """, refreshes, """;
  var refreshCounter = s ? parseInt(s) : refreshTotal;
  var refreshId = refreshTotal - refreshCounter;
  if (refreshCounter > 0) {
    window.sessionStorage.setItem("refreshCounter", refreshCounter-1);
  }
  function match(url, item) {
    if ('regexp' in item) { return url.match(item.regexp) !== null };
    var url_wanted = item.url;
    /* Allow automatic redirections from http to https. */
    if (url_wanted.startsWith("http://") && url.startsWith("https://")) {
      url_wanted = "https://" + url_wanted.substr(7);
    }
    return url.startsWith(url_wanted);
  };
  function onLoad(url) {
    for (var item of sites) {
      if (!match(url, item)) continue;
      var timeout = 'timeline' in item ? 2000 * item.timeline
                  : 'timeout'  in item ? 1000 * (item.timeout - 3)
                  : 10000;
      console.log("Setting time out of " + timeout + " for: " + url);
      window.setTimeout(function() {
        console.log("Time is out for: " + url);
        var msg = "STATS: (" + refreshId + ") " + url;
        %GetAndResetRuntimeCallStats(1, msg);
        if (refreshCounter > 0) {
          console.log(
              "Refresh counter is " + refreshCounter + ", refreshing: " + url);
          window.location.reload();
        }
      }, timeout);
      return;
    }
    console.log("Ignoring: " + url);
  };
  var sites =
    """, json.dumps(sites), """;
  onLoad(window.location.href);
})();"""


def run_site(site, domain, args, timeout=None):
  """Benchmark one site: launch chrome under `timeout`, capture stdout (the
  runtime-call-stats dump) into <domain>[#<count>].txt, and retry on crashes
  or empty results with an exponentially growing timeout (capped after 6
  consecutive bad runs)."""
  print "="*80
  print "RUNNING DOMAIN %s" % domain
  print "="*80
  result_template = "{domain}#{count}.txt" if args.repeat else "{domain}.txt"
  count = 0
  if timeout is None: timeout = args.timeout
  if args.replay_wpr:
    # Each refresh replays the page again, so scale the budget accordingly.
    timeout *= 1 + args.refresh
    timeout += 1
  retries_since_good_run = 0
  while count == 0 or args.repeat is not None and count < args.repeat:
    count += 1
    result = result_template.format(domain=domain, count=count)
    retries = 0
    while args.retries is None or retries < args.retries:
      retries += 1
      try:
        if args.user_data_dir:
          user_data_dir = args.user_data_dir
        else:
          user_data_dir = tempfile.mkdtemp(prefix="chr_")
        js_flags = "--runtime-call-stats"
        # --allow-natives-syntax lets the injected script call
        # %GetAndResetRuntimeCallStats (see generate_injection).
        if args.replay_wpr: js_flags += " --allow-natives-syntax"
        if args.js_flags: js_flags += " " + args.js_flags
        chrome_flags = [
            "--no-default-browser-check",
            "--no-sandbox",
            "--disable-translate",
            "--js-flags={}".format(js_flags),
            "--no-first-run",
            "--user-data-dir={}".format(user_data_dir),
        ]
        if args.replay_wpr:
          # Route all HTTP/HTTPS traffic to the local replay server started
          # in start_replay_server (same port bases + offset).
          http_port = 4080 + args.port_offset
          https_port = 4443 + args.port_offset
          chrome_flags += [
              "--host-resolver-rules=MAP *:80 localhost:%s, " \
                  "MAP *:443 localhost:%s, " \
                  "EXCLUDE localhost" % (
                      http_port, https_port),
              "--ignore-certificate-errors",
              "--disable-seccomp-sandbox",
              "--disable-web-security",
              "--reduce-security-for-testing",
              "--allow-insecure-localhost",
          ]
        else:
          chrome_flags += [
              "--single-process",
          ]
        if args.chrome_flags:
          chrome_flags += args.chrome_flags.split()
        # Use the coreutils `timeout` wrapper to kill chrome after the budget.
        cmd_args = [
            "timeout", str(timeout),
            args.with_chrome
        ] + chrome_flags + [ site ]
        print "- " * 40
        print_command(cmd_args)
        print "- " * 40
        with open(result, "wt") as f:
          with open(args.log_stderr or os.devnull, 'at') as err:
            status = subprocess.call(cmd_args, stdout=f, stderr=err)
        # 124 means timeout killed chrome, 0 means the user was bored first!
        # If none of these two happened, then chrome apparently crashed, so
        # it must be called again.
        if status != 124 and status != 0:
          print("CHROME CRASHED, REPEATING RUN");
          continue
        # If the stats file is empty, chrome must be called again.
        if os.path.isfile(result) and os.path.getsize(result) > 0:
          if args.print_url:
            with open(result, "at") as f:
              print >> f
              print >> f, "URL: {}".format(site)
          retries_since_good_run = 0
          break
        if retries_since_good_run < 6:
          # Exponential backoff on the timeout budget: +1, +2, +4, ... +32s.
          timeout += 2 ** retries_since_good_run
          retries_since_good_run += 1
        print("EMPTY RESULT, REPEATING RUN ({})".format(
            retries_since_good_run));
      finally:
        # Only clean up profiles we created ourselves.
        if not args.user_data_dir:
          shutil.rmtree(user_data_dir)


def read_sites_file(args):
  """Read the benchmark site list from args.sites_file.

  The file is first parsed as JSON (a list of dicts with at least 'url' and
  optionally 'timeline'/'timeout'); on ValueError it falls back to a plain
  text format of one URL per line, with '#' comments. Timeouts are derived
  from 'timeline' when absent and clamped to args.timeout.
  """
  try:
    sites = []
    try:
      with open(args.sites_file, "rt") as f:
        for item in json.load(f):
          if 'timeout' not in item:
            # This is more-or-less arbitrary.
            item['timeout'] = int(1.5 * item['timeline'] + 7)
          if item['timeout'] > args.timeout: item['timeout'] = args.timeout
          sites.append(item)
    except ValueError:
      with open(args.sites_file, "rt") as f:
        for line in f:
          line = line.strip()
          if not line or line.startswith('#'): continue
          sites.append({'url': line, 'timeout': args.timeout})
    return sites
  except IOError as e:
    args.error("Cannot read from {}. {}.".format(args.sites_file, e.strerror))
    sys.exit(1)


def do_run(args):
  """Entry point for the 'run' command: collect the site list, disambiguate
  duplicate domains with a %N suffix, optionally start the replay server,
  and benchmark every site."""
  # Determine the websites to benchmark.
  if args.sites_file:
    sites = read_sites_file(args)
  else:
    sites = [{'url': site, 'timeout': args.timeout} for site in args.sites]
  # Disambiguate domains, if needed.
  L = []
  domains = {}
  for item in sites:
    site = item['url']
    domain = None
    if args.domain:
      domain = args.domain
    elif 'domain' in item:
      domain = item['domain']
    else:
      m = re.match(r'^(https?://)?([^/]+)(/.*)?$', site)
      if not m:
        args.error("Invalid URL {}.".format(site))
        continue
      domain = m.group(2)
    # entry = [site, domain, disambiguation-count-or-None, timeout].
    entry = [site, domain, None, item['timeout']]
    if domain not in domains:
      domains[domain] = entry
    else:
      # On the first duplicate, retroactively tag the first entry (still
      # referenced through domains[domain]) with count 1, then switch the
      # domains slot to an int counter.
      if not isinstance(domains[domain], int):
        domains[domain][2] = 1
        domains[domain] = 1
      domains[domain] += 1
      entry[2] = domains[domain]
    L.append(entry)
  replay_server = start_replay_server(args, sites) if args.replay_wpr else None
  try:
    # Run them.
    for site, domain, count, timeout in L:
      if count is not None: domain = "{}%{}".format(domain, count)
      print site, domain, timeout
      run_site(site, domain, args, timeout)
  finally:
    if replay_server:
      stop_replay_server(replay_server)


# Calculate statistics.
282 283 def statistics(data): 284 N = len(data) 285 average = numpy.average(data) 286 median = numpy.median(data) 287 low = numpy.min(data) 288 high= numpy.max(data) 289 if N > 1: 290 # evaluate sample variance by setting delta degrees of freedom (ddof) to 291 # 1. The degree used in calculations is N - ddof 292 stddev = numpy.std(data, ddof=1) 293 # Get the endpoints of the range that contains 95% of the distribution 294 t_bounds = scipy.stats.t.interval(0.95, N-1) 295 #assert abs(t_bounds[0] + t_bounds[1]) < 1e-6 296 # sum mean to the confidence interval 297 ci = { 298 'abs': t_bounds[1] * stddev / sqrt(N), 299 'low': average + t_bounds[0] * stddev / sqrt(N), 300 'high': average + t_bounds[1] * stddev / sqrt(N) 301 } 302 else: 303 stddev = 0 304 ci = { 'abs': 0, 'low': average, 'high': average } 305 if abs(stddev) > 0.0001 and abs(average) > 0.0001: 306 ci['perc'] = t_bounds[1] * stddev / sqrt(N) / average * 100 307 else: 308 ci['perc'] = 0 309 return { 'samples': N, 'average': average, 'median': median, 310 'stddev': stddev, 'min': low, 'max': high, 'ci': ci } 311 312 313 def read_stats(path, domain, args): 314 groups = []; 315 if args.aggregate: 316 groups = [ 317 ('Group-IC', re.compile(".*IC.*")), 318 ('Group-Optimize', 319 re.compile("StackGuard|.*Optimize.*|.*Deoptimize.*|Recompile.*")), 320 ('Group-Compile', re.compile(".*Compile.*")), 321 ('Group-Parse', re.compile(".*Parse.*")), 322 ('Group-Callback', re.compile(".*Callback.*")), 323 ('Group-API', re.compile(".*API.*")), 324 ('Group-GC', re.compile("GC|AllocateInTargetSpace")), 325 ('Group-JavaScript', re.compile("JS_Execution")), 326 ('Group-Runtime', re.compile(".*"))] 327 with open(path, "rt") as f: 328 # Process the whole file and sum repeating entries. 329 entries = { 'Sum': {'time': 0, 'count': 0} } 330 for group_name, regexp in groups: 331 entries[group_name] = { 'time': 0, 'count': 0 } 332 for line in f: 333 line = line.strip() 334 # Discard headers and footers. 
335 if not line: continue 336 if line.startswith("Runtime Function"): continue 337 if line.startswith("===="): continue 338 if line.startswith("----"): continue 339 if line.startswith("URL:"): continue 340 if line.startswith("STATS:"): continue 341 # We have a regular line. 342 fields = line.split() 343 key = fields[0] 344 time = float(fields[1].replace("ms", "")) 345 count = int(fields[3]) 346 if key not in entries: entries[key] = { 'time': 0, 'count': 0 } 347 entries[key]['time'] += time 348 entries[key]['count'] += count 349 # We calculate the sum, if it's not the "total" line. 350 if key != "Total": 351 entries['Sum']['time'] += time 352 entries['Sum']['count'] += count 353 for group_name, regexp in groups: 354 if not regexp.match(key): continue 355 entries[group_name]['time'] += time 356 entries[group_name]['count'] += count 357 break 358 # Append the sums as single entries to domain. 359 for key in entries : 360 if key not in domain: domain[key] = { 'time_list': [], 'count_list': [] } 361 domain[key]['time_list'].append(entries[key]['time']) 362 domain[key]['count_list'].append(entries[key]['count']) 363 364 365 def print_stats(S, args): 366 # Sort by ascending/descending time average, then by ascending/descending 367 # count average, then by ascending name. 368 def sort_asc_func(item): 369 return (item[1]['time_stat']['average'], 370 item[1]['count_stat']['average'], 371 item[0]) 372 def sort_desc_func(item): 373 return (-item[1]['time_stat']['average'], 374 -item[1]['count_stat']['average'], 375 item[0]) 376 # Sorting order is in the commend-line arguments. 377 sort_func = sort_asc_func if args.sort == "asc" else sort_desc_func 378 # Possibly limit how many elements to print. 
379 L = [item for item in sorted(S.items(), key=sort_func) 380 if item[0] not in ["Total", "Sum"]] 381 N = len(L) 382 if args.limit == 0: 383 low, high = 0, N 384 elif args.sort == "desc": 385 low, high = 0, args.limit 386 else: 387 low, high = N-args.limit, N 388 # How to print entries. 389 def print_entry(key, value): 390 def stats(s, units=""): 391 conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc']) 392 return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf) 393 print "{:>50s} {} {}".format( 394 key, 395 stats(value['time_stat'], units="ms"), 396 stats(value['count_stat']) 397 ) 398 # Print and calculate partial sums, if necessary. 399 for i in range(low, high): 400 print_entry(*L[i]) 401 if args.totals and args.limit != 0 and not args.aggregate: 402 if i == low: 403 partial = { 'time_list': [0] * len(L[i][1]['time_list']), 404 'count_list': [0] * len(L[i][1]['count_list']) } 405 assert len(partial['time_list']) == len(L[i][1]['time_list']) 406 assert len(partial['count_list']) == len(L[i][1]['count_list']) 407 for j, v in enumerate(L[i][1]['time_list']): 408 partial['time_list'][j] += v 409 for j, v in enumerate(L[i][1]['count_list']): 410 partial['count_list'][j] += v 411 # Print totals, if necessary. 
412 if args.totals: 413 print '-' * 80 414 if args.limit != 0 and not args.aggregate: 415 partial['time_stat'] = statistics(partial['time_list']) 416 partial['count_stat'] = statistics(partial['count_list']) 417 print_entry("Partial", partial) 418 print_entry("Sum", S["Sum"]) 419 print_entry("Total", S["Total"]) 420 421 422 def do_stats(args): 423 domains = {} 424 for path in args.logfiles: 425 filename = os.path.basename(path) 426 m = re.match(r'^([^#]+)(#.*)?$', filename) 427 domain = m.group(1) 428 if domain not in domains: domains[domain] = {} 429 read_stats(path, domains[domain], args) 430 if args.aggregate: 431 create_total_page_stats(domains, args) 432 for i, domain in enumerate(sorted(domains)): 433 if len(domains) > 1: 434 if i > 0: print 435 print "{}:".format(domain) 436 print '=' * 80 437 domain_stats = domains[domain] 438 for key in domain_stats: 439 domain_stats[key]['time_stat'] = \ 440 statistics(domain_stats[key]['time_list']) 441 domain_stats[key]['count_stat'] = \ 442 statistics(domain_stats[key]['count_list']) 443 print_stats(domain_stats, args) 444 445 446 # Create a Total page with all entries summed up. 447 def create_total_page_stats(domains, args): 448 total = {} 449 def sum_up(parent, key, other): 450 sums = parent[key] 451 for i, item in enumerate(other[key]): 452 if i >= len(sums): 453 sums.extend([0] * (i - len(sums) + 1)) 454 if item is not None: 455 sums[i] += item 456 # Sum up all the entries/metrics from all domains 457 for domain, entries in domains.items(): 458 for key, domain_stats in entries.items(): 459 if key not in total: 460 total[key] = {} 461 total[key]['time_list'] = list(domain_stats['time_list']) 462 total[key]['count_list'] = list(domain_stats['count_list']) 463 else: 464 sum_up(total[key], 'time_list', domain_stats) 465 sum_up(total[key], 'count_list', domain_stats) 466 # Add a new "Total" page containing the summed up metrics. 467 domains['Total'] = total 468 469 470 # Generate JSON file. 
def do_json(args):
  """Entry point for the 'json' command.

  Walks each log directory; each subdirectory name is treated as a version
  and each *.txt file inside it as a domain log (the part before '#').
  Replaces every domain's entries with rows of
  [name, time_avg, time_ci_abs, time_ci_perc, count_avg, count_ci_abs,
  count_ci_perc] and prints the whole versions dict as compact JSON.
  """
  versions = {}
  for path in args.logdirs:
    if os.path.isdir(path):
      for root, dirs, files in os.walk(path):
        version = os.path.basename(root)
        if version not in versions: versions[version] = {}
        for filename in files:
          if filename.endswith(".txt"):
            m = re.match(r'^([^#]+)(#.*)?\.txt$', filename)
            domain = m.group(1)
            if domain not in versions[version]: versions[version][domain] = {}
            read_stats(os.path.join(root, filename),
                       versions[version][domain], args)
  for version, domains in versions.items():
    if args.aggregate:
      create_total_page_stats(domains, args)
    for domain, entries in domains.items():
      stats = []
      for name, value in entries.items():
        # We don't want the calculated sum in the JSON file.
        if name == "Sum": continue
        entry = [name]
        for x in ['time_list', 'count_list']:
          s = statistics(entries[name][x])
          entry.append(round(s['average'], 1))
          entry.append(round(s['ci']['abs'], 1))
          entry.append(round(s['ci']['perc'], 2))
        stats.append(entry)
      domains[domain] = stats
  print json.dumps(versions, separators=(',', ':'))


# Help.

def do_help(parser, subparsers, args):
  """Entry point for the 'help' command: print the sub-command's help when
  one is named (erroring on unknown names), otherwise the top-level help."""
  if args.help_cmd:
    if args.help_cmd in subparsers:
      subparsers[args.help_cmd].print_help()
    else:
      args.error("Unknown command '{}'".format(args.help_cmd))
  else:
    parser.print_help()


# Main program, parse command line and execute.

def coexist(*l):
  """Return True iff the given values are all truthy or all falsy, i.e. the
  corresponding options are either used together or not at all."""
  given = sum(1 for x in l if x)
  return given == 0 or given == len(l)

def main():
  """Parse the command line into sub-commands (run/stats/json/help),
  validate mutually dependent options, and dispatch to the do_* handler
  stored via set_defaults."""
  parser = argparse.ArgumentParser()
  subparser_adder = parser.add_subparsers(title="commands", dest="command",
                                          metavar="<command>")
  subparsers = {}
  # Command: run.
  subparsers["run"] = subparser_adder.add_parser(
      "run", help="run --help")
  # Each subparser also exposes its own error() so handlers can report
  # usage errors with the right usage string.
  subparsers["run"].set_defaults(
      func=do_run, error=subparsers["run"].error)
  subparsers["run"].add_argument(
      "--chrome-flags", type=str, default="",
      help="specify additional chrome flags")
  subparsers["run"].add_argument(
      "--js-flags", type=str, default="",
      help="specify additional V8 flags")
  subparsers["run"].add_argument(
      "--domain", type=str, default="",
      help="specify the output file domain name")
  subparsers["run"].add_argument(
      "--no-url", dest="print_url", action="store_false", default=True,
      help="do not include url in statistics file")
  subparsers["run"].add_argument(
      "-n", "--repeat", type=int, metavar="<num>",
      help="specify iterations for each website (default: once)")
  subparsers["run"].add_argument(
      "-k", "--refresh", type=int, metavar="<num>", default=0,
      help="specify refreshes for each iteration (default: 0)")
  subparsers["run"].add_argument(
      "--replay-wpr", type=str, metavar="<path>",
      help="use the specified web page replay (.wpr) archive")
  subparsers["run"].add_argument(
      "--replay-bin", type=str, metavar="<path>",
      help="specify the replay.py script typically located in " \
           "$CHROMIUM/src/third_party/webpagereplay/replay.py")
  subparsers["run"].add_argument(
      "-r", "--retries", type=int, metavar="<num>",
      help="specify retries if website is down (default: forever)")
  subparsers["run"].add_argument(
      "-f", "--sites-file", type=str, metavar="<path>",
      help="specify file containing benchmark websites")
  subparsers["run"].add_argument(
      "-t", "--timeout", type=int, metavar="<seconds>", default=60,
      help="specify seconds before chrome is killed")
  subparsers["run"].add_argument(
      "-p", "--port-offset", type=int, metavar="<offset>", default=0,
      help="specify the offset for the replay server's default ports")
  subparsers["run"].add_argument(
      "-u", "--user-data-dir", type=str, metavar="<path>",
      help="specify user data dir (default is temporary)")
  subparsers["run"].add_argument(
      "-c", "--with-chrome", type=str, metavar="<path>",
      default="/usr/bin/google-chrome",
      help="specify chrome executable to use")
  subparsers["run"].add_argument(
      "-l", "--log-stderr", type=str, metavar="<path>",
      help="specify where chrome's stderr should go (default: /dev/null)")
  subparsers["run"].add_argument(
      "sites", type=str, metavar="<URL>", nargs="*",
      help="specify benchmark website")
  # Command: stats.
  subparsers["stats"] = subparser_adder.add_parser(
      "stats", help="stats --help")
  subparsers["stats"].set_defaults(
      func=do_stats, error=subparsers["stats"].error)
  subparsers["stats"].add_argument(
      "-l", "--limit", type=int, metavar="<num>", default=0,
      help="limit how many items to print (default: none)")
  subparsers["stats"].add_argument(
      "-s", "--sort", choices=["asc", "desc"], default="asc",
      help="specify sorting order (default: ascending)")
  subparsers["stats"].add_argument(
      "-n", "--no-total", dest="totals", action="store_false", default=True,
      help="do not print totals")
  subparsers["stats"].add_argument(
      "logfiles", type=str, metavar="<logfile>", nargs="*",
      help="specify log files to parse")
  subparsers["stats"].add_argument(
      "--aggregate", dest="aggregate", action="store_true", default=False,
      help="Create aggregated entries. Adds Group-* entries at the toplevel. " +
           "Additionally creates a Total page with all entries.")
  # Command: json.
  subparsers["json"] = subparser_adder.add_parser(
      "json", help="json --help")
  subparsers["json"].set_defaults(
      func=do_json, error=subparsers["json"].error)
  subparsers["json"].add_argument(
      "logdirs", type=str, metavar="<logdir>", nargs="*",
      help="specify directories with log files to parse")
  subparsers["json"].add_argument(
      "--aggregate", dest="aggregate", action="store_true", default=False,
      help="Create aggregated entries. Adds Group-* entries at the toplevel. " +
           "Additionally creates a Total page with all entries.")
  # Command: help.
  subparsers["help"] = subparser_adder.add_parser(
      "help", help="help information")
  subparsers["help"].set_defaults(
      func=lambda args: do_help(parser, subparsers, args),
      error=subparsers["help"].error)
  subparsers["help"].add_argument(
      "help_cmd", type=str, metavar="<command>", nargs="?",
      help="command for which to display help")
  # Execute the command.
  args = parser.parse_args()
  setattr(args, 'script_path', os.path.dirname(sys.argv[0]))
  # NOTE(review): parser.error() already exits; the sys.exit(1) calls below
  # look unreachable but are kept as-is.
  if args.command == "run" and coexist(args.sites_file, args.sites):
    args.error("use either option --sites-file or site URLs")
    sys.exit(1)
  elif args.command == "run" and not coexist(args.replay_wpr, args.replay_bin):
    args.error("options --replay-wpr and --replay-bin must be used together")
    sys.exit(1)
  else:
    args.func(args)

if __name__ == "__main__":
  sys.exit(main())