#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Archives or replays webpages and creates SKPs in a Google Storage location.

To archive webpages and store SKP files (archives should be rarely updated):

cd skia
python tools/skp/webpages_playback.py --data_store=gs://rmistry --record \
--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
--browser_executable=/tmp/chromium/out/Release/chrome

The above command uses Google Storage bucket 'rmistry' to download needed files.

To replay archived webpages and re-generate SKP files (should be run whenever
SkPicture.PICTURE_VERSION changes):

cd skia
python tools/skp/webpages_playback.py --data_store=gs://rmistry \
--page_sets=all --skia_tools=/home/default/trunk/out/Debug/ \
--browser_executable=/tmp/chromium/out/Release/chrome


Specify the --page_sets flag (default value is 'all') to pick a list of which
webpages should be archived and/or replayed. Eg:

--page_sets=tools/skp/page_sets/skia_yahooanswers_desktop.py,\
tools/skp/page_sets/skia_googlecalendar_nexus10.py

The --browser_executable flag should point to the browser binary you want to use
to capture archives and/or capture SKP files. Majority of the time it should be
a newly built chrome binary.

The --data_store flag controls where the needed artifacts are downloaded from.
It also controls where the generated artifacts, such as recorded webpages and
resulting skp renderings, are uploaded to. URLs with scheme 'gs://' use Google
Storage. Otherwise use local filesystem.

The --upload flag means generated artifacts will be uploaded or copied to the
location specified by --data_store. (default value is False if not specified).

The --non-interactive flag controls whether the script will prompt the user
(default value is False if not specified).

The --skia_tools flag if specified will allow this script to run
debugger, render_pictures, and render_pdfs on the captured
SKP(s). The tools are run after all SKPs are successfully captured to make sure
they can be added to the buildbots with no breakages.
"""

import glob
import optparse
import os
import posixpath
import shutil
import subprocess
import sys
import tempfile
import time
import traceback


ROOT_PLAYBACK_DIR_NAME = 'playback'
SKPICTURES_DIR_NAME = 'skps'

GS_PREFIX = 'gs://'

PARTNERS_GS_BUCKET = 'gs://chrome-partner-telemetry'

# Local archive and SKP directories.
LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR = os.path.join(
    os.path.abspath(os.path.dirname(__file__)), 'page_sets', 'data')
TMP_SKP_DIR = tempfile.mkdtemp()

# Name of the SKP benchmark
SKP_BENCHMARK = 'skpicture_printer'

# The max base name length of Skp files.
MAX_SKP_BASE_NAME_LEN = 31

# Dictionary of device to platform prefixes for SKP files.
DEVICE_TO_PLATFORM_PREFIX = {
    'desktop': 'desk',
    'galaxynexus': 'mobi',
    'nexus10': 'tabl'
}

# How many times the record_wpr binary should be retried.
RETRY_RECORD_WPR_COUNT = 5
# How many times the run_benchmark binary should be retried.
RETRY_RUN_MEASUREMENT_COUNT = 3

X11_DISPLAY = os.getenv('DISPLAY', ':0')

# Path to Chromium's page sets.
CHROMIUM_PAGE_SETS_PATH = os.path.join('tools', 'perf', 'page_sets')

# Dictionary of supported Chromium page sets to their file prefixes.
CHROMIUM_PAGE_SETS_TO_PREFIX = {
    'key_mobile_sites_smooth.py': 'keymobi',
    'top_25_smooth.py': 'top25desk',
}

# Story filters (regular expressions, already wrapped in double quotes for the
# shell) of pages to skip, keyed by page set file name.
PAGE_SETS_TO_EXCLUSIONS = {
    # See skbug.com/7348
    'key_mobile_sites_smooth.py': '"(digg|worldjournal)"',
    # See skbug.com/7421
    # Raw string: '\.' is not a valid escape in a regular string literal.
    'top_25_smooth.py': r'"(mail\.google\.com)"',
}


def remove_prefix(s, prefix):
  """Returns s without the leading prefix, or s unchanged if it is absent."""
  if s.startswith(prefix):
    return s[len(prefix):]
  return s


class SkPicturePlayback(object):
  """Class that archives or replays webpages and creates SKPs."""

  def __init__(self, parse_options):
    """Constructs a SkPicturePlayback BuildStep instance.

    Args:
      parse_options: object exposing the command line flags declared in
          main() as attributes (e.g. the optparse result).

    Raises:
      AssertionError: if --browser_executable was not specified.
      ValueError: if --chrome_src_path or --page_sets was not specified.
    """
    assert parse_options.browser_executable, 'Must specify --browser_executable'
    # Fail with a readable message instead of an error from os.path.join(None).
    if parse_options.chrome_src_path is None:
      raise ValueError('Must specify --chrome_src_path')

    self._browser_executable = parse_options.browser_executable
    self._browser_args = '--disable-setuid-sandbox'
    if parse_options.browser_extra_args:
      self._browser_args = '%s %s' % (
          self._browser_args, parse_options.browser_extra_args)

    self._chrome_page_sets_path = os.path.join(parse_options.chrome_src_path,
                                               CHROMIUM_PAGE_SETS_PATH)
    # _ParsePageSets reads the two attributes set above, so it must come last.
    self._all_page_sets_specified = parse_options.page_sets == 'all'
    self._page_sets = self._ParsePageSets(parse_options.page_sets)

    self._record = parse_options.record
    self._skia_tools = parse_options.skia_tools
    self._non_interactive = parse_options.non_interactive
    self._upload = parse_options.upload
    self._skp_prefix = parse_options.skp_prefix
    data_store_location = parse_options.data_store
    if data_store_location.startswith(GS_PREFIX):
      self.gs = GoogleStorageDataStore(data_store_location)
    else:
      self.gs = LocalFileSystemDataStore(data_store_location)
    self._upload_to_partner_bucket = parse_options.upload_to_partner_bucket
    self._alternate_upload_dir = parse_options.alternate_upload_dir
    self._telemetry_binaries_dir = os.path.join(parse_options.chrome_src_path,
                                                'tools', 'perf')
    self._catapult_dir = os.path.join(parse_options.chrome_src_path,
                                      'third_party', 'catapult')

    self._local_skp_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, SKPICTURES_DIR_NAME)
    self._local_record_webpages_archive_dir = os.path.join(
        parse_options.output_dir, ROOT_PLAYBACK_DIR_NAME, 'webpages_archive')

    # List of SKP files generated by this script.
    self._skp_files = []

  def _ParsePageSets(self, page_sets):
    """Expands the --page_sets flag value into a sorted list of paths.

    Args:
      page_sets: 'all', a glob pattern containing '*', or a comma separated
          list of page set paths.

    Returns:
      Sorted list of page set paths.

    Raises:
      ValueError: if page_sets is empty.
    """
    if not page_sets:
      raise ValueError('Must specify at least one page_set!')

    if self._all_page_sets_specified:
      # Get everything from the page_sets directory.
      page_sets_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'page_sets')
      ps = [os.path.join(page_sets_dir, page_set)
            for page_set in os.listdir(page_sets_dir)
            if not os.path.isdir(os.path.join(page_sets_dir, page_set)) and
            page_set.endswith('.py')]
      # Plus every supported Chromium page set.
      ps.extend(os.path.join(self._chrome_page_sets_path, cr_page_set)
                for cr_page_set in CHROMIUM_PAGE_SETS_TO_PREFIX)
    elif '*' in page_sets:
      # Explode and return the glob.
      ps = glob.glob(page_sets)
    else:
      ps = page_sets.split(',')
    ps.sort()
    return ps

  def _IsChromiumPageSet(self, page_set):
    """Returns true if the specified page set is a Chromium page set."""
    return page_set.startswith(self._chrome_page_sets_path)

  def Run(self):
    """Run the SkPicturePlayback BuildStep.

    For every page set: obtains a webpage archive (recorded, downloaded, or
    Chromium's own), captures SKPs from it, then optionally runs the Skia
    tools on the SKPs and uploads them.

    Returns:
      0 on success; failures are reported by raising.
    """
    # Delete any left over data files in the data directory.
    for archive_file in glob.glob(
        os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, 'skia_*')):
      os.remove(archive_file)

    # Create the required local storage directories.
    self._CreateLocalStorageDirs()

    # Start the timer.
    start_time = time.time()

    # Loop through all page_sets.
    for page_set in self._page_sets:
      page_set_basename = os.path.basename(page_set).split('.')[0]
      page_set_json_name = page_set_basename + '.json'
      wpr_data_file = page_set_basename + '_000.wprgo'
      page_set_dir = os.path.dirname(page_set)

      if self._IsChromiumPageSet(page_set):
        print('Using Chromium\'s captured archives for Chromium\'s page sets.')
      elif self._record:
        # Create an archive of the specified webpages if '--record' is
        # specified.
        self._RecordWebpagesArchive(page_set, page_set_basename, page_set_dir,
                                    wpr_data_file, page_set_json_name)
      else:
        # Get the webpages archive so that it can be replayed.
        self._DownloadWebpagesArchive(wpr_data_file, page_set_json_name)

      self._CaptureSkps(page_set, page_set_basename, page_set_dir)

    print('\n\n=======Capturing SKP files took %s seconds=======\n\n' % (
        time.time() - start_time))

    if self._skia_tools:
      self._RunSkiaTools()

    print('\n\n')

    if self._upload:
      self._UploadSkps()
    else:
      print('\n\n=======Not Uploading to %s=======\n\n' %
            self.gs.target_type())
      print('Generated resources are available in %s\n\n' % (
          self._local_skp_dir))

    if self._upload_to_partner_bucket:
      self._UploadSkpsToPartnerBucket()

    return 0

  def _RecordWebpagesArchive(self, page_set, page_set_basename, page_set_dir,
                             wpr_data_file, page_set_json_name):
    """Records a new webpage archive for page_set using record_wpr.

    Retries up to RETRY_RECORD_WPR_COUNT times and copies the produced archive
    and JSON file into the local webpages archive directory.

    Raises:
      Exception: if every attempt failed.
    """
    # The command is run through the shell because it relies on the leading
    # VAR=value assignments and on $PYTHONPATH expansion.
    record_wpr_cmd = (
        'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
        'DISPLAY=%s' % X11_DISPLAY,
        os.path.join(self._telemetry_binaries_dir, 'record_wpr'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        '--use-wpr-go',
        '%s_page_set' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir
    )
    for _ in range(RETRY_RECORD_WPR_COUNT):
      try:
        subprocess.check_call(' '.join(record_wpr_cmd), shell=True)

        # Copy over the created archive into the local webpages archive
        # directory.
        for produced_file in (wpr_data_file, page_set_json_name):
          shutil.copy(
              os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR, produced_file),
              self._local_record_webpages_archive_dir)

        # Break out of the retry loop since there were no errors.
        break
      except Exception:
        # Deliberately broad: any failure (command or copy) triggers a retry.
        traceback.print_exc()
    else:
      # If we get here then record_wpr did not succeed and thus did not
      # break out of the loop.
      raise Exception('record_wpr failed for page_set: %s' % page_set)

  def _CaptureSkps(self, page_set, page_set_basename, page_set_dir):
    """Runs the SKP benchmark on page_set and collects the resulting SKPs.

    Retries up to RETRY_RUN_MEASUREMENT_COUNT times, sleeping 10 seconds
    between attempts.

    Raises:
      Exception: if every attempt failed.
    """
    # Run through the shell for the same reasons as record_wpr.
    run_benchmark_cmd = [
        'PYTHONPATH=%s:%s:$PYTHONPATH' % (page_set_dir, self._catapult_dir),
        'DISPLAY=%s' % X11_DISPLAY,
        'timeout', '1800',
        os.path.join(self._telemetry_binaries_dir, 'run_benchmark'),
        '--extra-browser-args="%s"' % self._browser_args,
        '--browser=exact',
        '--browser-executable=%s' % self._browser_executable,
        SKP_BENCHMARK,
        '--page-set-name=%s' % page_set_basename,
        '--page-set-base-dir=%s' % page_set_dir,
        '--skp-outdir=%s' % TMP_SKP_DIR,
        '--also-run-disabled-tests',
    ]

    exclusions = PAGE_SETS_TO_EXCLUSIONS.get(os.path.basename(page_set))
    if exclusions:
      run_benchmark_cmd.append('--story-filter-exclude=' + exclusions)

    for _ in range(RETRY_RUN_MEASUREMENT_COUNT):
      try:
        print('\n\n=======Capturing SKP of %s=======\n\n' % page_set)
        subprocess.check_call(' '.join(run_benchmark_cmd), shell=True)
      except subprocess.CalledProcessError:
        # There was a failure continue with the loop.
        traceback.print_exc()
        print('\n\n=======Retrying %s=======\n\n' % page_set)
        time.sleep(10)
        continue

      # Rename generated SKP files into more descriptive names.
      self._RenameSkpFiles(page_set)
      # Break out of the retry loop since there were no errors.
      break
    else:
      # If we get here then run_benchmark did not succeed and thus did not
      # break out of the loop.
      raise Exception('run_benchmark failed for page_set: %s' % page_set)

  def _RunSkiaTools(self):
    """Runs render_pictures, render_pdfs and optionally the debugger."""
    for tool_name in ('render_pictures', 'render_pdfs'):
      tools_cmd = [
          os.path.join(self._skia_tools, tool_name),
          '-r', self._local_skp_dir
      ]
      print('\n\n=======Running %s=======' % ' '.join(tools_cmd))
      subprocess.check_call(tools_cmd)

    if not self._non_interactive:
      print('\n\n=======Running debugger=======')
      os.system('%s %s' % (os.path.join(self._skia_tools, 'debugger'),
                           self._local_skp_dir))

  def _UploadSkps(self):
    """Uploads the local SKP directory to the configured data store."""
    print('\n\n=======Uploading to %s=======\n\n' % self.gs.target_type())
    # Copy the directory structure in the root directory into Google Storage.
    dest_dir_name = ROOT_PLAYBACK_DIR_NAME
    if self._alternate_upload_dir:
      dest_dir_name = self._alternate_upload_dir

    self.gs.upload_dir_contents(
        self._local_skp_dir, dest_dir=dest_dir_name)

    print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
        posixpath.join(self.gs.target_name(), dest_dir_name,
                       SKPICTURES_DIR_NAME)))

  def _UploadSkpsToPartnerBucket(self):
    """Replaces the SKPs stored in the partner bucket with the new ones."""
    print('\n\n=======Uploading to Partner bucket %s =======\n\n' % (
        PARTNERS_GS_BUCKET))
    partner_gs = GoogleStorageDataStore(PARTNERS_GS_BUCKET)
    partner_gs.delete_path(SKPICTURES_DIR_NAME)
    print('Uploading %s to %s' % (self._local_skp_dir, SKPICTURES_DIR_NAME))
    partner_gs.upload_dir_contents(self._local_skp_dir, SKPICTURES_DIR_NAME)
    print('\n\n=======New SKPs have been uploaded to %s =======\n\n' % (
        posixpath.join(partner_gs.target_name(), SKPICTURES_DIR_NAME)))

  def _GetSkiaSkpFileName(self, page_set):
    """Returns the SKP file name for Skia page sets.

    The page set file name must have exactly the form skia_<page>_<device>.py
    where <device> is a key of DEVICE_TO_PLATFORM_PREFIX.
    """
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop
    ps_basename, _ = os.path.splitext(ps_filename)
    # skia_yahooanswers_desktop -> skia, yahooanswers, desktop
    _, page_name, device = ps_basename.split('_')
    basename = '%s_%s' % (DEVICE_TO_PLATFORM_PREFIX[device], page_name)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _GetChromiumSkpFileName(self, page_set, site):
    """Returns the SKP file name for Chromium page sets.

    Args:
      page_set: path of a page set listed in CHROMIUM_PAGE_SETS_TO_PREFIX.
      site: path of the per-site output directory created by the benchmark.
    """
    # /path/to/http___mobile_news_sandbox_pt0 -> http___mobile_news_sandbox_pt0
    _, webpage = os.path.split(site)
    # http___mobile_news_sandbox_pt0 -> mobile_news_sandbox_pt0
    for prefix in ('http___', 'https___', 'www_'):
      webpage = remove_prefix(webpage, prefix)
    # /path/to/skia_yahooanswers_desktop.py -> skia_yahooanswers_desktop.py
    ps_filename = os.path.basename(page_set)
    # http___mobile_news_sandbox -> pagesetprefix_http___mobile_news_sandbox
    basename = '%s_%s' % (CHROMIUM_PAGE_SETS_TO_PREFIX[ps_filename], webpage)
    return basename[:MAX_SKP_BASE_NAME_LEN] + '.skp'

  def _RenameSkpFiles(self, page_set):
    """Rename generated SKP files into more descriptive names.

    Look into the subdirectory of TMP_SKP_DIR and find the most interesting
    .skp in there to be this page_set's representative .skp.
    """
    subdirs = glob.glob(os.path.join(TMP_SKP_DIR, '*'))
    for site in subdirs:
      if self._IsChromiumPageSet(page_set):
        filename = self._GetChromiumSkpFileName(page_set, site)
      else:
        filename = self._GetSkiaSkpFileName(page_set)
      filename = filename.lower()

      if self._skp_prefix:
        filename = '%s%s' % (self._skp_prefix, filename)

      # We choose the largest .skp as the most likely to be interesting.
      largest_skp = max(glob.glob(os.path.join(site, '*.skp')),
                        key=lambda path: os.stat(path).st_size)
      dest = os.path.join(self._local_skp_dir, filename)
      print('Moving %s to %s' % (largest_skp, dest))
      shutil.move(largest_skp, dest)
      self._skp_files.append(filename)
      shutil.rmtree(site)

  def _CreateLocalStorageDirs(self):
    """Creates required local storage directories for this script.

    Any existing directory is wiped first so each run starts empty.
    """
    for d in (self._local_record_webpages_archive_dir,
              self._local_skp_dir):
      if os.path.exists(d):
        shutil.rmtree(d)
      os.makedirs(d)

  def _DownloadWebpagesArchive(self, wpr_data_file, page_set_json_name):
    """Downloads the webpages archive and its required page set from GS.

    Raises:
      Exception: if either file is missing from the data store.
    """
    wpr_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME, 'webpages_archive',
                                wpr_data_file)
    page_set_source = posixpath.join(ROOT_PLAYBACK_DIR_NAME,
                                     'webpages_archive',
                                     page_set_json_name)
    gs = self.gs
    if not (gs.does_storage_object_exist(wpr_source) and
            gs.does_storage_object_exist(page_set_source)):
      raise Exception('%s and %s do not exist in %s!' % (
          wpr_source, page_set_source, gs.target_type()))

    gs.download_file(wpr_source,
                     os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                  wpr_data_file))
    gs.download_file(page_set_source,
                     os.path.join(LOCAL_REPLAY_WEBPAGES_ARCHIVE_DIR,
                                  page_set_json_name))


class DataStore(object):
  """An abstract base class for uploading recordings to a data storage.

  The interface emulates the google storage api.
  """

  def target_name(self):
    """Returns the location of the store."""
    raise NotImplementedError()

  def target_type(self):
    """Returns a description of the store used in log messages."""
    raise NotImplementedError()

  def does_storage_object_exist(self, name):
    """Returns whether the object called name exists in the store."""
    raise NotImplementedError()

  def delete_path(self, path):
    """Recursively deletes path from the store."""
    raise NotImplementedError()

  def download_file(self, name, local_path):
    """Copies the object called name from the store to local_path."""
    raise NotImplementedError()

  def upload_dir_contents(self, source_dir, dest_dir):
    """Copies the local directory source_dir into dest_dir in the store."""
    raise NotImplementedError()


class GoogleStorageDataStore(DataStore):
  """DataStore backed by a Google Storage URL, driven through gsutil."""

  def __init__(self, data_store_url):
    self._url = data_store_url.rstrip('/')

  def target_name(self):
    return self._url

  def target_type(self):
    return 'Google Storage'

  def does_storage_object_exist(self, name):
    try:
      output = subprocess.check_output([
          'gsutil', 'ls', '/'.join((self._url, name))])
    except subprocess.CalledProcessError:
      return False
    # Exactly one listed line means exactly one matching object.
    return len(output.splitlines()) == 1

  def delete_path(self, path):
    subprocess.check_call(['gsutil', 'rm', '-r', '/'.join((self._url, path))])

  def download_file(self, name, local_path):
    subprocess.check_call([
        'gsutil', 'cp', '/'.join((self._url, name)), local_path])

  def upload_dir_contents(self, source_dir, dest_dir):
    subprocess.check_call([
        'gsutil', 'cp', '-r', source_dir, '/'.join((self._url, dest_dir))])


class LocalFileSystemDataStore(DataStore):
  """DataStore backed by a directory on the local filesystem."""

  def __init__(self, data_store_location):
    self._base_dir = data_store_location

  def target_name(self):
    return self._base_dir

  def target_type(self):
    return self._base_dir

  def does_storage_object_exist(self, name):
    return os.path.isfile(os.path.join(self._base_dir, name))

  def delete_path(self, path):
    # Resolve against the store's base directory, like every other method,
    # rather than against the current working directory.
    shutil.rmtree(os.path.join(self._base_dir, path))

  def download_file(self, name, local_path):
    shutil.copyfile(os.path.join(self._base_dir, name), local_path)

  def upload_dir_contents(self, source_dir, dest_dir):
    def copytree(src_dir, dst_dir):
      # Recursive copy that tolerates an already existing destination.
      if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
      for item in os.listdir(src_dir):
        source = os.path.join(src_dir, item)
        dest = os.path.join(dst_dir, item)
        if os.path.isdir(source):
          copytree(source, dest)
        else:
          shutil.copy2(source, dest)
    copytree(source_dir, os.path.join(self._base_dir, dest_dir))


def main():
  """Parses the command line and runs the playback; returns the exit code."""
  option_parser = optparse.OptionParser()
  option_parser.add_option(
      '', '--page_sets',
      help='Specifies the page sets to use to archive. Supports globs.',
      default='all')
  option_parser.add_option(
      '', '--record', action='store_true',
      help='Specifies whether a new website archive should be created.',
      default=False)
  option_parser.add_option(
      '', '--skia_tools',
      help=('Path to compiled Skia executable tools. '
            'render_pictures/render_pdfs is run on the set '
            'after all SKPs are captured. If the script is run without '
            '--non-interactive then the debugger is also run at the end. Debug '
            'builds are recommended because they seem to catch more failures '
            'than Release builds.'),
      default=None)
  option_parser.add_option(
      '', '--upload', action='store_true',
      help=('Uploads to Google Storage or copies to local filesystem storage '
            ' if this is True.'),
      default=False)
  option_parser.add_option(
      '', '--upload_to_partner_bucket', action='store_true',
      help=('Uploads SKPs to the chrome-partner-telemetry Google Storage '
            'bucket if true.'),
      default=False)
  option_parser.add_option(
      '', '--data_store',
      help=('The location of the file storage to use to download and upload '
            'files. Can be \'gs://<bucket>\' for Google Storage, or '
            'a directory for local filesystem storage'),
      default='gs://skia-skps')
  option_parser.add_option(
      '', '--alternate_upload_dir',
      help=('Uploads to a different directory in Google Storage or local '
            'storage if this flag is specified'),
      default=None)
  option_parser.add_option(
      '', '--output_dir',
      help=('Temporary directory where SKPs and webpage archives will be '
            'outputted to.'),
      default=tempfile.gettempdir())
  option_parser.add_option(
      '', '--browser_executable',
      help='The exact browser executable to run.',
      default=None)
  option_parser.add_option(
      '', '--browser_extra_args',
      help='Additional arguments to pass to the browser.',
      default=None)
  option_parser.add_option(
      '', '--chrome_src_path',
      help='Path to the chromium src directory.',
      default=None)
  option_parser.add_option(
      '', '--non-interactive', action='store_true',
      help='Runs the script without any prompts. If this flag is specified and '
           '--skia_tools is specified then the debugger is not run.',
      default=False)
  option_parser.add_option(
      '', '--skp_prefix',
      help='Prefix to add to the names of generated SKPs.',
      default=None)
  options, _ = option_parser.parse_args()

  playback = SkPicturePlayback(options)
  return playback.Run()


if '__main__' == __name__:
  sys.exit(main())