Home | History | Annotate | Download | only in rebaseline_server
      1 #!/usr/bin/python
      2 
      3 """
      4 Copyright 2014 Google Inc.
      5 
      6 Use of this source code is governed by a BSD-style license that can be
      7 found in the LICENSE file.
      8 
      9 Compare results of two render_pictures runs.
     10 
     11 TODO(epoger): Start using this module to compare ALL images (whether they
     12 were generated from GMs or SKPs), and rename it accordingly.
     13 """
     14 
     15 # System-level imports
     16 import logging
     17 import os
     18 import shutil
     19 import subprocess
     20 import tempfile
     21 import time
     22 
     23 # Must fix up PYTHONPATH before importing from within Skia
     24 import rs_fixpypath  # pylint: disable=W0611
     25 
     26 # Imports from within Skia
     27 from py.utils import git_utils
     28 from py.utils import gs_utils
     29 from py.utils import url_utils
     30 import buildbot_globals
     31 import column
     32 import gm_json
     33 import imagediffdb
     34 import imagepair
     35 import imagepairset
     36 import results
     37 
# URL under which all render_pictures images can be found in Google Storage.
# Used as the default for the constructor's image_base_gs_url argument.
#
# TODO(epoger): In order to allow live-view of GMs and other images, read this
# from the input summary files, or allow the caller to set it within the
# GET_live_results call.
DEFAULT_IMAGE_BASE_GS_URL = 'gs://' + buildbot_globals.Get('skp_images_bucket')

# Column descriptors, and display preferences for them.
# These strings are the keys of each ImagePair's extra-columns dict.
COLUMN__RESULT_TYPE = results.KEY__EXTRACOLUMNS__RESULT_TYPE
COLUMN__SOURCE_SKP = 'sourceSkpFile'
COLUMN__TILED_OR_WHOLE = 'tiledOrWhole'
COLUMN__TILENUM = 'tilenum'
COLUMN__BUILDER_A = 'builderA'
COLUMN__RENDER_MODE_A = 'renderModeA'
COLUMN__BUILDER_B = 'builderB'
COLUMN__RENDER_MODE_B = 'renderModeB'
# Known values for some of those columns.
COLUMN__TILED_OR_WHOLE__TILED = 'tiled'
COLUMN__TILED_OR_WHOLE__WHOLE = 'whole'

# Columns whose header is configured with a freeform-text filter
# (see _load_result_pairs).
FREEFORM_COLUMN_IDS = [
    COLUMN__SOURCE_SKP,
    COLUMN__TILENUM,
]
# Column order passed as column_ids_in_order when the ImagePairSets are
# converted with as_dict().
ORDERED_COLUMN_IDS = [
    COLUMN__RESULT_TYPE,
    COLUMN__SOURCE_SKP,
    COLUMN__TILED_OR_WHOLE,
    COLUMN__TILENUM,
    COLUMN__BUILDER_A,
    COLUMN__RENDER_MODE_A,
    COLUMN__BUILDER_B,
    COLUMN__RENDER_MODE_B,
]

# A special "repo:" URL type that we use to refer to Skia repo contents.
# (Useful for comparing against expectations files we store in our repo.)
REPO_URL_PREFIX = 'repo:'
# Absolute path of the directory two levels above the one containing this
# file; the part of a "repo:" URL after the prefix is joined onto this path.
REPO_BASEPATH = os.path.abspath(os.path.join(
    os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))

# Which sections within a JSON summary file can contain results.
# NOTE(review): nothing visible in this module checks section arguments
# against this list -- confirm whether callers are expected to.
ALLOWED_SECTION_NAMES = [
    gm_json.JSONKEY_ACTUALRESULTS,
    gm_json.JSONKEY_EXPECTEDRESULTS,
]
     84 
     85 
     86 class RenderedPicturesComparisons(results.BaseComparisons):
     87   """Loads results from multiple render_pictures runs into an ImagePairSet.
     88   """
     89 
     90   def __init__(self,
     91                setA_dir, setB_dir,
     92                setA_section, setB_section,
     93                image_diff_db,
     94                image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL, diff_base_url=None,
     95                setA_label=None, setB_label=None,
     96                gs=None, truncate_results=False, prefetch_only=False,
     97                download_all_images=False):
     98     """Constructor: downloads images and generates diffs.
     99 
    100     Once the object has been created (which may take a while), you can call its
    101     get_packaged_results_of_type() method to quickly retrieve the results...
    102     unless you have set prefetch_only to True, in which case we will
    103     asynchronously warm up the ImageDiffDB cache but not fill in self._results.
    104 
    105     Args:
    106       setA_dir: root directory to copy all JSON summaries from, and to use as
    107           setA within the comparisons. This directory may be specified as a
    108           gs:// URL, special "repo:" URL, or local filepath.
    109       setB_dir: root directory to copy all JSON summaries from, and to use as
    110           setB within the comparisons. This directory may be specified as a
    111           gs:// URL, special "repo:" URL, or local filepath.
    112       setA_section: which section within setA to examine; must be one of
    113           ALLOWED_SECTION_NAMES
    114       setB_section: which section within setB to examine; must be one of
    115           ALLOWED_SECTION_NAMES
    116       image_diff_db: ImageDiffDB instance
    117       image_base_gs_url: "gs://" URL pointing at the Google Storage bucket/dir
    118           under which all render_pictures result images can
    119           be found; this will be used to read images for comparison within
    120           this code, and included in the ImagePairSet (as an HTTP URL) so its
    121           consumers know where to download the images from
    122       diff_base_url: base URL within which the client should look for diff
    123           images; if not specified, defaults to a "file:///" URL representation
    124           of image_diff_db's storage_root
    125       setA_label: description to use for results in setA; if None, will be
    126           set to a reasonable default
    127       setB_label: description to use for results in setB; if None, will be
    128           set to a reasonable default
    129       gs: instance of GSUtils object we can use to download summary files
    130       truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
    131           we process, to speed up testing.
    132       prefetch_only: if True, return the new object as quickly as possible
    133           with empty self._results (just queue up all the files to process,
    134           don't wait around for them to be processed and recorded); otherwise,
    135           block until the results have been assembled and recorded in
    136           self._results.
    137       download_all_images: if True, download all images, even if we don't
    138           need them to generate diffs.  This will take much longer to complete,
    139           but is useful for warming up the bitmap cache on local disk.
    140     """
    141     super(RenderedPicturesComparisons, self).__init__()
    142     self._image_diff_db = image_diff_db
    143     self._image_base_gs_url = image_base_gs_url
    144     self._diff_base_url = (
    145         diff_base_url or
    146         url_utils.create_filepath_url(image_diff_db.storage_root))
    147     self._gs = gs
    148     self.truncate_results = truncate_results
    149     self._prefetch_only = prefetch_only
    150     self._download_all_images = download_all_images
    151 
    152     # If we are comparing two different section types, we can use those
    153     # as the default labels for setA and setB.
    154     if setA_section != setB_section:
    155       self._setA_label = setA_label or setA_section
    156       self._setB_label = setB_label or setB_section
    157     else:
    158       self._setA_label = setA_label or 'setA'
    159       self._setB_label = setB_label or 'setB'
    160 
    161     tempdir = tempfile.mkdtemp()
    162     try:
    163       setA_root = os.path.join(tempdir, 'setA')
    164       setB_root = os.path.join(tempdir, 'setB')
    165       # TODO(stephana): There is a potential race condition here... we copy
    166       # the contents out of the source_dir, and THEN we get the commithash
    167       # of source_dir.  If source_dir points at a git checkout, and that
    168       # checkout is updated (by a different thread/process) during this
    169       # operation, then the contents and commithash will be out of sync.
    170       self._copy_dir_contents(source_dir=setA_dir, dest_dir=setA_root)
    171       setA_repo_revision = self._get_repo_revision(source_dir=setA_dir)
    172       self._copy_dir_contents(source_dir=setB_dir, dest_dir=setB_root)
    173       setB_repo_revision = self._get_repo_revision(source_dir=setB_dir)
    174 
    175       self._setA_descriptions = {
    176           results.KEY__SET_DESCRIPTIONS__DIR: setA_dir,
    177           results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setA_repo_revision,
    178           results.KEY__SET_DESCRIPTIONS__SECTION: setA_section,
    179       }
    180       self._setB_descriptions = {
    181           results.KEY__SET_DESCRIPTIONS__DIR: setB_dir,
    182           results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setB_repo_revision,
    183           results.KEY__SET_DESCRIPTIONS__SECTION: setB_section,
    184       }
    185 
    186       time_start = int(time.time())
    187       self._results = self._load_result_pairs(
    188           setA_root=setA_root, setB_root=setB_root,
    189           setA_section=setA_section, setB_section=setB_section)
    190       if self._results:
    191         self._timestamp = int(time.time())
    192         logging.info('Number of download file collisions: %s' %
    193                      imagediffdb.global_file_collisions)
    194         logging.info('Results complete; took %d seconds.' %
    195                      (self._timestamp - time_start))
    196     finally:
    197       shutil.rmtree(tempdir)
    198 
    199   def _load_result_pairs(self, setA_root, setB_root,
    200                          setA_section, setB_section):
    201     """Loads all JSON image summaries from 2 directory trees and compares them.
    202 
    203     TODO(stephana): This method is only called from within __init__(); it might
    204     make more sense to just roll the content of this method into __init__().
    205 
    206     Args:
    207       setA_root: root directory containing JSON summaries of rendering results
    208       setB_root: root directory containing JSON summaries of rendering results
    209       setA_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
    210           gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setA
    211       setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
    212           gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
    213 
    214     Returns the summary of all image diff results (or None, depending on
    215     self._prefetch_only).
    216     """
    217     logging.info('Reading JSON image summaries from dirs %s and %s...' % (
    218         setA_root, setB_root))
    219     setA_dicts = self.read_dicts_from_root(setA_root)
    220     setB_dicts = self.read_dicts_from_root(setB_root)
    221     logging.info('Comparing summary dicts...')
    222 
    223     all_image_pairs = imagepairset.ImagePairSet(
    224         descriptions=(self._setA_label, self._setB_label),
    225         diff_base_url=self._diff_base_url)
    226     failing_image_pairs = imagepairset.ImagePairSet(
    227         descriptions=(self._setA_label, self._setB_label),
    228         diff_base_url=self._diff_base_url)
    229 
    230     # Override settings for columns that should be filtered using freeform text.
    231     for column_id in FREEFORM_COLUMN_IDS:
    232       factory = column.ColumnHeaderFactory(
    233           header_text=column_id, use_freeform_filter=True)
    234       all_image_pairs.set_column_header_factory(
    235           column_id=column_id, column_header_factory=factory)
    236       failing_image_pairs.set_column_header_factory(
    237           column_id=column_id, column_header_factory=factory)
    238 
    239     all_image_pairs.ensure_extra_column_values_in_summary(
    240         column_id=COLUMN__RESULT_TYPE, values=[
    241             results.KEY__RESULT_TYPE__FAILED,
    242             results.KEY__RESULT_TYPE__NOCOMPARISON,
    243             results.KEY__RESULT_TYPE__SUCCEEDED,
    244         ])
    245     failing_image_pairs.ensure_extra_column_values_in_summary(
    246         column_id=COLUMN__RESULT_TYPE, values=[
    247             results.KEY__RESULT_TYPE__FAILED,
    248             results.KEY__RESULT_TYPE__NOCOMPARISON,
    249         ])
    250 
    251     logging.info('Starting to add imagepairs to queue.')
    252     self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
    253 
    254     union_dict_paths = sorted(set(setA_dicts.keys() + setB_dicts.keys()))
    255     num_union_dict_paths = len(union_dict_paths)
    256     dict_num = 0
    257     for dict_path in union_dict_paths:
    258       dict_num += 1
    259       logging.info(
    260           'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
    261           (dict_num, num_union_dict_paths, dict_path))
    262 
    263       dictA = self.get_default(setA_dicts, None, dict_path)
    264       self._validate_dict_version(dictA)
    265       dictA_results = self.get_default(dictA, {}, setA_section)
    266 
    267       dictB = self.get_default(setB_dicts, None, dict_path)
    268       self._validate_dict_version(dictB)
    269       dictB_results = self.get_default(dictB, {}, setB_section)
    270 
    271       image_A_base_url = self.get_default(
    272           setA_dicts, self._image_base_gs_url, dict_path,
    273           gm_json.JSONKEY_IMAGE_BASE_GS_URL)
    274       image_B_base_url = self.get_default(
    275           setB_dicts, self._image_base_gs_url, dict_path,
    276           gm_json.JSONKEY_IMAGE_BASE_GS_URL)
    277 
    278       # get the builders and render modes for each set
    279       builder_A     = self.get_default(dictA, None,
    280                         gm_json.JSONKEY_DESCRIPTIONS,
    281                         gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
    282       render_mode_A = self.get_default(dictA, None,
    283                         gm_json.JSONKEY_DESCRIPTIONS,
    284                         gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)
    285       builder_B     = self.get_default(dictB, None,
    286                         gm_json.JSONKEY_DESCRIPTIONS,
    287                         gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
    288       render_mode_B = self.get_default(dictB, None,
    289                         gm_json.JSONKEY_DESCRIPTIONS,
    290                         gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)
    291 
    292       skp_names = sorted(set(dictA_results.keys() + dictB_results.keys()))
    293       # Just for manual testing... truncate to an arbitrary subset.
    294       if self.truncate_results:
    295         skp_names = skp_names[1:3]
    296       for skp_name in skp_names:
    297         imagepairs_for_this_skp = []
    298 
    299         whole_image_A = self.get_default(
    300             dictA_results, None,
    301             skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
    302         whole_image_B = self.get_default(
    303             dictB_results, None,
    304             skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
    305 
    306         imagepairs_for_this_skp.append(self._create_image_pair(
    307             image_dict_A=whole_image_A, image_dict_B=whole_image_B,
    308             image_A_base_url=image_A_base_url,
    309             image_B_base_url=image_B_base_url,
    310             builder_A=builder_A, render_mode_A=render_mode_A,
    311             builder_B=builder_B, render_mode_B=render_mode_B,
    312             source_json_file=dict_path,
    313             source_skp_name=skp_name, tilenum=None))
    314 
    315         tiled_images_A = self.get_default(
    316             dictA_results, [],
    317             skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
    318         tiled_images_B = self.get_default(
    319             dictB_results, [],
    320             skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
    321         if tiled_images_A or tiled_images_B:
    322           num_tiles_A = len(tiled_images_A)
    323           num_tiles_B = len(tiled_images_B)
    324           num_tiles = max(num_tiles_A, num_tiles_B)
    325           for tile_num in range(num_tiles):
    326             imagepairs_for_this_skp.append(self._create_image_pair(
    327                 image_dict_A=(tiled_images_A[tile_num]
    328                               if tile_num < num_tiles_A else None),
    329                 image_dict_B=(tiled_images_B[tile_num]
    330                               if tile_num < num_tiles_B else None),
    331                 image_A_base_url=image_A_base_url,
    332                 image_B_base_url=image_B_base_url,
    333                 builder_A=builder_A, render_mode_A=render_mode_A,
    334                 builder_B=builder_B, render_mode_B=render_mode_B,
    335                 source_json_file=dict_path,
    336                 source_skp_name=skp_name, tilenum=tile_num))
    337 
    338         for one_imagepair in imagepairs_for_this_skp:
    339           if one_imagepair:
    340             all_image_pairs.add_image_pair(one_imagepair)
    341             result_type = one_imagepair.extra_columns_dict\
    342                 [COLUMN__RESULT_TYPE]
    343             if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
    344               failing_image_pairs.add_image_pair(one_imagepair)
    345 
    346     logging.info('Finished adding imagepairs to queue.')
    347     self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
    348 
    349     if self._prefetch_only:
    350       return None
    351     else:
    352       return {
    353           results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
    354               column_ids_in_order=ORDERED_COLUMN_IDS),
    355           results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
    356               column_ids_in_order=ORDERED_COLUMN_IDS),
    357       }
    358 
    359   def _validate_dict_version(self, result_dict):
    360     """Raises Exception if the dict is not the type/version we know how to read.
    361 
    362     Args:
    363       result_dict: dictionary holding output of render_pictures; if None,
    364           this method will return without raising an Exception
    365     """
    366     # TODO(stephana): These values should be defined as constants somewhere,
    367     # to be kept in sync between this file and writable_expectations.py
    368     expected_header_type = 'ChecksummedImages'
    369     expected_header_revision = 1
    370 
    371     if result_dict == None:
    372       return
    373     header = result_dict[gm_json.JSONKEY_HEADER]
    374     header_type = header[gm_json.JSONKEY_HEADER_TYPE]
    375     if header_type != expected_header_type:
    376       raise Exception('expected header_type "%s", but got "%s"' % (
    377           expected_header_type, header_type))
    378     header_revision = header[gm_json.JSONKEY_HEADER_REVISION]
    379     if header_revision != expected_header_revision:
    380       raise Exception('expected header_revision %d, but got %d' % (
    381           expected_header_revision, header_revision))
    382 
    383   def _create_image_pair(self, image_dict_A, image_dict_B,
    384                          image_A_base_url, image_B_base_url,
    385                          builder_A, render_mode_A,
    386                          builder_B, render_mode_B,
    387                          source_json_file,
    388                          source_skp_name, tilenum):
    389     """Creates an ImagePair object for this pair of images.
    390 
    391     Args:
    392       image_dict_A: dict with JSONKEY_IMAGE_* keys, or None if no image
    393       image_dict_B: dict with JSONKEY_IMAGE_* keys, or None if no image
    394       image_A_base_url: base URL for image A
    395       image_B_base_url: base URL for image B
    396       builder_A: builder that created image set A or None if unknow
    397       render_mode_A: render mode used to generate image set A or None if
    398                      unknown.
    399       builder_B: builder that created image set A or None if unknow
    400       render_mode_B: render mode used to generate image set A or None if
    401                      unknown.
    402       source_json_file: string; relative path of the JSON file where this
    403                         result came from, within setA and setB.
    404       source_skp_name: string; name of the source SKP file
    405       tilenum: which tile, or None if a wholeimage
    406 
    407     Returns:
    408       An ImagePair object, or None if both image_dict_A and image_dict_B are
    409       None.
    410     """
    411     if (not image_dict_A) and (not image_dict_B):
    412       return None
    413 
    414     def _checksum_and_relative_url(dic):
    415       if dic:
    416         return ((dic[gm_json.JSONKEY_IMAGE_CHECKSUMALGORITHM],
    417                  int(dic[gm_json.JSONKEY_IMAGE_CHECKSUMVALUE])),
    418                 dic[gm_json.JSONKEY_IMAGE_FILEPATH])
    419       else:
    420         return None, None
    421 
    422     imageA_checksum, imageA_relative_url = _checksum_and_relative_url(
    423         image_dict_A)
    424     imageB_checksum, imageB_relative_url = _checksum_and_relative_url(
    425         image_dict_B)
    426 
    427     if not imageA_checksum:
    428       result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
    429     elif not imageB_checksum:
    430       result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
    431     elif imageA_checksum == imageB_checksum:
    432       result_type = results.KEY__RESULT_TYPE__SUCCEEDED
    433     else:
    434       result_type = results.KEY__RESULT_TYPE__FAILED
    435 
    436     extra_columns_dict = {
    437         COLUMN__RESULT_TYPE: result_type,
    438         COLUMN__SOURCE_SKP: source_skp_name,
    439         COLUMN__BUILDER_A: builder_A,
    440         COLUMN__RENDER_MODE_A: render_mode_A,
    441         COLUMN__BUILDER_B: builder_B,
    442         COLUMN__RENDER_MODE_B: render_mode_B,
    443     }
    444     if tilenum == None:
    445       extra_columns_dict[COLUMN__TILED_OR_WHOLE] = COLUMN__TILED_OR_WHOLE__WHOLE
    446       extra_columns_dict[COLUMN__TILENUM] = 'N/A'
    447     else:
    448       extra_columns_dict[COLUMN__TILED_OR_WHOLE] = COLUMN__TILED_OR_WHOLE__TILED
    449       extra_columns_dict[COLUMN__TILENUM] = str(tilenum)
    450 
    451     try:
    452       return imagepair.ImagePair(
    453           image_diff_db=self._image_diff_db,
    454           imageA_base_url=image_A_base_url,
    455           imageB_base_url=image_B_base_url,
    456           imageA_relative_url=imageA_relative_url,
    457           imageB_relative_url=imageB_relative_url,
    458           extra_columns=extra_columns_dict,
    459           source_json_file=source_json_file,
    460           download_all_images=self._download_all_images)
    461     except (KeyError, TypeError):
    462       logging.exception(
    463           'got exception while creating ImagePair for'
    464           ' urlPair=("%s","%s"), source_skp_name="%s", tilenum="%s"' % (
    465               imageA_relative_url, imageB_relative_url, source_skp_name,
    466               tilenum))
    467       return None
    468 
    469   def _copy_dir_contents(self, source_dir, dest_dir):
    470     """Copy all contents of source_dir into dest_dir, recursing into subdirs.
    471 
    472     Args:
    473       source_dir: path to source dir (GS URL, local filepath, or a special
    474           "repo:" URL type that points at a file within our Skia checkout)
    475       dest_dir: path to destination dir (local filepath)
    476 
    477     The copy operates as a "merge with overwrite": any files in source_dir will
    478     be "overlaid" on top of the existing content in dest_dir.  Existing files
    479     with the same names will be overwritten.
    480     """
    481     if gs_utils.GSUtils.is_gs_url(source_dir):
    482       (bucket, path) = gs_utils.GSUtils.split_gs_url(source_dir)
    483       self._gs.download_dir_contents(source_bucket=bucket, source_dir=path,
    484                                      dest_dir=dest_dir)
    485     elif source_dir.lower().startswith(REPO_URL_PREFIX):
    486       repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
    487       shutil.copytree(repo_dir, dest_dir)
    488     else:
    489       shutil.copytree(source_dir, dest_dir)
    490 
    491   def _get_repo_revision(self, source_dir):
    492     """Get the commit hash of source_dir, IF it refers to a git checkout.
    493 
    494     Args:
    495       source_dir: path to source dir (GS URL, local filepath, or a special
    496           "repo:" URL type that points at a file within our Skia checkout;
    497           only the "repo:" URL type will have a commit hash.
    498     """
    499     if source_dir.lower().startswith(REPO_URL_PREFIX):
    500       repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
    501       return subprocess.check_output(
    502           args=[git_utils.GIT, 'rev-parse', 'HEAD'], cwd=repo_dir).strip()
    503     else:
    504       return None
    505