#!/usr/bin/python

"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Repackage expected/actual GM results as needed by our HTML rebaseline viewer.
"""

# System-level imports
import fnmatch
import logging
import os
import re
import sys
import time

# Imports from within Skia
#
# We need to add the 'gm' directory, so that we can import gm_json.py within
# that directory.  That script allows us to parse the actual-results.json file
# written out by the GM tool.
# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
# so any dirs that are already in the PYTHONPATH will be preferred.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
  sys.path.append(GM_DIRECTORY)
import gm_json
import imagediffdb

IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
IMAGE_FILENAME_FORMATTER = '%s_%s.png'  # pass in (testname, config)

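# For example (the test/config values here are just illustrative),
# IMAGE_FILENAME_FORMATTER % ('bigmatrix', '8888') produces
# 'bigmatrix_8888.png', and IMAGE_FILENAME_RE parses such a filename back
# into its parts:
#   IMAGE_FILENAME_RE.match('bigmatrix_8888.png').groups()
#   # -> ('bigmatrix', '8888')
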
FIELDS_PASSED_THRU_VERBATIM = [
    gm_json.JSONKEY_EXPECTEDRESULTS_BUGS,
    gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,
    gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,
]
CATEGORIES_TO_SUMMARIZE = [
    'builder', 'test', 'config', 'resultType',
    gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE,
    gm_json.JSONKEY_EXPECTEDRESULTS_REVIEWED,
]

RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'

class Results(object):
  """Loads actual and expected results from all builders, supplying combined
  reports as requested.

  Once this object has been constructed, the results (in self._results[])
  are immutable.  If you want to update the results based on updated JSON
  file contents, you will need to create a new Results object."""

  def __init__(self, actuals_root, expected_root, generated_images_root):
    """
    Args:
      actuals_root: root directory containing all actual-results.json files
      expected_root: root directory containing all expected-results.json files
      generated_images_root: directory within which to create all pixel diffs;
          if this directory does not yet exist, it will be created
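
    Example (a sketch; the paths here are hypothetical):
      results = Results(
          actuals_root='/tmp/gm-actuals',
          expected_root='/path/to/skia/expectations/gm',
          generated_images_root='/tmp/generated-images')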
    """
    self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root)
    self._actuals_root = actuals_root
    self._expected_root = expected_root
    self._load_actual_and_expected()
    self._timestamp = int(time.time())

  def get_timestamp(self):
    """Return the time at which this object was created, in seconds past epoch
    (UTC).
    """
    return self._timestamp

  def edit_expectations(self, modifications):
    """Edit the expectations stored within this object and write them back
    to disk.

    Note that this will NOT update the results stored in self._results;
    in order to see those updates, you must instantiate a new Results object
    based on the (now updated) files on disk.

    Args:
      modifications: a list of dictionaries, one for each expectation to update:

         [
           {
             'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
             'test': 'bigmatrix',
             'config': '8888',
             'expectedHashType': 'bitmap-64bitMD5',
             'expectedHashDigest': '10894408024079689926',
             'bugs': [123, 456],
             'ignore-failure': false,
             'reviewed-by-human': true,
           },
           ...
         ]

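    The 'bugs', 'ignore-failure', and 'reviewed-by-human' fields are
    optional; whichever of them are present are passed through verbatim into
    the written expectations (see FIELDS_PASSED_THRU_VERBATIM).

    Example (a sketch; all values are illustrative):
      results.edit_expectations([{
          'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
          'test': 'bigmatrix',
          'config': '8888',
          'expectedHashType': 'bitmap-64bitMD5',
          'expectedHashDigest': '10894408024079689926',
      }])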
    """
    expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)
    for mod in modifications:
      image_name = IMAGE_FILENAME_FORMATTER % (mod['test'], mod['config'])
      # TODO(epoger): assumes a single allowed digest per test
      allowed_digests = [[mod['expectedHashType'],
                          int(mod['expectedHashDigest'])]]
      new_expectations = {
          gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,
      }
      for field in FIELDS_PASSED_THRU_VERBATIM:
        value = mod.get(field)
        if value is not None:
          new_expectations[field] = value
      builder_dict = expected_builder_dicts[mod['builder']]
      builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS)
      if not builder_expectations:
        builder_expectations = {}
        builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations
      builder_expectations[image_name] = new_expectations
    Results._write_dicts_to_root(expected_builder_dicts, self._expected_root)

  def get_results_of_type(self, results_type):
    """Return results of some/all tests (depending on the results_type
    parameter).

    Args:
      results_type: string describing which types of results to include;
          must be one of the RESULTS_* constants

    Results are returned as a dictionary in this form:

       {
         'categories': # dictionary of categories listed in
                       # CATEGORIES_TO_SUMMARIZE, with the number of times
                       # each value appears within its category
         {
           'resultType': # category name
           {
             'failed': 29, # category value and total number found of that value
             'failure-ignored': 948,
             'no-comparison': 4502,
             'succeeded': 38609,
           },
           'builder':
           {
             'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,
             'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,
             ...
           },
           ... # other categories from CATEGORIES_TO_SUMMARIZE
         }, # end of 'categories' dictionary

         'testData': # list of test results, with a dictionary for each
         [
           {
             'resultType': 'failed',
             'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
             'test': 'bigmatrix',
             'config': '8888',
             'expectedHashType': 'bitmap-64bitMD5',
             'expectedHashDigest': '10894408024079689926',
             'actualHashType': 'bitmap-64bitMD5',
             'actualHashDigest': '2409857384569',
             'bugs': [123, 456],
             'ignore-failure': false,
             'reviewed-by-human': true,
           },
           ...
         ], # end of 'testData' list
       }
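
    Example (a sketch of typical use; the accessed fields follow the format
    above):
      failures = results.get_results_of_type(RESULTS_FAILURES)
      num_failed = failures['categories']['resultType'].get(
          gm_json.JSONKEY_ACTUALRESULTS_FAILED, 0)
      for record in failures['testData']:
        print record['builder'], record['test'], record['resultType']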
    """
    return self._results[results_type]

  @staticmethod
  def _read_dicts_from_root(root, pattern='*.json'):
    """Read all JSON dictionaries within a directory tree.

    Args:
      root: path to root of directory tree
      pattern: which files to read within root (fnmatch-style pattern)

    Returns:
      A meta-dictionary containing all the JSON dictionaries found within
      the directory tree, keyed by the builder name of each dictionary.

    Raises:
      IOError if root does not refer to an existing directory
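
    Example (hypothetical directory layout):
      # root/Test-Builder-A/expected-results.json
      # root/Test-Builder-B/expected-results.json
      meta_dict = Results._read_dicts_from_root(root)
      # meta_dict.keys() -> ['Test-Builder-A', 'Test-Builder-B']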
    """
    if not os.path.isdir(root):
      raise IOError('no directory found at path %s' % root)
    meta_dict = {}
    for dirpath, dirnames, filenames in os.walk(root):
      for matching_filename in fnmatch.filter(filenames, pattern):
        builder = os.path.basename(dirpath)
        # If we are reading from the collection of actual results, skip over
        # the Trybot results (we don't maintain baselines for them).
        if builder.endswith('-Trybot'):
          continue
        fullpath = os.path.join(dirpath, matching_filename)
        meta_dict[builder] = gm_json.LoadFromFile(fullpath)
    return meta_dict

  @staticmethod
  def _write_dicts_to_root(meta_dict, root, pattern='*.json'):
    """Write all per-builder dictionaries within meta_dict to files under
    the root path.

    Security note: this will only write to files that already exist within
    the root path (as found by os.walk() within root), so we don't need to
    worry about malformed content writing to disk outside of root.
    However, the data written to those files is not double-checked, so it
    could contain poisonous data.

    Args:
      meta_dict: a builder-keyed meta-dictionary containing all the JSON
                 dictionaries we want to write out
      root: path to root of directory tree within which to write files
      pattern: which files to write within root (fnmatch-style pattern)

    Raises:
      IOError if root does not refer to an existing directory
      KeyError if the set of per-builder dictionaries actually written out
               differs from the set of builders in meta_dict
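
    Example (a sketch; the root path is hypothetical):
      meta_dict = Results._read_dicts_from_root('/path/to/expected-root')
      # ...modify the per-builder dictionaries in place...
      Results._write_dicts_to_root(meta_dict, '/path/to/expected-root')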
    """
    if not os.path.isdir(root):
      raise IOError('no directory found at path %s' % root)
    actual_builders_written = []
    for dirpath, dirnames, filenames in os.walk(root):
      for matching_filename in fnmatch.filter(filenames, pattern):
        builder = os.path.basename(dirpath)
        # We should never encounter Trybot *expectations*, but if we are
        # writing into the actual-results dir, skip the Trybot actuals.
        # (I don't know why we would ever write into the actual-results dir,
        # though.)
        if builder.endswith('-Trybot'):
          continue
        per_builder_dict = meta_dict.get(builder)
        if per_builder_dict is not None:
          fullpath = os.path.join(dirpath, matching_filename)
          gm_json.WriteToFile(per_builder_dict, fullpath)
          actual_builders_written.append(builder)

    # Check: did we write out the set of per-builder dictionaries we
    # expected to?
    expected_builders_written = sorted(meta_dict.keys())
    actual_builders_written.sort()
    if expected_builders_written != actual_builders_written:
      raise KeyError(
          'expected to write dicts for builders %s, but actually wrote them '
          'for builders %s' % (
              expected_builders_written, actual_builders_written))

  def _generate_pixel_diffs_if_needed(self, test, expected_image, actual_image):
    """If expected_image and actual_image both exist but are different,
    add the image pair to self._image_diff_db and generate pixel diffs.

    Args:
      test: string; name of test
      expected_image: (hashType, hashDigest) tuple describing the expected image
      actual_image: (hashType, hashDigest) tuple describing the actual image
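
    Example (a sketch; the hash digests are illustrative):
      self._generate_pixel_diffs_if_needed(
          test='bigmatrix',
          expected_image=('bitmap-64bitMD5', 10894408024079689926),
          actual_image=('bitmap-64bitMD5', 2409857384569))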
    """
    if expected_image == actual_image:
      return

    (expected_hashtype, expected_hashdigest) = expected_image
    (actual_hashtype, actual_hashdigest) = actual_image
    if None in [expected_hashtype, expected_hashdigest,
                actual_hashtype, actual_hashdigest]:
      return

    expected_url = gm_json.CreateGmActualUrl(
        test_name=test, hash_type=expected_hashtype,
        hash_digest=expected_hashdigest)
    actual_url = gm_json.CreateGmActualUrl(
        test_name=test, hash_type=actual_hashtype,
        hash_digest=actual_hashdigest)
    self._image_diff_db.add_image_pair(
        expected_image_locator=expected_hashdigest,
        expected_image_url=expected_url,
        actual_image_locator=actual_hashdigest,
        actual_image_url=actual_url)

  def _load_actual_and_expected(self):
    """Loads the results of all tests, across all builders (based on the
    files within self._actuals_root and self._expected_root),
    and stores them in self._results.
    """
    actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root)
    expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)

    categories_all = {}
    categories_failures = {}

    Results._ensure_included_in_category_dict(categories_all,
                                              'resultType', [
        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
        ])
    Results._ensure_included_in_category_dict(categories_failures,
                                              'resultType', [
        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        ])

    data_all = []
    data_failures = []
    for builder in sorted(actual_builder_dicts.keys()):
      actual_results_for_this_builder = (
          actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
      for result_type in sorted(actual_results_for_this_builder.keys()):
        results_of_this_type = actual_results_for_this_builder[result_type]
        if not results_of_this_type:
          continue
        for image_name in sorted(results_of_this_type.keys()):
          actual_image = results_of_this_type[image_name]

          # Default empty expectations; overwrite these if we find any real ones
          expectations_per_test = None
          expected_image = [None, None]
          try:
            expectations_per_test = (
                expected_builder_dicts
                [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name])
            # TODO(epoger): assumes a single allowed digest per test
            expected_image = (
                expectations_per_test
                [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0])
          except (KeyError, TypeError):
            # There are several cases in which we would expect to find
            # no expectations for a given test:
            #
            # 1. result_type == NOCOMPARISON
            #   There are no expectations for this test yet!
            #
            # 2. alternate rendering mode failures (e.g. serialized)
            #   In cases like
            #   https://code.google.com/p/skia/issues/detail?id=1684
            #   ('tileimagefilter GM test failing in serialized render mode'),
            #   the gm-actuals will list a failure for the alternate
            #   rendering mode even though we don't have explicit expectations
            #   for the test (the implicit expectation is that it must
            #   render the same in all rendering modes).
            #
            # Don't log type 1, because it is common.
            # Log other types, because they are rare and we should know about
            # them, but don't throw an exception, because we need to keep our
            # tools working in the meantime!
            if result_type != gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON:
              logging.warning('No expectations found for test: %s' % {
                  'builder': builder,
                  'image_name': image_name,
                  'result_type': result_type,
                  })

          # If this test was recently rebaselined, it will remain in
          # the 'failed' set of actuals until all the bots have
          # cycled (although the expectations have indeed been set
          # from the most recent actuals).  Treat these as successes
          # instead of failures.
          #
          # TODO(epoger): Do we need to do something similar in
          # other cases, such as when we have recently marked a test
          # as ignoreFailure but it still shows up in the 'failed'
          # category?  Maybe we should not rely on the result_type
          # categories recorded within the gm_actuals AT ALL, and
          # instead evaluate the result_type ourselves based on what
          # we see in expectations vs actual checksum?
          if expected_image == actual_image:
            updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED
          else:
            updated_result_type = result_type

          (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
          self._generate_pixel_diffs_if_needed(
              test=test, expected_image=expected_image,
              actual_image=actual_image)
          results_for_this_test = {
              'resultType': updated_result_type,
              'builder': builder,
              'test': test,
              'config': config,
              'actualHashType': actual_image[0],
              'actualHashDigest': str(actual_image[1]),
              'expectedHashType': expected_image[0],
              'expectedHashDigest': str(expected_image[1]),

              # FIELDS_PASSED_THRU_VERBATIM that may be overwritten below...
              gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,
          }
          if expectations_per_test:
            for field in FIELDS_PASSED_THRU_VERBATIM:
              results_for_this_test[field] = expectations_per_test.get(field)

          if updated_result_type == gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON:
            pass  # no diff record to calculate at all
          elif updated_result_type == gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
            results_for_this_test['numDifferingPixels'] = 0
            results_for_this_test['percentDifferingPixels'] = 0
            results_for_this_test['weightedDiffMeasure'] = 0
            results_for_this_test['maxDiffPerChannel'] = 0
          else:
            try:
              diff_record = self._image_diff_db.get_diff_record(
                  expected_image_locator=expected_image[1],
                  actual_image_locator=actual_image[1])
              results_for_this_test['numDifferingPixels'] = (
                  diff_record.get_num_pixels_differing())
              results_for_this_test['percentDifferingPixels'] = (
                  diff_record.get_percent_pixels_differing())
              results_for_this_test['weightedDiffMeasure'] = (
                  diff_record.get_weighted_diff_measure())
              results_for_this_test['maxDiffPerChannel'] = (
                  diff_record.get_max_diff_per_channel())
            except KeyError:
              logging.warning('unable to find diff_record for ("%s", "%s")' %
                              (expected_image[1], actual_image[1]))

          Results._add_to_category_dict(categories_all, results_for_this_test)
          data_all.append(results_for_this_test)

          # TODO(epoger): In effect, we have a list of resultTypes that we
          # include in the different result lists (data_all and data_failures).
          # This same list should be used by the calls to
          # Results._ensure_included_in_category_dict() earlier on.
          if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
            Results._add_to_category_dict(categories_failures,
                                          results_for_this_test)
            data_failures.append(results_for_this_test)

    self._results = {
      RESULTS_ALL:
        {'categories': categories_all, 'testData': data_all},
      RESULTS_FAILURES:
        {'categories': categories_failures, 'testData': data_failures},
    }

  @staticmethod
  def _add_to_category_dict(category_dict, test_results):
    """Add test_results to the category dictionary we are building.
    (See documentation of self.get_results_of_type() for the format of this
    dictionary.)

    Args:
      category_dict: category dict-of-dicts to add to; modify this in-place
      test_results: test data with which to update category_dict, in a dict:
         {
           'category_name': 'category_value',
           'category_name': 'category_value',
           ...
         }
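
    Example (illustrative; categories missing from test_results are tallied
    under None):
      d = {}
      Results._add_to_category_dict(
          d, {'builder': 'Builder-A', 'test': 'bigmatrix'})
      # d['builder'] == {'Builder-A': 1}
      # d['test'] == {'bigmatrix': 1}
      # d['config'] == {None: 1}, and so on for the other categories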
    """
    for category in CATEGORIES_TO_SUMMARIZE:
      category_value = test_results.get(category)
      if not category_dict.get(category):
        category_dict[category] = {}
      if not category_dict[category].get(category_value):
        category_dict[category][category_value] = 0
      category_dict[category][category_value] += 1

  @staticmethod
  def _ensure_included_in_category_dict(category_dict,
                                        category_name, category_values):
    """Ensure that the category name/value pairs are included in category_dict,
    even if there aren't any results with that name/value pair.
    (See documentation of self.get_results_of_type() for the format of this
    dictionary.)

    Args:
      category_dict: category dict-of-dicts to modify
      category_name: category name, as a string
      category_values: list of values we want to make sure are represented
                       for this category
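
    Example (illustrative):
      d = {}
      Results._ensure_included_in_category_dict(
          d, 'resultType', ['failed', 'succeeded'])
      # d == {'resultType': {'failed': 0, 'succeeded': 0}}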
    """
    if not category_dict.get(category_name):
      category_dict[category_name] = {}
    for category_value in category_values:
      if not category_dict[category_name].get(category_value):
        category_dict[category_name][category_value] = 0