Home | History | Annotate | Download | only in tools
      1 # Copyright 2015 The PDFium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 
      6 import json
      7 import os
      8 import shlex
      9 import shutil
     10 import urllib2
     11 
     12 
     13 def _ParseKeyValuePairs(kv_str):
     14   """
     15   Parses a string of the type 'key1 value1 key2 value2' into a dict.
     16   """
     17   kv_pairs = shlex.split(kv_str)
     18   if len(kv_pairs) % 2:
     19     raise ValueError('Uneven number of key/value pairs. Got %s' % kv_str)
     20   return { kv_pairs[i]:kv_pairs[i + 1] for i in xrange(0, len(kv_pairs), 2) }
     21 
     22 
     23 # This module downloads a json provided by Skia Gold with the expected baselines
     24 # for each test file.
     25 #
     26 # The expected format for the json is:
     27 # {
     28 #   "commit": {
#     "author": "John Doe (jdoe@chromium.org)",
     30 #     "commit_time": 1510598123,
     31 #     "hash": "cee39e6e90c219cc91f2c94a912a06977f4461a0"
     32 #   },
     33 #   "master": {
     34 #     "abc.pdf.1": {
     35 #       "0ec3d86f545052acd7c9a16fde8ca9d4": 1,
     36 #       "80455b71673becc9fbc100d6da56ca65": 1,
     37 #       "b68e2ecb80090b4502ec89ad1be2322c": 1
     38 #      },
     39 #     "defgh.pdf.0": {
     40 #       "01e020cd4cd05c6738e479a46a506044": 1,
     41 #       "b68e2ecb80090b4502ec89ad1be2322c": 1
     42 #     }
     43 #   },
     44 #   "changeLists": {
     45 #     "18499" : {
     46 #       "abc.pdf.1": {
     47 #         "d5dd649124cf1779152253dc8fb239c5": 1,
     48 #         "42a270581930579cdb0f28674972fb1a": 1,
     49 #       }
     50 #     }
     51 #   }
     52 # }
     53 class GoldBaseline(object):
     54 
     55   def __init__(self, properties_str):
     56     """
     57     properties_str is a string with space separated key/value pairs that
     58                is used to find the cl number for which to baseline
     59     """
     60     self._properties = _ParseKeyValuePairs(properties_str)
     61     self._baselines = self._LoadSkiaGoldBaselines()
     62 
     63   def _LoadSkiaGoldBaselines(self):
     64     """
     65     Download the baseline json and return a list of the two baselines that
     66     should be used to match hashes (master and cl#).
     67     """
     68     GOLD_BASELINE_URL = ('https://storage.googleapis.com/skia-infra-gm/'
     69                          'hash_files/gold-pdfium-baseline.json')
     70     try:
     71       response = urllib2.urlopen(GOLD_BASELINE_URL)
     72       json_data = response.read()
     73     except (urllib2.HTTPError, urllib2.URLError) as e:
     74       print ('Error: Unable to read skia gold json from %s: %s'
     75              % (GOLD_BASELINE_URL, e))
     76       return None
     77 
     78     try:
     79       data = json.loads(json_data)
     80     except ValueError:
     81       print 'Error: Malformed json read from %s: %s' % (GOLD_BASELINE_URL, e)
     82       return None
     83 
     84     try:
     85       master_baseline = data['master']
     86     except (KeyError, TypeError):
     87       print ('Error: "master" key not in json read from %s: %s'
     88              % (GOLD_BASELINE_URL, e))
     89       return None
     90 
     91     cl_number_str = self._properties.get('issue')
     92     if cl_number_str is None:
     93       return [master_baseline]
     94 
     95     try:
     96       cl_baseline = data['changeLists'][cl_number_str]
     97     except KeyError:
     98       return [master_baseline]
     99 
    100     return [cl_baseline, master_baseline]
    101 
    102   # Return values for MatchLocalResult().
    103   MATCH = 'match'
    104   MISMATCH = 'mismatch'
    105   NO_BASELINE = 'no_baseline'
    106   BASELINE_DOWNLOAD_FAILED = 'baseline_download_failed'
    107 
    108   def MatchLocalResult(self, test_name, md5_hash):
    109     """
    110     Match a locally generated hash of a test cases rendered image with the
    111     expected hashes downloaded in the baselines json.
    112 
    113     Each baseline is a dict mapping the test case name to a dict with the
    114     expected hashes as keys. Therefore, this list of baselines should be
    115     searched until the test case name is found, then the hash should be matched
    116     with the options in that dict. If the hashes don't match, it should be
    117     considered a failure and we should not continue searching the baseline list.
    118 
    119     Returns MATCH if the md5 provided matches the ones in the baseline json,
    120     MISMATCH if it does not, NO_BASELINE if the test case has no baseline, or
    121     BASELINE_DOWNLOAD_FAILED if the baseline could not be downloaded and parsed.
    122     """
    123     if self._baselines is None:
    124       return GoldBaseline.BASELINE_DOWNLOAD_FAILED
    125 
    126     found_test_case = False
    127     for baseline in self._baselines:
    128       if test_name in baseline:
    129         found_test_case = True
    130         if md5_hash in baseline[test_name]:
    131           return GoldBaseline.MATCH
    132 
    133     return (GoldBaseline.MISMATCH if found_test_case
    134             else GoldBaseline.NO_BASELINE)
    135 
    136 
    137 # This module collects and writes output in a format expected by the
    138 # Gold baseline tool. Based on meta data provided explicitly and by
    139 # adding a series of test results it can be used to produce
    140 # a JSON file that is uploaded to Google Storage and ingested by Gold.
    141 #
# The output will look similar to this:
    143 #
    144 # {
    145 #    "build_number" : "2",
    146 #    "gitHash" : "a4a338179013b029d6dd55e737b5bd648a9fb68c",
    147 #    "key" : {
    148 #       "arch" : "arm64",
    149 #       "compiler" : "Clang",
    150 #    },
    151 #    "results" : [
    152 #       {
    153 #          "key" : {
    154 #             "config" : "vk",
    155 #             "name" : "yuv_nv12_to_rgb_effect",
    156 #             "source_type" : "gm"
    157 #          },
    158 #          "md5" : "7db34da246868d50ab9ddd776ce6d779",
    159 #          "options" : {
    160 #             "ext" : "png",
    161 #             "gamma_correct" : "no"
    162 #          }
    163 #       },
    164 #       {
    165 #          "key" : {
    166 #             "config" : "vk",
    167 #             "name" : "yuv_to_rgb_effect",
    168 #             "source_type" : "gm"
    169 #          },
    170 #          "md5" : "0b955f387740c66eb23bf0e253c80d64",
    171 #          "options" : {
    172 #             "ext" : "png",
    173 #             "gamma_correct" : "no"
    174 #          }
    175 #       }
    176 #    ],
    177 # }
    178 #
    179 class GoldResults(object):
    180   def __init__(self, source_type, outputDir, propertiesStr, keyStr,
    181                ignore_hashes_file):
    182     """
    183     source_type is the source_type (=corpus) field used for all results.
    184     output_dir is the directory where the resulting images are copied and
    185                the dm.json file is written. If the directory exists it will
    186                be removed and recreated.
    187     propertiesStr is a string with space separated key/value pairs that
    188                is used to set the top level fields in the output JSON file.
    189     keyStr is a string with space separated key/value pairs that
    190                is used to set the 'key' field in the output JSON file.
    191     ignore_hashes_file is a file that contains a list of image hashes
    192                that should be ignored.
    193     """
    194     self._source_type = source_type
    195     self._properties = _ParseKeyValuePairs(propertiesStr)
    196     self._properties["key"] = _ParseKeyValuePairs(keyStr)
    197     self._results =  []
    198     self._outputDir = outputDir
    199 
    200     # make sure the output directory exists and is empty.
    201     if os.path.exists(outputDir):
    202       shutil.rmtree(outputDir, ignore_errors=True)
    203     os.makedirs(outputDir)
    204 
    205     self._ignore_hashes = set()
    206     if ignore_hashes_file:
    207       with open(ignore_hashes_file, 'r') as ig_file:
    208         hashes=[x.strip() for x in ig_file.readlines() if x.strip()]
    209         self._ignore_hashes = set(hashes)
    210 
    211   def AddTestResult(self, testName, md5Hash, outputImagePath):
    212     # If the hash is in the list of hashes to ignore then we don'try
    213     # make a copy, but add it to the result.
    214     imgExt = os.path.splitext(outputImagePath)[1].lstrip(".")
    215     if md5Hash not in self._ignore_hashes:
    216       # Copy the image to <output_dir>/<md5Hash>.<image_extension>
    217       if not imgExt:
    218         raise ValueError("File %s does not have an extension" % outputImagePath)
    219       newFilePath = os.path.join(self._outputDir, md5Hash + '.' + imgExt)
    220       shutil.copy2(outputImagePath, newFilePath)
    221 
    222     # Add an entry to the list of test results
    223     self._results.append({
    224       "key": {
    225         "name": testName,
    226         "source_type": self._source_type,
    227       },
    228       "md5": md5Hash,
    229       "options": {
    230         "ext": imgExt,
    231         "gamma_correct": "no"
    232       }
    233     })
    234 
    235   def WriteResults(self):
    236     self._properties.update({
    237       "results": self._results
    238     })
    239 
    240     outputFileName = os.path.join(self._outputDir, "dm.json")
    241     with open(outputFileName, 'wb') as outfile:
    242       json.dump(self._properties, outfile, indent=1)
    243       outfile.write("\n")
    244 
    245 # Produce example output for manual testing.
    246 if __name__ == "__main__":
    247   # Create a test directory with three empty 'image' files.
    248   testDir = "./testdirectory"
    249   if not os.path.exists(testDir):
    250     os.makedirs(testDir)
    251   open(os.path.join(testDir, "image1.png"), 'wb').close()
    252   open(os.path.join(testDir, "image2.png"), 'wb').close()
    253   open(os.path.join(testDir, "image3.png"), 'wb').close()
    254 
    255   # Create an instance and add results.
    256   propStr = """build_number 2 "builder name" Builder-Name gitHash a4a338179013b029d6dd55e737b5bd648a9fb68c"""
    257 
    258   keyStr = "arch arm64 compiler Clang configuration Debug"
    259 
    260   hash_file = os.path.join(testDir, "ignore_hashes.txt")
    261   with open(hash_file, 'wb') as f:
    262     f.write("\n".join(["hash-1","hash-4"]) + "\n")
    263 
    264   gr = GoldResults("pdfium", testDir, propStr, keyStr, hash_file)
    265   gr.AddTestResult("test-1", "hash-1", os.path.join(testDir, "image1.png"))
    266   gr.AddTestResult("test-2", "hash-2", os.path.join(testDir, "image2.png"))
    267   gr.AddTestResult("test-3", "hash-3", os.path.join(testDir, "image3.png"))
    268   gr.WriteResults()
    269