Home | History | Annotate | Download | only in skpdiff
      1 #!/usr/bin/python
      2 # -*- coding: utf-8 -*-
      3 
      4 from __future__ import print_function
      5 import argparse
      6 import BaseHTTPServer
      7 import json
      8 import os
      9 import os.path
     10 import re
     11 import subprocess
     12 import sys
     13 import tempfile
     14 import urllib2
     15 
     16 # Grab the script path because that is where all the static assets are
     17 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
     18 
     19 # Find the tools directory for python imports
     20 TOOLS_DIR = os.path.dirname(SCRIPT_DIR)
     21 
     22 # Find the root of the skia trunk for finding skpdiff binary
     23 SKIA_ROOT_DIR = os.path.dirname(TOOLS_DIR)
     24 
     25 # Find the default location of gm expectations
     26 DEFAULT_GM_EXPECTATIONS_DIR = os.path.join(SKIA_ROOT_DIR, 'expectations', 'gm')
     27 
     28 # Imports from within Skia
     29 if TOOLS_DIR not in sys.path:
     30     sys.path.append(TOOLS_DIR)
     31 GM_DIR = os.path.join(SKIA_ROOT_DIR, 'gm')
     32 if GM_DIR not in sys.path:
     33     sys.path.append(GM_DIR)
     34 import gm_json
     35 import jsondiff
     36 
     37 # A simple dictionary of file name extensions to MIME types. The empty string
     38 # entry is used as the default when no extension was given or if the extension
     39 # has no entry in this dictionary.
     40 MIME_TYPE_MAP = {'': 'application/octet-stream',
     41                  'html': 'text/html',
     42                  'css': 'text/css',
     43                  'png': 'image/png',
     44                  'js': 'application/javascript',
     45                  'json': 'application/json'
     46                  }
     47 
     48 
# Compiled form of gm_json's pattern for GM image file names. Its first
# capture group is read as the test name when an image is downloaded.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

# Command-line template for an skpdiff run. The placeholders are meant to be
# filled, in order, with: the skpdiff binary, the path of the JSON output
# file, and the two image directories to compare (expected, then actual).
SKPDIFF_INVOKE_FORMAT = '{} --jsonp=false -o {} -f {} {}'
     52 
     53 
     54 def get_skpdiff_path(user_path=None):
     55     """Find the skpdiff binary.
     56 
     57     @param user_path If none, searches in Release and Debug out directories of
     58            the skia root. If set, checks that the path is a real file and
     59            returns it.
     60     """
     61     skpdiff_path = None
     62     possible_paths = []
     63 
     64     # Use the user given path, or try out some good default paths.
     65     if user_path:
     66         possible_paths.append(user_path)
     67     else:
     68         possible_paths.append(os.path.join(SKIA_ROOT_DIR, 'out',
     69                                            'Release', 'skpdiff'))
     70         possible_paths.append(os.path.join(SKIA_ROOT_DIR, 'out',
     71                                            'Release', 'skpdiff.exe'))
     72         possible_paths.append(os.path.join(SKIA_ROOT_DIR, 'out',
     73                                            'Debug', 'skpdiff'))
     74         possible_paths.append(os.path.join(SKIA_ROOT_DIR, 'out',
     75                                            'Debug', 'skpdiff.exe'))
     76     # Use the first path that actually points to the binary
     77     for possible_path in possible_paths:
     78         if os.path.isfile(possible_path):
     79             skpdiff_path = possible_path
     80             break
     81 
     82     # If skpdiff was not found, print out diagnostic info for the user.
     83     if skpdiff_path is None:
     84         print('Could not find skpdiff binary. Either build it into the ' +
     85               'default directory, or specify the path on the command line.')
     86         print('skpdiff paths tried:')
     87         for possible_path in possible_paths:
     88             print('   ', possible_path)
     89     return skpdiff_path
     90 
     91 
     92 def download_file(url, output_path):
     93     """Download the file at url and place it in output_path"""
     94     reader = urllib2.urlopen(url)
     95     with open(output_path, 'wb') as writer:
     96         writer.write(reader.read())
     97 
     98 
     99 def download_gm_image(image_name, image_path, hash_val):
    100     """Download the gm result into the given path.
    101 
    102     @param image_name The GM file name, for example imageblur_gpu.png.
    103     @param image_path Path to place the image.
    104     @param hash_val   The hash value of the image.
    105     """
    106     if hash_val is None:
    107         return
    108 
    109     # Separate the test name from a image name
    110     image_match = IMAGE_FILENAME_RE.match(image_name)
    111     test_name = image_match.group(1)
    112 
    113     # Calculate the URL of the requested image
    114     image_url = gm_json.CreateGmActualUrl(
    115         test_name, gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5, hash_val)
    116 
    117     # Download the image as requested
    118     download_file(image_url, image_path)
    119 
    120 
    121 def get_image_set_from_skpdiff(skpdiff_records):
    122     """Get the set of all images references in the given records.
    123 
    124     @param skpdiff_records An array of records, which are dictionary objects.
    125     """
    126     expected_set = frozenset([r['baselinePath'] for r in skpdiff_records])
    127     actual_set = frozenset([r['testPath'] for r in skpdiff_records])
    128     return expected_set | actual_set
    129 
    130 
    131 def set_expected_hash_in_json(expected_results_json, image_name, hash_value):
    132     """Set the expected hash for the object extracted from
    133     expected-results.json. Note that this only work with bitmap-64bitMD5 hash
    134     types.
    135 
    136     @param expected_results_json The Python dictionary with the results to
    137     modify.
    138     @param image_name            The name of the image to set the hash of.
    139     @param hash_value            The hash to set for the image.
    140     """
    141     expected_results = expected_results_json[gm_json.JSONKEY_EXPECTEDRESULTS]
    142 
    143     if image_name in expected_results:
    144         expected_results[image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0][1] = hash_value
    145     else:
    146         expected_results[image_name] = {
    147             gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS:
    148             [
    149                 [
    150                     gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5,
    151                     hash_value
    152                 ]
    153             ]
    154         }
    155 
    156 
    157 def get_head_version(path):
    158     """Get the version of the file at the given path stored inside the HEAD of
    159     the git repository. It is returned as a string.
    160 
    161     @param path The path of the file whose HEAD is returned. It is assumed the
    162     path is inside a git repo rooted at SKIA_ROOT_DIR.
    163     """
    164 
    165     # git-show will not work with absolute paths. This ensures we give it a path
    166     # relative to the skia root. This path also has to use forward slashes, even
    167     # on windows.
    168     git_path = os.path.relpath(path, SKIA_ROOT_DIR).replace('\\', '/')
    169     git_show_proc = subprocess.Popen(['git', 'show', 'HEAD:' + git_path],
    170                                      stdout=subprocess.PIPE)
    171 
    172     # When invoked outside a shell, git will output the last committed version
    173     # of the file directly to stdout.
    174     git_version_content, _ = git_show_proc.communicate()
    175     return git_version_content
    176 
    177 
    178 class GMInstance:
    179     """Information about a GM test result on a specific device:
    180      - device_name = the name of the device that rendered it
    181      - image_name = the GM test name and config
    182      - expected_hash = the current expected hash value
    183      - actual_hash = the actual hash value
    184      - is_rebaselined = True if actual_hash is what is currently in the expected
    185                         results file, False otherwise.
    186     """
    187     def __init__(self,
    188                  device_name, image_name,
    189                  expected_hash, actual_hash,
    190                  is_rebaselined):
    191         self.device_name = device_name
    192         self.image_name = image_name
    193         self.expected_hash = expected_hash
    194         self.actual_hash = actual_hash
    195         self.is_rebaselined = is_rebaselined
    196 
    197 
    198 class ExpectationsManager:
    199     def __init__(self, expectations_dir, expected_name, updated_name,
    200                  skpdiff_path):
    201         """
    202         @param expectations_dir   The directory to traverse for results files.
    203                This should resemble expectations/gm in the Skia trunk.
    204         @param expected_name      The name of the expected result files. These
    205                are in the format of expected-results.json.
    206         @param updated_name       The name of the updated expected result files.
    207                Normally this matches --expectations-filename-output for the
    208                rebaseline.py tool.
    209         @param skpdiff_path       The path used to execute the skpdiff command.
    210         """
    211         self._expectations_dir = expectations_dir
    212         self._expected_name = expected_name
    213         self._updated_name = updated_name
    214         self._skpdiff_path = skpdiff_path
    215         self._generate_gm_comparison()
    216 
    217     def _generate_gm_comparison(self):
    218         """Generate all the data needed to compare GMs:
    219            - determine which GMs changed
    220            - download the changed images
    221            - compare them with skpdiff
    222         """
    223 
    224         # Get the expectations and compare them with actual hashes
    225         self._get_expectations()
    226 
    227 
    228         # Create a temporary file tree that makes sense for skpdiff to operate
    229         # on. We take the realpath of the new temp directory because some OSs
    230         # (*cough* osx) put the temp directory behind a symlink that gets
    231         # resolved later down the pipeline and breaks the image map.
    232         image_output_dir = os.path.realpath(tempfile.mkdtemp('skpdiff'))
    233         expected_image_dir = os.path.join(image_output_dir, 'expected')
    234         actual_image_dir = os.path.join(image_output_dir, 'actual')
    235         os.mkdir(expected_image_dir)
    236         os.mkdir(actual_image_dir)
    237 
    238         # Download expected and actual images that differed into the temporary
    239         # file tree.
    240         self._download_expectation_images(expected_image_dir, actual_image_dir)
    241 
    242         # Invoke skpdiff with our downloaded images and place its results in the
    243         # temporary directory.
    244         self._skpdiff_output_path = os.path.join(image_output_dir,
    245                                                 'skpdiff_output.json')
    246         skpdiff_cmd = SKPDIFF_INVOKE_FORMAT.format(self._skpdiff_path,
    247                                                    self._skpdiff_output_path,
    248                                                    expected_image_dir,
    249                                                    actual_image_dir)
    250         os.system(skpdiff_cmd)
    251         self._load_skpdiff_output()
    252 
    253 
    254     def _get_expectations(self):
    255         """Fills self._expectations with GMInstance objects for each test whose
    256         expectation is different between the following two files:
    257          - the local filesystem's updated results file
    258          - git's head version of the expected results file
    259         """
    260         differ = jsondiff.GMDiffer()
    261         self._expectations = []
    262         for root, dirs, files in os.walk(self._expectations_dir):
    263             for expectation_file in files:
    264                 # There are many files in the expectations directory. We only
    265                 # care about expected results.
    266                 if expectation_file != self._expected_name:
    267                     continue
    268 
    269                 # Get the name of the results file, and be sure there is an
    270                 # updated result to compare against. If there is not, there is
    271                 # no point in diffing this device.
    272                 expected_file_path = os.path.join(root, self._expected_name)
    273                 updated_file_path = os.path.join(root, self._updated_name)
    274                 if not os.path.isfile(updated_file_path):
    275                     continue
    276 
    277                 # Always get the expected results from git because we may have
    278                 # changed them in a previous instance of the server.
    279                 expected_contents = get_head_version(expected_file_path)
    280                 updated_contents = None
    281                 with open(updated_file_path, 'rb') as updated_file:
    282                     updated_contents = updated_file.read()
    283 
    284                 # Read the expected results on disk to determine what we've
    285                 # already rebaselined.
    286                 commited_contents = None
    287                 with open(expected_file_path, 'rb') as expected_file:
    288                     commited_contents = expected_file.read()
    289 
    290                 # Find all expectations that did not match.
    291                 expected_diff = differ.GenerateDiffDictFromStrings(
    292                     expected_contents,
    293                     updated_contents)
    294 
    295                 # Generate a set of images that have already been rebaselined
    296                 # onto disk.
    297                 rebaselined_diff = differ.GenerateDiffDictFromStrings(
    298                     expected_contents,
    299                     commited_contents)
    300 
    301                 rebaselined_set = set(rebaselined_diff.keys())
    302 
    303                 # The name of the device corresponds to the name of the folder
    304                 # we are in.
    305                 device_name = os.path.basename(root)
    306 
    307                 # Store old and new versions of the expectation for each GM
    308                 for image_name, hashes in expected_diff.iteritems():
    309                     self._expectations.append(
    310                         GMInstance(device_name, image_name,
    311                                    hashes['old'], hashes['new'],
    312                                    image_name in rebaselined_set))
    313 
    314     def _load_skpdiff_output(self):
    315         """Loads the results of skpdiff and annotates them with whether they
    316         have already been rebaselined or not. The resulting data is store in
    317         self.skpdiff_records."""
    318         self.skpdiff_records = None
    319         with open(self._skpdiff_output_path, 'rb') as skpdiff_output_file:
    320             self.skpdiff_records = json.load(skpdiff_output_file)['records']
    321             for record in self.skpdiff_records:
    322                 record['isRebaselined'] = self.image_map[record['baselinePath']][1].is_rebaselined
    323 
    324 
    325     def _download_expectation_images(self, expected_image_dir, actual_image_dir):
    326         """Download the expected and actual images for the _expectations array.
    327 
    328         @param expected_image_dir The directory to download expected images
    329                into.
    330         @param actual_image_dir   The directory to download actual images into.
    331         """
    332         image_map = {}
    333 
    334         # Look through expectations and download their images.
    335         for expectation in self._expectations:
    336             # Build appropriate paths to download the images into.
    337             expected_image_path = os.path.join(expected_image_dir,
    338                                                expectation.device_name + '-' +
    339                                                expectation.image_name)
    340 
    341             actual_image_path = os.path.join(actual_image_dir,
    342                                              expectation.device_name + '-' +
    343                                              expectation.image_name)
    344 
    345             print('Downloading %s for device %s' % (
    346                 expectation.image_name, expectation.device_name))
    347 
    348             # Download images
    349             download_gm_image(expectation.image_name,
    350                               expected_image_path,
    351                               expectation.expected_hash)
    352 
    353             download_gm_image(expectation.image_name,
    354                               actual_image_path,
    355                               expectation.actual_hash)
    356 
    357             # Annotate the expectations with where the images were downloaded
    358             # to.
    359             expectation.expected_image_path = expected_image_path
    360             expectation.actual_image_path = actual_image_path
    361 
    362             # Map the image paths back to the expectations.
    363             image_map[expected_image_path] = (False, expectation)
    364             image_map[actual_image_path] = (True, expectation)
    365 
    366         self.image_map = image_map
    367 
    368     def _set_expected_hash(self, device_name, image_name, hash_value):
    369         """Set the expected hash for the image of the given device. This always
    370         writes directly to the expected results file of the given device
    371 
    372         @param device_name The name of the device to write the hash to.
    373         @param image_name  The name of the image whose hash to set.
    374         @param hash_value  The value of the hash to set.
    375         """
    376 
    377         # Retrieve the expected results file as it is in the working tree
    378         json_path = os.path.join(self._expectations_dir, device_name,
    379                                  self._expected_name)
    380         expectations = gm_json.LoadFromFile(json_path)
    381 
    382         # Set the specified hash.
    383         set_expected_hash_in_json(expectations, image_name, hash_value)
    384 
    385         # Write it out to disk using gm_json to keep the formatting consistent.
    386         gm_json.WriteToFile(expectations, json_path)
    387 
    388     def commit_rebaselines(self, rebaselines):
    389         """Sets the expected results file to use the hashes of the images in
    390         the rebaselines list. If a expected result image is not in rebaselines
    391         at all, the old hash will be used.
    392 
    393         @param rebaselines A list of image paths to use the hash of.
    394         """
    395         # Reset all expectations to their old hashes because some of them may
    396         # have been set to the new hash by a previous call to this function.
    397         for expectation in self._expectations:
    398             expectation.is_rebaselined = False
    399             self._set_expected_hash(expectation.device_name,
    400                                     expectation.image_name,
    401                                     expectation.expected_hash)
    402 
    403         # Take all the images to rebaseline
    404         for image_path in rebaselines:
    405             # Get the metadata about the image at the path.
    406             is_actual, expectation = self.image_map[image_path]
    407 
    408             expectation.is_rebaselined = is_actual
    409             expectation_hash = expectation.actual_hash if is_actual else\
    410                                expectation.expected_hash
    411 
    412             # Write out that image's hash directly to the expected results file.
    413             self._set_expected_hash(expectation.device_name,
    414                                     expectation.image_name,
    415                                     expectation_hash)
    416 
    417         self._load_skpdiff_output()
    418 
    419 
    420 class SkPDiffHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    421     def send_file(self, file_path):
    422         # Grab the extension if there is one
    423         extension = os.path.splitext(file_path)[1]
    424         if len(extension) >= 1:
    425             extension = extension[1:]
    426 
    427         # Determine the MIME type of the file from its extension
    428         mime_type = MIME_TYPE_MAP.get(extension, MIME_TYPE_MAP[''])
    429 
    430         # Open the file and send it over HTTP
    431         if os.path.isfile(file_path):
    432             with open(file_path, 'rb') as sending_file:
    433                 self.send_response(200)
    434                 self.send_header('Content-type', mime_type)
    435                 self.end_headers()
    436                 self.wfile.write(sending_file.read())
    437         else:
    438             self.send_error(404)
    439 
    440     def serve_if_in_dir(self, dir_path, file_path):
    441         # Determine if the file exists relative to the given dir_path AND exists
    442         # under the dir_path. This is to prevent accidentally serving files
    443         # outside the directory intended using symlinks, or '../'.
    444         real_path = os.path.normpath(os.path.join(dir_path, file_path))
    445         if os.path.commonprefix([real_path, dir_path]) == dir_path:
    446             if os.path.isfile(real_path):
    447                 self.send_file(real_path)
    448                 return True
    449         return False
    450 
    451     def do_GET(self):
    452         # Simple rewrite rule of the root path to 'viewer.html'
    453         if self.path == '' or self.path == '/':
    454             self.path = '/viewer.html'
    455 
    456         # The [1:] chops off the leading '/'
    457         file_path = self.path[1:]
    458 
    459         # Handle skpdiff_output.json manually because it is was processed by the
    460         # server when it was started and does not exist as a file.
    461         if file_path == 'skpdiff_output.json':
    462             self.send_response(200)
    463             self.send_header('Content-type', MIME_TYPE_MAP['json'])
    464             self.end_headers()
    465 
    466             # Add JSONP padding to the JSON because the web page expects it. It
    467             # expects it because it was designed to run with or without a web
    468             # server. Without a web server, the only way to load JSON is with
    469             # JSONP.
    470             skpdiff_records = self.server.expectations_manager.skpdiff_records
    471             self.wfile.write('var SkPDiffRecords = ')
    472             json.dump({'records': skpdiff_records}, self.wfile)
    473             self.wfile.write(';')
    474             return
    475 
    476         # Attempt to send static asset files first.
    477         if self.serve_if_in_dir(SCRIPT_DIR, file_path):
    478             return
    479 
    480         # WARNING: Serving any file the user wants is incredibly insecure. Its
    481         # redeeming quality is that we only serve gm files on a white list.
    482         if self.path in self.server.image_set:
    483             self.send_file(self.path)
    484             return
    485 
    486         # If no file to send was found, just give the standard 404
    487         self.send_error(404)
    488 
    489     def do_POST(self):
    490         if self.path == '/commit_rebaselines':
    491             content_length = int(self.headers['Content-length'])
    492             request_data = json.loads(self.rfile.read(content_length))
    493             rebaselines = request_data['rebaselines']
    494             self.server.expectations_manager.commit_rebaselines(rebaselines)
    495             self.send_response(200)
    496             self.send_header('Content-type', 'application/json')
    497             self.end_headers()
    498             self.wfile.write('{"success":true}')
    499             return
    500 
    501         # If the we have no handler for this path, give em' the 404
    502         self.send_error(404)
    503 
    504 
    505 def run_server(expectations_manager, port=8080):
    506     # It's important to parse the results file so that we can make a set of
    507     # images that the web page might request.
    508     skpdiff_records = expectations_manager.skpdiff_records
    509     image_set = get_image_set_from_skpdiff(skpdiff_records)
    510 
    511     # Do not bind to interfaces other than localhost because the server will
    512     # attempt to serve files relative to the root directory as a last resort
    513     # before 404ing. This means all of your files can be accessed from this
    514     # server, so DO NOT let this server listen to anything but localhost.
    515     server_address = ('127.0.0.1', port)
    516     http_server = BaseHTTPServer.HTTPServer(server_address, SkPDiffHandler)
    517     http_server.image_set = image_set
    518     http_server.expectations_manager = expectations_manager
    519     print('Navigate thine browser to: http://{}:{}/'.format(*server_address))
    520     http_server.serve_forever()
    521 
    522 
    523 def main():
    524     parser = argparse.ArgumentParser()
    525     parser.add_argument('--port', '-p', metavar='PORT',
    526                         type=int,
    527                         default=8080,
    528                         help='port to bind the server to; ' +
    529                         'defaults to %(default)s',
    530                         )
    531 
    532     parser.add_argument('--expectations-dir', metavar='EXPECTATIONS_DIR',
    533                         default=DEFAULT_GM_EXPECTATIONS_DIR,
    534                         help='path to the gm expectations; ' +
    535                         'defaults to %(default)s'
    536                         )
    537 
    538     parser.add_argument('--expected',
    539                         metavar='EXPECTATIONS_FILE_NAME',
    540                         default='expected-results.json',
    541                         help='the file name of the expectations JSON; ' +
    542                         'defaults to %(default)s'
    543                         )
    544 
    545     parser.add_argument('--updated',
    546                         metavar='UPDATED_FILE_NAME',
    547                         default='updated-results.json',
    548                         help='the file name of the updated expectations JSON;' +
    549                         ' defaults to %(default)s'
    550                         )
    551 
    552     parser.add_argument('--skpdiff-path', metavar='SKPDIFF_PATH',
    553                         default=None,
    554                         help='the path to the skpdiff binary to use; ' +
    555                         'defaults to out/Release/skpdiff or out/Default/skpdiff'
    556                         )
    557 
    558     args = vars(parser.parse_args())  # Convert args into a python dict
    559 
    560     # Make sure we have access to an skpdiff binary
    561     skpdiff_path = get_skpdiff_path(args['skpdiff_path'])
    562     if skpdiff_path is None:
    563         sys.exit(1)
    564 
    565     # Print out the paths of things for easier debugging
    566     print('script dir         :', SCRIPT_DIR)
    567     print('tools dir          :', TOOLS_DIR)
    568     print('root dir           :', SKIA_ROOT_DIR)
    569     print('expectations dir   :', args['expectations_dir'])
    570     print('skpdiff path       :', skpdiff_path)
    571 
    572     expectations_manager = ExpectationsManager(args['expectations_dir'],
    573                                                args['expected'],
    574                                                args['updated'],
    575                                                skpdiff_path)
    576 
    577     run_server(expectations_manager, port=args['port'])
    578 
# Only start the tool when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
    581