Home | History | Annotate | Download | only in rebaseline_server
      1 #!/usr/bin/python
      2 
      3 """
      4 Copyright 2013 Google Inc.
      5 
      6 Use of this source code is governed by a BSD-style license that can be
      7 found in the LICENSE file.
      8 
Calculate differences between image pairs, and store them in a database.
     10 """
     11 
     12 import contextlib
     13 import csv
     14 import logging
     15 import os
     16 import re
     17 import shutil
     18 import sys
     19 import tempfile
     20 import urllib
     21 try:
     22   from PIL import Image, ImageChops
     23 except ImportError:
     24   raise ImportError('Requires PIL to be installed; see '
     25                     + 'http://www.pythonware.com/products/pil/')
     26 
# Extend the module search path with the 'tools' directory that sits two
# levels above the directory containing this file, so that the
# find_run_binary module imported just below can be found.
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
                        'tools'))
import find_run_binary

# Location of the skpdiff program, looked up once when this module is imported.
# DiffRecord passes it as the first element of the command line it runs.
# NOTE(review): presumably find_path_to_program() fails if skpdiff has not
# been built -- confirm against find_run_binary.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
     35 
     36 DEFAULT_IMAGE_SUFFIX = '.png'
     37 DEFAULT_IMAGES_SUBDIR = 'images'
     38 
     39 DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')
     40 
     41 DIFFS_SUBDIR = 'diffs'
     42 WHITEDIFFS_SUBDIR = 'whitediffs'
     43 
     44 VALUES_PER_BAND = 256
     45 
     46 # Keys used within DiffRecord dictionary representations.
     47 # NOTE: Keep these in sync with static/constants.js
     48 KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
     49 KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
     50 KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
     51 KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'
     52 
     53 
     54 class DiffRecord(object):
     55   """ Record of differences between two images. """
     56 
     57   def __init__(self, storage_root,
     58                expected_image_url, expected_image_locator,
     59                actual_image_url, actual_image_locator,
     60                expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
     61                actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
     62                image_suffix=DEFAULT_IMAGE_SUFFIX):
     63     """Download this pair of images (unless we already have them on local disk),
     64     and prepare a DiffRecord for them.
     65 
     66     TODO(epoger): Make this asynchronously download images, rather than blocking
     67     until the images have been downloaded and processed.
     68 
     69     Args:
     70       storage_root: root directory on local disk within which we store all
     71           images
     72       expected_image_url: file or HTTP url from which we will download the
     73           expected image
     74       expected_image_locator: a unique ID string under which we will store the
     75           expected image within storage_root (probably including a checksum to
     76           guarantee uniqueness)
     77       actual_image_url: file or HTTP url from which we will download the
     78           actual image
     79       actual_image_locator: a unique ID string under which we will store the
     80           actual image within storage_root (probably including a checksum to
     81           guarantee uniqueness)
     82       expected_images_subdir: the subdirectory expected images are stored in.
     83       actual_images_subdir: the subdirectory actual images are stored in.
     84       image_suffix: the suffix of images.
     85     """
     86     expected_image_locator = _sanitize_locator(expected_image_locator)
     87     actual_image_locator = _sanitize_locator(actual_image_locator)
     88 
     89     # Download the expected/actual images, if we don't have them already.
     90     # TODO(rmistry): Add a parameter that makes _download_and_open_image raise
     91     # an exception if images are not found locally (instead of trying to
     92     # download them).
     93     expected_image_file = os.path.join(
     94         storage_root, expected_images_subdir,
     95         str(expected_image_locator) + image_suffix)
     96     actual_image_file = os.path.join(
     97         storage_root, actual_images_subdir,
     98         str(actual_image_locator) + image_suffix)
     99     try:
    100       expected_image = _download_and_open_image(
    101           expected_image_file, expected_image_url)
    102     except Exception:
    103       logging.exception('unable to download expected_image_url %s to file %s' %
    104                         (expected_image_url, expected_image_file))
    105       raise
    106     try:
    107       actual_image = _download_and_open_image(
    108           actual_image_file, actual_image_url)
    109     except Exception:
    110       logging.exception('unable to download actual_image_url %s to file %s' %
    111                         (actual_image_url, actual_image_file))
    112       raise
    113 
    114     # Generate the diff image (absolute diff at each pixel) and
    115     # max_diff_per_channel.
    116     diff_image = _generate_image_diff(actual_image, expected_image)
    117     diff_histogram = diff_image.histogram()
    118     (diff_width, diff_height) = diff_image.size
    119     self._max_diff_per_channel = _max_per_band(diff_histogram)
    120 
    121     # Generate the whitediff image (any differing pixels show as white).
    122     # This is tricky, because when you convert color images to grayscale or
    123     # black & white in PIL, it has its own ideas about thresholds.
    124     # We have to force it: if a pixel has any color at all, it's a '1'.
    125     bands = diff_image.split()
    126     graydiff_image = ImageChops.lighter(ImageChops.lighter(
    127         bands[0], bands[1]), bands[2])
    128     whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
    129                                      .convert('1', dither=Image.NONE))
    130 
    131     # Calculate the perceptual difference percentage.
    132     skpdiff_csv_dir = tempfile.mkdtemp()
    133     try:
    134       skpdiff_csv_output = os.path.join(skpdiff_csv_dir, 'skpdiff-output.csv')
    135       expected_img = os.path.join(storage_root, expected_images_subdir,
    136                                   str(expected_image_locator) + image_suffix)
    137       actual_img = os.path.join(storage_root, actual_images_subdir,
    138                                 str(actual_image_locator) + image_suffix)
    139       find_run_binary.run_command(
    140           [SKPDIFF_BINARY, '-p', expected_img, actual_img,
    141            '--csv', skpdiff_csv_output, '-d', 'perceptual'])
    142       with contextlib.closing(open(skpdiff_csv_output)) as csv_file:
    143         for row in csv.DictReader(csv_file):
    144           perceptual_similarity = float(row[' perceptual'].strip())
    145           if not 0 <= perceptual_similarity <= 1:
    146             # skpdiff outputs -1 if the images are different sizes. Treat any
    147             # output that does not lie in [0, 1] as having 0% perceptual
    148             # similarity.
    149             perceptual_similarity = 0
    150           # skpdiff returns the perceptual similarity, convert it to get the
    151           # perceptual difference percentage.
    152           self._perceptual_difference = 100 - (perceptual_similarity * 100)
    153     finally:
    154       shutil.rmtree(skpdiff_csv_dir)
    155 
    156     # Final touches on diff_image: use whitediff_image as an alpha mask.
    157     # Unchanged pixels are transparent; differing pixels are opaque.
    158     diff_image.putalpha(whitediff_image)
    159 
    160     # Store the diff and whitediff images generated above.
    161     diff_image_locator = _get_difference_locator(
    162         expected_image_locator=expected_image_locator,
    163         actual_image_locator=actual_image_locator)
    164     basename = str(diff_image_locator) + image_suffix
    165     _save_image(diff_image, os.path.join(
    166         storage_root, DIFFS_SUBDIR, basename))
    167     _save_image(whitediff_image, os.path.join(
    168         storage_root, WHITEDIFFS_SUBDIR, basename))
    169 
    170     # Calculate difference metrics.
    171     (self._width, self._height) = diff_image.size
    172     self._num_pixels_differing = (
    173         whitediff_image.histogram()[VALUES_PER_BAND - 1])
    174 
    175   def get_num_pixels_differing(self):
    176     """Returns the absolute number of pixels that differ."""
    177     return self._num_pixels_differing
    178 
    179   def get_percent_pixels_differing(self):
    180     """Returns the percentage of pixels that differ, as a float between
    181     0 and 100 (inclusive)."""
    182     return ((float(self._num_pixels_differing) * 100) /
    183             (self._width * self._height))
    184 
    185   def get_perceptual_difference(self):
    186     """Returns the perceptual difference percentage."""
    187     return self._perceptual_difference
    188 
    189   def get_max_diff_per_channel(self):
    190     """Returns the maximum difference between the expected and actual images
    191     for each R/G/B channel, as a list."""
    192     return self._max_diff_per_channel
    193 
    194   def as_dict(self):
    195     """Returns a dictionary representation of this DiffRecord, as needed when
    196     constructing the JSON representation."""
    197     return {
    198         KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
    199         KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
    200             self.get_percent_pixels_differing(),
    201         KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
    202         KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
    203     }
    204 
    205 
    206 class ImageDiffDB(object):
    207   """ Calculates differences between image pairs, maintaining a database of
    208   them for download."""
    209 
    210   def __init__(self, storage_root):
    211     """
    212     Args:
    213       storage_root: string; root path within the DB will store all of its stuff
    214     """
    215     self._storage_root = storage_root
    216 
    217     # Dictionary of DiffRecords, keyed by (expected_image_locator,
    218     # actual_image_locator) tuples.
    219     self._diff_dict = {}
    220 
    221   def add_image_pair(self,
    222                      expected_image_url, expected_image_locator,
    223                      actual_image_url, actual_image_locator):
    224     """Download this pair of images (unless we already have them on local disk),
    225     and prepare a DiffRecord for them.
    226 
    227     TODO(epoger): Make this asynchronously download images, rather than blocking
    228     until the images have been downloaded and processed.
    229     When we do that, we should probably add a new method that will block
    230     until all of the images have been downloaded and processed.  Otherwise,
    231     we won't know when it's safe to start calling get_diff_record().
    232     jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    233     thread-pool/worker queue at a higher level that just uses ImageDiffDB?
    234 
    235     Args:
    236       expected_image_url: file or HTTP url from which we will download the
    237           expected image
    238       expected_image_locator: a unique ID string under which we will store the
    239           expected image within storage_root (probably including a checksum to
    240           guarantee uniqueness)
    241       actual_image_url: file or HTTP url from which we will download the
    242           actual image
    243       actual_image_locator: a unique ID string under which we will store the
    244           actual image within storage_root (probably including a checksum to
    245           guarantee uniqueness)
    246     """
    247     expected_image_locator = _sanitize_locator(expected_image_locator)
    248     actual_image_locator = _sanitize_locator(actual_image_locator)
    249     key = (expected_image_locator, actual_image_locator)
    250     if not key in self._diff_dict:
    251       try:
    252         new_diff_record = DiffRecord(
    253             self._storage_root,
    254             expected_image_url=expected_image_url,
    255             expected_image_locator=expected_image_locator,
    256             actual_image_url=actual_image_url,
    257             actual_image_locator=actual_image_locator)
    258       except Exception:
    259         # If we can't create a real DiffRecord for this (expected, actual) pair,
    260         # store None and the UI will show whatever information we DO have.
    261         # Fixes http://skbug.com/2368 .
    262         logging.exception(
    263             'got exception while creating a DiffRecord for '
    264             'expected_image_url=%s , actual_image_url=%s; returning None' % (
    265                 expected_image_url, actual_image_url))
    266         new_diff_record = None
    267       self._diff_dict[key] = new_diff_record
    268 
    269   def get_diff_record(self, expected_image_locator, actual_image_locator):
    270     """Returns the DiffRecord for this image pair.
    271 
    272     Raises a KeyError if we don't have a DiffRecord for this image pair.
    273     """
    274     key = (_sanitize_locator(expected_image_locator),
    275            _sanitize_locator(actual_image_locator))
    276     return self._diff_dict[key]
    277 
    278 
    279 # Utility functions
    280 
    281 def _max_per_band(histogram):
    282   """Given the histogram of an image, return the maximum value of each band
    283   (a.k.a. "color channel", such as R/G/B) across the entire image.
    284 
    285   Args:
    286     histogram: PIL histogram
    287 
    288   Returns the maximum value of each band within the image histogram, as a list.
    289   """
    290   max_per_band = []
    291   assert(len(histogram) % VALUES_PER_BAND == 0)
    292   num_bands = len(histogram) / VALUES_PER_BAND
    293   for band in xrange(num_bands):
    294     # Assuming that VALUES_PER_BAND is 256...
    295     #  the 'R' band makes up indices 0-255 in the histogram,
    296     #  the 'G' band makes up indices 256-511 in the histogram,
    297     #  etc.
    298     min_index = band * VALUES_PER_BAND
    299     index = min_index + VALUES_PER_BAND
    300     while index > min_index:
    301       index -= 1
    302       if histogram[index] > 0:
    303         max_per_band.append(index - min_index)
    304         break
    305   return max_per_band
    306 
    307 
    308 def _generate_image_diff(image1, image2):
    309   """Wrapper for ImageChops.difference(image1, image2) that will handle some
    310   errors automatically, or at least yield more useful error messages.
    311 
    312   TODO(epoger): Currently, some of the images generated by the bots are RGBA
    313   and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
    314   within the UI, convert all to RGB when diffing.
    315 
    316   Args:
    317     image1: a PIL image object
    318     image2: a PIL image object
    319 
    320   Returns: per-pixel diffs between image1 and image2, as a PIL image object
    321   """
    322   try:
    323     return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
    324   except ValueError:
    325     logging.error('Error diffing image1 [%s] and image2 [%s].' % (
    326         repr(image1), repr(image2)))
    327     raise
    328 
    329 
    330 def _download_and_open_image(local_filepath, url):
    331   """Open the image at local_filepath; if there is no file at that path,
    332   download it from url to that path and then open it.
    333 
    334   Args:
    335     local_filepath: path on local disk where the image should be stored
    336     url: URL from which we can download the image if we don't have it yet
    337 
    338   Returns: a PIL image object
    339   """
    340   if not os.path.exists(local_filepath):
    341     _mkdir_unless_exists(os.path.dirname(local_filepath))
    342     with contextlib.closing(urllib.urlopen(url)) as url_handle:
    343       with open(local_filepath, 'wb') as file_handle:
    344         shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
    345   return _open_image(local_filepath)
    346 
    347 
    348 def _open_image(filepath):
    349   """Wrapper for Image.open(filepath) that yields more useful error messages.
    350 
    351   Args:
    352     filepath: path on local disk to load image from
    353 
    354   Returns: a PIL image object
    355   """
    356   try:
    357     return Image.open(filepath)
    358   except IOError:
    359     # If we are unable to load an image from the file, delete it from disk
    360     # and we will try to fetch it again next time.  Fixes http://skbug.com/2247
    361     logging.error('IOError loading image file %s ; deleting it.' % filepath)
    362     os.remove(filepath)
    363     raise
    364 
    365 
    366 def _save_image(image, filepath, format='PNG'):
    367   """Write an image to disk, creating any intermediate directories as needed.
    368 
    369   Args:
    370     image: a PIL image object
    371     filepath: path on local disk to write image to
    372     format: one of the PIL image formats, listed at
    373             http://effbot.org/imagingbook/formats.htm
    374   """
    375   _mkdir_unless_exists(os.path.dirname(filepath))
    376   image.save(filepath, format)
    377 
    378 
    379 def _mkdir_unless_exists(path):
    380   """Unless path refers to an already-existing directory, create it.
    381 
    382   Args:
    383     path: path on local disk
    384   """
    385   if not os.path.isdir(path):
    386     os.makedirs(path)
    387 
    388 
    389 def _sanitize_locator(locator):
    390   """Returns a sanitized version of a locator (one in which we know none of the
    391   characters will have special meaning in filenames).
    392 
    393   Args:
    394     locator: string, or something that can be represented as a string
    395   """
    396   return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
    397 
    398 
    399 def _get_difference_locator(expected_image_locator, actual_image_locator):
    400   """Returns the locator string used to look up the diffs between expected_image
    401   and actual_image.
    402 
    403   We must keep this function in sync with getImageDiffRelativeUrl() in
    404   static/loader.js
    405 
    406   Args:
    407     expected_image_locator: locator string pointing at expected image
    408     actual_image_locator: locator string pointing at actual image
    409 
    410   Returns: already-sanitized locator where the diffs between expected and
    411       actual images can be found
    412   """
    413   return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
    414                        _sanitize_locator(actual_image_locator))
    415