Home | History | Annotate | Download | only in rebaseline_server
      1 #!/usr/bin/python
      2 
      3 """
      4 Copyright 2013 Google Inc.
      5 
      6 Use of this source code is governed by a BSD-style license that can be
      7 found in the LICENSE file.
      8 
Calculate differences between image pairs, and store them in a database.
     10 """
     11 
     12 import contextlib
     13 import logging
     14 import os
     15 import shutil
     16 import urllib
     17 try:
     18   from PIL import Image, ImageChops
     19 except ImportError:
     20   raise ImportError('Requires PIL to be installed; see '
     21                     + 'http://www.pythonware.com/products/pil/')
     22 
# Filename extension appended to every image/diff locator when it is stored
# on local disk.
IMAGE_SUFFIX = '.png'

# Subdirectories of storage_root holding, respectively: the downloaded
# expected/actual images, the per-pixel diff images, and the whitediff images.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries PIL reports per band (color channel); the
# histogram helpers below index bands in strides of this size.
VALUES_PER_BAND = 256
     30 
     31 
     32 class DiffRecord(object):
     33   """ Record of differences between two images. """
     34 
     35   def __init__(self, storage_root,
     36                expected_image_url, expected_image_locator,
     37                actual_image_url, actual_image_locator):
     38     """Download this pair of images (unless we already have them on local disk),
     39     and prepare a DiffRecord for them.
     40 
     41     TODO(epoger): Make this asynchronously download images, rather than blocking
     42     until the images have been downloaded and processed.
     43 
     44     Args:
     45       storage_root: root directory on local disk within which we store all
     46           images
     47       expected_image_url: file or HTTP url from which we will download the
     48           expected image
     49       expected_image_locator: a unique ID string under which we will store the
     50           expected image within storage_root (probably including a checksum to
     51           guarantee uniqueness)
     52       actual_image_url: file or HTTP url from which we will download the
     53           actual image
     54       actual_image_locator: a unique ID string under which we will store the
     55           actual image within storage_root (probably including a checksum to
     56           guarantee uniqueness)
     57     """
     58     # Download the expected/actual images, if we don't have them already.
     59     expected_image = _download_and_open_image(
     60         os.path.join(storage_root, IMAGES_SUBDIR,
     61                      str(expected_image_locator) + IMAGE_SUFFIX),
     62         expected_image_url)
     63     actual_image = _download_and_open_image(
     64         os.path.join(storage_root, IMAGES_SUBDIR,
     65                      str(actual_image_locator) + IMAGE_SUFFIX),
     66         actual_image_url)
     67 
     68     # Generate the diff image (absolute diff at each pixel) and
     69     # max_diff_per_channel.
     70     diff_image = _generate_image_diff(actual_image, expected_image)
     71     diff_histogram = diff_image.histogram()
     72     (diff_width, diff_height) = diff_image.size
     73     self._weighted_diff_measure = _calculate_weighted_diff_metric(
     74         diff_histogram, diff_width * diff_height)
     75     self._max_diff_per_channel = _max_per_band(diff_histogram)
     76 
     77     # Generate the whitediff image (any differing pixels show as white).
     78     # This is tricky, because when you convert color images to grayscale or
     79     # black & white in PIL, it has its own ideas about thresholds.
     80     # We have to force it: if a pixel has any color at all, it's a '1'.
     81     bands = diff_image.split()
     82     graydiff_image = ImageChops.lighter(ImageChops.lighter(
     83         bands[0], bands[1]), bands[2])
     84     whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
     85                                      .convert('1', dither=Image.NONE))
     86 
     87     # Final touches on diff_image: use whitediff_image as an alpha mask.
     88     # Unchanged pixels are transparent; differing pixels are opaque.
     89     diff_image.putalpha(whitediff_image)
     90 
     91     # Store the diff and whitediff images generated above.
     92     diff_image_locator = _get_difference_locator(
     93         expected_image_locator=expected_image_locator,
     94         actual_image_locator=actual_image_locator)
     95     basename = str(diff_image_locator) + IMAGE_SUFFIX
     96     _save_image(diff_image, os.path.join(
     97         storage_root, DIFFS_SUBDIR, basename))
     98     _save_image(whitediff_image, os.path.join(
     99         storage_root, WHITEDIFFS_SUBDIR, basename))
    100 
    101     # Calculate difference metrics.
    102     (self._width, self._height) = diff_image.size
    103     self._num_pixels_differing = (
    104         whitediff_image.histogram()[VALUES_PER_BAND - 1])
    105 
    106   def get_num_pixels_differing(self):
    107     """Returns the absolute number of pixels that differ."""
    108     return self._num_pixels_differing
    109 
    110   def get_percent_pixels_differing(self):
    111     """Returns the percentage of pixels that differ, as a float between
    112     0 and 100 (inclusive)."""
    113     return ((float(self._num_pixels_differing) * 100) /
    114             (self._width * self._height))
    115 
    116   def get_weighted_diff_measure(self):
    117     """Returns a weighted measure of image diffs, as a float between 0 and 100
    118     (inclusive)."""
    119     return self._weighted_diff_measure
    120 
    121   def get_max_diff_per_channel(self):
    122     """Returns the maximum difference between the expected and actual images
    123     for each R/G/B channel, as a list."""
    124     return self._max_diff_per_channel
    125 
    126 
    127 class ImageDiffDB(object):
    128   """ Calculates differences between image pairs, maintaining a database of
    129   them for download."""
    130 
    131   def __init__(self, storage_root):
    132     """
    133     Args:
    134       storage_root: string; root path within the DB will store all of its stuff
    135     """
    136     self._storage_root = storage_root
    137 
    138     # Dictionary of DiffRecords, keyed by (expected_image_locator,
    139     # actual_image_locator) tuples.
    140     self._diff_dict = {}
    141 
    142   def add_image_pair(self,
    143                      expected_image_url, expected_image_locator,
    144                      actual_image_url, actual_image_locator):
    145     """Download this pair of images (unless we already have them on local disk),
    146     and prepare a DiffRecord for them.
    147 
    148     TODO(epoger): Make this asynchronously download images, rather than blocking
    149     until the images have been downloaded and processed.
    150     When we do that, we should probably add a new method that will block
    151     until all of the images have been downloaded and processed.  Otherwise,
    152     we won't know when it's safe to start calling get_diff_record().
    153     jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    154     thread-pool/worker queue at a higher level that just uses ImageDiffDB?
    155 
    156     Args:
    157       expected_image_url: file or HTTP url from which we will download the
    158           expected image
    159       expected_image_locator: a unique ID string under which we will store the
    160           expected image within storage_root (probably including a checksum to
    161           guarantee uniqueness)
    162       actual_image_url: file or HTTP url from which we will download the
    163           actual image
    164       actual_image_locator: a unique ID string under which we will store the
    165           actual image within storage_root (probably including a checksum to
    166           guarantee uniqueness)
    167     """
    168     key = (expected_image_locator, actual_image_locator)
    169     if not key in self._diff_dict:
    170       try:
    171         new_diff_record = DiffRecord(
    172             self._storage_root,
    173             expected_image_url=expected_image_url,
    174             expected_image_locator=expected_image_locator,
    175             actual_image_url=actual_image_url,
    176             actual_image_locator=actual_image_locator)
    177       except:
    178         logging.exception('got exception while creating new DiffRecord')
    179         return
    180       self._diff_dict[key] = new_diff_record
    181 
    182   def get_diff_record(self, expected_image_locator, actual_image_locator):
    183     """Returns the DiffRecord for this image pair.
    184 
    185     Raises a KeyError if we don't have a DiffRecord for this image pair.
    186     """
    187     key = (expected_image_locator, actual_image_locator)
    188     return self._diff_dict[key]
    189 
    190 
    191 # Utility functions
    192 
    193 def _calculate_weighted_diff_metric(histogram, num_pixels):
    194   """Given the histogram of a diff image (per-channel diff at each
    195   pixel between two images), calculate the weighted diff metric (a
    196   stab at how different the two images really are).
    197 
    198   Args:
    199     histogram: PIL histogram of a per-channel diff between two images
    200     num_pixels: integer; the total number of pixels in the diff image
    201 
    202   Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).
    203   """
    204   # TODO(epoger): As a wild guess at an appropriate metric, weight each
    205   # different pixel by the square of its delta value.  (The more different
    206   # a pixel is from its expectation, the more we care about it.)
    207   # In the long term, we will probably use some metric generated by
    208   # skpdiff anyway.
    209   assert(len(histogram) % VALUES_PER_BAND == 0)
    210   num_bands = len(histogram) / VALUES_PER_BAND
    211   max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    212   total_diff = 0
    213   for index in xrange(len(histogram)):
    214     total_diff += histogram[index] * (index % VALUES_PER_BAND)**2
    215   return float(100 * total_diff) / max_diff
    216 
    217 def _max_per_band(histogram):
    218   """Given the histogram of an image, return the maximum value of each band
    219   (a.k.a. "color channel", such as R/G/B) across the entire image.
    220 
    221   Args:
    222     histogram: PIL histogram
    223 
    224   Returns the maximum value of each band within the image histogram, as a list.
    225   """
    226   max_per_band = []
    227   assert(len(histogram) % VALUES_PER_BAND == 0)
    228   num_bands = len(histogram) / VALUES_PER_BAND
    229   for band in xrange(num_bands):
    230     # Assuming that VALUES_PER_BAND is 256...
    231     #  the 'R' band makes up indices 0-255 in the histogram,
    232     #  the 'G' band makes up indices 256-511 in the histogram,
    233     #  etc.
    234     min_index = band * VALUES_PER_BAND
    235     index = min_index + VALUES_PER_BAND
    236     while index > min_index:
    237       index -= 1
    238       if histogram[index] > 0:
    239         max_per_band.append(index - min_index)
    240         break
    241   return max_per_band
    242 
    243 def _generate_image_diff(image1, image2):
    244   """Wrapper for ImageChops.difference(image1, image2) that will handle some
    245   errors automatically, or at least yield more useful error messages.
    246 
    247   TODO(epoger): Currently, some of the images generated by the bots are RGBA
    248   and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
    249   within the UI, convert all to RGB when diffing.
    250 
    251   Args:
    252     image1: a PIL image object
    253     image2: a PIL image object
    254 
    255   Returns: per-pixel diffs between image1 and image2, as a PIL image object
    256   """
    257   try:
    258     return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
    259   except ValueError:
    260     logging.error('Error diffing image1 [%s] and image2 [%s].' % (
    261         repr(image1), repr(image2)))
    262     raise
    263 
    264 def _download_and_open_image(local_filepath, url):
    265   """Open the image at local_filepath; if there is no file at that path,
    266   download it from url to that path and then open it.
    267 
    268   Args:
    269     local_filepath: path on local disk where the image should be stored
    270     url: URL from which we can download the image if we don't have it yet
    271 
    272   Returns: a PIL image object
    273   """
    274   if not os.path.exists(local_filepath):
    275     _mkdir_unless_exists(os.path.dirname(local_filepath))
    276     with contextlib.closing(urllib.urlopen(url)) as url_handle:
    277       with open(local_filepath, 'wb') as file_handle:
    278         shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
    279   return _open_image(local_filepath)
    280 
    281 def _open_image(filepath):
    282   """Wrapper for Image.open(filepath) that yields more useful error messages.
    283 
    284   Args:
    285     filepath: path on local disk to load image from
    286 
    287   Returns: a PIL image object
    288   """
    289   try:
    290     return Image.open(filepath)
    291   except IOError:
    292     logging.error('IOError loading image file %s' % filepath)
    293     raise
    294 
    295 def _save_image(image, filepath, format='PNG'):
    296   """Write an image to disk, creating any intermediate directories as needed.
    297 
    298   Args:
    299     image: a PIL image object
    300     filepath: path on local disk to write image to
    301     format: one of the PIL image formats, listed at
    302             http://effbot.org/imagingbook/formats.htm
    303   """
    304   _mkdir_unless_exists(os.path.dirname(filepath))
    305   image.save(filepath, format)
    306 
    307 def _mkdir_unless_exists(path):
    308   """Unless path refers to an already-existing directory, create it.
    309 
    310   Args:
    311     path: path on local disk
    312   """
    313   if not os.path.isdir(path):
    314     os.makedirs(path)
    315 
    316 def _get_difference_locator(expected_image_locator, actual_image_locator):
    317   """Returns the locator string used to look up the diffs between expected_image
    318   and actual_image.
    319 
    320   Args:
    321     expected_image_locator: locator string pointing at expected image
    322     actual_image_locator: locator string pointing at actual image
    323 
    324   Returns: locator where the diffs between expected and actual images can be
    325       found
    326   """
    327   return "%s-vs-%s" % (expected_image_locator, actual_image_locator)
    328