#!/usr/bin/python

"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calculate differences between image pairs, and store them in a database.
"""

import contextlib
import logging
import os
import shutil
import urllib
try:
    from PIL import Image, ImageChops
except ImportError:
    raise ImportError('Requires PIL to be installed; see '
                      + 'http://www.pythonware.com/products/pil/')

# All images handled by this module are stored on disk with this suffix.
IMAGE_SUFFIX = '.png'

# Subdirectories of storage_root holding the downloaded images and the two
# kinds of generated diff images.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of distinct values (histogram buckets) per color band.
VALUES_PER_BAND = 256


class DiffRecord(object):
    """Record of differences between two images."""

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        As a side effect, the diff and whitediff images are written below
        storage_root (in DIFFS_SUBDIR and WHITEDIFFS_SUBDIR respectively).

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        # Download the expected/actual images, if we don't have them already.
        expected_image = _download_and_open_image(
            os.path.join(storage_root, IMAGES_SUBDIR,
                         str(expected_image_locator) + IMAGE_SUFFIX),
            expected_image_url)
        actual_image = _download_and_open_image(
            os.path.join(storage_root, IMAGES_SUBDIR,
                         str(actual_image_locator) + IMAGE_SUFFIX),
            actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (diff_width, diff_height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, diff_width * diff_height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        # Map every nonzero value to the largest value a band can hold
        # (VALUES_PER_BAND - 1); VALUES_PER_BAND itself would be out of range
        # for a band with VALUES_PER_BAND distinct values.
        whitediff_image = (
            graydiff_image.point(
                lambda p: (VALUES_PER_BAND - 1) if p > 0 else 0)
            .convert('1', dither=Image.NONE))

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + IMAGE_SUFFIX
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Calculate difference metrics.  Differing pixels are the ones that
        # landed in the top histogram bucket of the whitediff image.
        (self._width, self._height) = diff_image.size
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive)."""
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel


class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples.
        self._diff_dict = {}

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If a DiffRecord for this pair of locators already exists, this is a
        no-op.  If creating the DiffRecord fails, the exception is logged and
        no record is stored (so a later get_diff_record() for this pair
        raises KeyError).

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create
        a thread-pool/worker queue at a higher level that just uses
        ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = (expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            return
        try:
            new_diff_record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_image_locator,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_image_locator)
        except Exception:
            # Best-effort: one bad image pair must not abort the caller.
            # Catch Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit still propagate.
            logging.exception('got exception while creating new DiffRecord')
            return
        self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (expected_image_locator, actual_image_locator)
        return self._diff_dict[key]


# Utility functions

def _urlopen(url):
    """Open url with whichever urlopen this Python version provides.

    The lookup happens at call time: Python 2 exposes urllib.urlopen, while
    Python 3 only provides urllib.request.urlopen.

    Args:
      url: URL to open

    Returns: the file-like object returned by urlopen
    """
    try:
        opener = urllib.urlopen
    except AttributeError:
        from urllib.request import urlopen as opener
    return opener(url)


def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive).
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value.  (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer on Python 2 and Python 3.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1) ** 2
    # index % VALUES_PER_BAND is the delta value of a bucket within its band.
    total_diff = sum(count * (index % VALUES_PER_BAND) ** 2
                     for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff


def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram

    Returns the maximum value of each band within the image histogram, as a
    list.  A band whose buckets are all empty contributes no entry.
    """
    max_per_band = []
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer on Python 2 and Python 3.
    num_bands = len(histogram) // VALUES_PER_BAND
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        #  the 'R' band makes up indices 0-255 in the histogram,
        #  the 'G' band makes up indices 256-511 in the histogram,
        #  etc.
        min_index = band * VALUES_PER_BAND
        # Scan from the highest value downwards; the first non-empty bucket
        # is the band's maximum.
        for value in reversed(range(VALUES_PER_BAND)):
            if histogram[min_index + value] > 0:
                max_per_band.append(value)
                break
    return max_per_band


def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises: ValueError (re-raised after logging) if the diff fails.
    """
    try:
        return ImageChops.difference(
            image1.convert('RGB'), image2.convert('RGB'))
    except ValueError:
        logging.error('Error diffing image1 [%r] and image2 [%r].',
                      image1, image2)
        raise


def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    If the download fails part-way, the incomplete file is removed so that a
    later call retries the download instead of opening a truncated image.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        download_complete = False
        try:
            with contextlib.closing(_urlopen(url)) as url_handle:
                with open(local_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            download_complete = True
        finally:
            # A flag plus finally (rather than except) also covers
            # KeyboardInterrupt, and leaves the original exception intact.
            if not download_complete and os.path.isfile(local_filepath):
                os.remove(local_filepath)
    return _open_image(local_filepath)


def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error
    messages.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises: IOError (re-raised after logging) if the image cannot be opened.
    """
    try:
        return Image.open(filepath)
    except IOError:
        logging.error('IOError loading image file %s', filepath)
        raise


def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as
    needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    _mkdir_unless_exists(os.path.dirname(filepath))
    image.save(filepath, format)


def _mkdir_unless_exists(path):
    """Unless path refers to an already-existing directory, create it.

    An empty path (e.g. the dirname of a bare filename) refers to the current
    directory, so there is nothing to create.

    Args:
      path: path on local disk
    """
    if path and not os.path.isdir(path):
        os.makedirs(path)


def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: locator where the diffs between expected and actual images can
        be found
    """
    return "%s-vs-%s" % (expected_image_locator, actual_image_locator)