#!/usr/bin/python

"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calculate differences between image pairs, and store them in a database.
"""

import contextlib
import csv
import logging
import os
import re
import shutil
import sys
import tempfile
import urllib
try:
    from PIL import Image, ImageChops
except ImportError:
    raise ImportError('Requires PIL to be installed; see '
                      + 'http://www.pythonware.com/products/pil/')

# Set the PYTHONPATH to include the tools directory.
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
        'tools'))
import find_run_binary

SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')

DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Raw string, so that the backslash escapes are interpreted by the regex
# engine rather than by the Python string-literal parser.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

VALUES_PER_BAND = 256

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'


class DiffRecord(object):
    """ Record of differences between two images. """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are
              stored in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.

        Raises:
          Exception: whatever was raised while downloading, opening or
              diffing the images (logged before being re-raised).
          ValueError: if skpdiff produced no perceptual-difference result.
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image_file = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_file = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)
        try:
            expected_image = _download_and_open_image(
                expected_image_file, expected_image_url)
        except Exception:
            logging.exception(
                'unable to download expected_image_url %s to file %s',
                expected_image_url, expected_image_file)
            raise
        try:
            actual_image = _download_and_open_image(
                actual_image_file, actual_image_url)
        except Exception:
            logging.exception(
                'unable to download actual_image_url %s to file %s',
                actual_image_url, actual_image_file)
            raise

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel.
        diff_image = _generate_image_diff(actual_image, expected_image)
        self._max_diff_per_channel = _max_per_band(diff_image.histogram())

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        whitediff_image = (
            graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
            .convert('1', dither=Image.NONE))

        # Calculate the perceptual difference percentage, reusing the image
        # paths computed above (skpdiff reads the files from local disk).
        self._perceptual_difference = _calculate_perceptual_difference(
            expected_image_file, actual_image_file)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Calculate difference metrics.
        (self._width, self._height) = diff_image.size
        # The count of differing pixels is read from the last histogram
        # bucket of the whitediff image.
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage."""
        return self._perceptual_difference

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
            KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
                self.get_percent_pixels_differing(),
            KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL:
                self._max_diff_per_channel,
            KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
        }


class ImageDiffDB(object):
    """ Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples.
        self._diff_dict = {}

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If the DiffRecord cannot be created, the exception is logged and None
        is stored for this pair instead.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
        thread-pool/worker queue at a higher level that just uses ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)
        key = (expected_image_locator, actual_image_locator)
        if key not in self._diff_dict:
            try:
                new_diff_record = DiffRecord(
                    self._storage_root,
                    expected_image_url=expected_image_url,
                    expected_image_locator=expected_image_locator,
                    actual_image_url=actual_image_url,
                    actual_image_locator=actual_image_locator)
            except Exception:
                # If we can't create a real DiffRecord for this (expected,
                # actual) pair, store None and the UI will show whatever
                # information we DO have.
                # Fixes http://skbug.com/2368 .
                logging.exception(
                    'got exception while creating a DiffRecord for '
                    'expected_image_url=%s , actual_image_url=%s; '
                    'returning None',
                    expected_image_url, actual_image_url)
                new_diff_record = None
            self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair (which may be None, if
        add_image_pair() was unable to create one).

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (_sanitize_locator(expected_image_locator),
               _sanitize_locator(actual_image_locator))
        return self._diff_dict[key]


# Utility functions

def _calculate_perceptual_difference(expected_image_file, actual_image_file):
    """Run skpdiff on a pair of image files and return how different they
    are, perceptually.

    Args:
      expected_image_file: path on local disk to the expected image
      actual_image_file: path on local disk to the actual image

    Returns: the perceptual difference percentage, as a float between 0 and
        100 (inclusive). If skpdiff reports multiple rows, the last one wins.

    Raises:
      ValueError: if skpdiff's CSV output contained no data rows, so there is
          no result to report.
    """
    perceptual_difference = None
    skpdiff_csv_dir = tempfile.mkdtemp()
    try:
        skpdiff_csv_output = os.path.join(
            skpdiff_csv_dir, 'skpdiff-output.csv')
        find_run_binary.run_command(
            [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
             '--csv', skpdiff_csv_output, '-d', 'perceptual'])
        with open(skpdiff_csv_output) as csv_file:
            for row in csv.DictReader(csv_file):
                # The column header has a leading space in skpdiff's output.
                perceptual_similarity = float(row[' perceptual'].strip())
                if not 0 <= perceptual_similarity <= 1:
                    # skpdiff outputs -1 if the images are different sizes.
                    # Treat any output that does not lie in [0, 1] as having
                    # 0% perceptual similarity.
                    perceptual_similarity = 0
                # skpdiff returns the perceptual similarity, convert it to
                # get the perceptual difference percentage.
                perceptual_difference = 100 - (perceptual_similarity * 100)
    finally:
        # Always clean up the scratch directory, even if skpdiff failed.
        shutil.rmtree(skpdiff_csv_dir)

    if perceptual_difference is None:
        raise ValueError(
            'skpdiff reported no perceptual difference for %s vs %s' % (
                expected_image_file, actual_image_file))
    return perceptual_difference


def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram

    Returns the maximum value of each band within the image histogram, as a
    list. A band whose histogram entries are all zero contributes no entry.
    """
    max_per_band = []
    assert(len(histogram) % VALUES_PER_BAND == 0)
    # Assuming that VALUES_PER_BAND is 256...
    #  the 'R' band makes up indices 0-255 in the histogram,
    #  the 'G' band makes up indices 256-511 in the histogram,
    #  etc.
    for min_index in range(0, len(histogram), VALUES_PER_BAND):
        # Scan downward from the brightest value; the first nonempty bucket
        # is this band's maximum.
        for value in reversed(range(VALUES_PER_BAND)):
            if histogram[min_index + value] > 0:
                max_per_band.append(value)
                break
    return max_per_band


def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises:
      ValueError: re-raised (after logging) if the diff fails with one.
    """
    try:
        return ImageChops.difference(
            image1.convert('RGB'), image2.convert('RGB'))
    except ValueError:
        logging.error('Error diffing image1 [%s] and image2 [%s].',
                      repr(image1), repr(image2))
        raise


def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    If the download fails part-way, any partially-written file is removed so
    that a later call will attempt the download again instead of trying to
    use a truncated file.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        try:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(local_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
        except Exception:
            # Don't leave a partial download behind.
            if os.path.exists(local_filepath):
                os.remove(local_filepath)
            raise
    return _open_image(local_filepath)


def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error
    messages.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises:
      IOError: if the image could not be loaded; the file is deleted first.
    """
    try:
        return Image.open(filepath)
    except IOError:
        # If we are unable to load an image from the file, delete it from
        # disk and we will try to fetch it again next time.
        # Fixes http://skbug.com/2247
        logging.error('IOError loading image file %s ; deleting it.',
                      filepath)
        os.remove(filepath)
        raise


def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as
    needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    _mkdir_unless_exists(os.path.dirname(filepath))
    image.save(filepath, format)


def _mkdir_unless_exists(path):
    """Unless path refers to an already-existing directory, create it.

    Creation is attempted first and an "already exists" failure is tolerated,
    so that another process creating the same directory at the same time
    does not cause an error.

    Args:
      path: path on local disk

    Raises:
      OSError: if the directory could not be created and does not exist.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _sanitize_locator(locator):
    """Returns a sanitized version of a locator (one in which we know none of
    the characters will have special meaning in filenames).

    Every character other than a word character or '-' is replaced by '_'.

    Args:
      locator: string, or something that can be represented as a string
    """
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))


def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    We must keep this function in sync with getImageDiffRelativeUrl() in
    static/loader.js

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
                         _sanitize_locator(actual_image_locator))