1 # Copyright (C) 2011, Google Inc. All rights reserved. 2 # 3 # Redistribution and use in source and binary forms, with or without 4 # modification, are permitted provided that the following conditions are 5 # met: 6 # 7 # * Redistributions of source code must retain the above copyright 8 # notice, this list of conditions and the following disclaimer. 9 # * Redistributions in binary form must reproduce the above 10 # copyright notice, this list of conditions and the following disclaimer 11 # in the documentation and/or other materials provided with the 12 # distribution. 13 # * Neither the name of Google Inc. nor the names of its 14 # contributors may be used to endorse or promote products derived from 15 # this software without specific prior written permission. 16 # 17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 import copy 30 import logging 31 32 from webkitpy.common.memoized import memoized 33 34 _log = logging.getLogger(__name__) 35 36 37 # FIXME: Should this function be somewhere more general? 
def _invert_dictionary(dictionary):
    """Return a dict mapping each value of `dictionary` to the list of keys that map to it."""
    inverted_dictionary = {}
    for key, value in dictionary.items():
        inverted_dictionary.setdefault(value, []).append(key)
    return inverted_dictionary


class BaselineOptimizer(object):
    """Removes or relocates redundant expected-result ("baseline") files.

    For a given baseline name, the optimizer looks at every directory on the
    baseline search path of each configured port, hashes the baselines found
    there, and computes a smaller placement that yields the same result for
    every port. Files are then deleted/added through the host's SCM object.
    """

    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    def __init__(self, host, port_names):
        """Store the host's filesystem, port factory and SCM, plus the port names to optimize for."""
        self._filesystem = host.filesystem
        self._port_factory = host.port_factory
        self._scm = host.scm()
        self._port_names = port_names
        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, port, baseline_name):
        """Return the root directory for baseline_name: LayoutTests, or LayoutTests/<virtual suite name>."""
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        """Return the port's baseline search path, using the virtual variant for virtual-suite baselines."""
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

    @memoized
    def _relative_baseline_search_paths(self, port_name, baseline_name):
        """Return the port's search path relative to port.webkit_base(), with the baseline root appended."""
        port = self._port_factory.get(port_name)
        relative_paths = [self._filesystem.relpath(path, port.webkit_base())
                          for path in self._baseline_search_path(port, baseline_name)]
        return relative_paths + [self._baseline_root(port, baseline_name)]

    def _join_directory(self, directory, baseline_name):
        """Return the path under the checkout root of baseline_name inside the given search-path directory."""
        # This code is complicated because both the directory name and the baseline_name have the virtual
        # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name.
        # For example, virtual/softwarecompositing/foo-expected.png corresponds to compositing/foo-expected.png and
        # the baseline directories are like platform/mac/virtual/softwarecompositing. So, to get the path
        # to the baseline in the platform directory, we need to append just foo-expected.png to the directory.
        virtual_suite = self._port_factory.get().lookup_virtual_suite(baseline_name)
        if virtual_suite:
            # Drop the "<suite name>/" prefix (hence the + 1 for the separator).
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)

    def read_results_by_directory(self, baseline_name):
        """Return {directory: sha1} for every search-path directory that contains baseline_name.

        Directories are gathered from the search paths of all configured ports.
        An empty port list yields an empty dict.
        """
        # Accumulate into a set rather than reduce(set.union, ...): reduce is not a
        # builtin on Python 3 and raises TypeError when there are no ports at all.
        directories = set()
        for port_name in self._port_names:
            directories.update(self._relative_baseline_search_paths(port_name, baseline_name))

        results_by_directory = {}
        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

    def _results_by_port_name(self, results_by_directory, baseline_name):
        """Return {port_name: result}, taking the first directory on each port's search path that has a result.

        Ports for which no directory has a result are omitted.
        """
        results_by_port_name = {}
        for port_name in self._port_names:
            for directory in self._relative_baseline_search_paths(port_name, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    break
        return results_by_port_name

    @memoized
    def _directories_immediately_preceding_root(self, baseline_name):
        """Return the set of last search-path directories (relative to webkit_base) across all ports."""
        directories = set()
        for port_name in self._port_names:
            port = self._port_factory.get(port_name)
            last_directory = self._baseline_search_path(port, baseline_name)[-1]
            directories.add(self._filesystem.relpath(last_directory, port.webkit_base()))
        return directories

    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        """Promote a shared baseline to the root directory, or drop an unused root baseline.

        Mutates new_results_by_directory in place and returns None.
        """
        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
        # to a specific platform. As such, it's the only one where the baseline in fallback directories
        # immediately before it can be promoted up, i.e. if win and mac
        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
        # All other baselines can only be removed if they're redundant with a baseline earlier
        # in the fallback order. They can never be promoted up.
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        # With no predecessor directories there is nothing to compare or promote; bail
        # out instead of recording a None result for the root.
        if not directories_immediately_preceding_root:
            return

        shared_result = None
        root_baseline_unused = False
        for directory in directories_immediately_preceding_root:
            this_result = new_results_by_directory.get(directory)

            # If any of these directories don't have a baseline, there's no optimization we can do.
            if not this_result:
                return

            if not shared_result:
                shared_result = this_result
            elif shared_result != this_result:
                root_baseline_unused = True

        baseline_root = self._baseline_root(self._port_factory.get(), baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline, but have different baselines, so the baselines can't be promoted up.
        if root_baseline_unused:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        new_results_by_directory[baseline_root] = shared_result
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        """Return (current results by directory, optimized results by directory) for baseline_name."""
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(
            results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        """Return a copy of results_by_directory without results duplicated later in a port's fallback path.

        port_names_by_result is accepted for interface compatibility; it is not read here.
        """
        new_results_by_directory = copy.copy(results_by_directory)
        for port_name in self._port_names:
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port_name, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # Result for new_directory are redundant with the result earlier in the fallback order.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        """Return (index, directory) of the first fallback_path entry whose result equals current_result.

        Raises AssertionError if no entry matches.
        """
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        # Raise explicitly rather than `assert False`: asserts are stripped under -O, which
        # would make this method return None and break the caller's tuple unpacking.
        raise AssertionError("result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory))

    def _platform(self, filename):
        """Return the platform directory name that filename lives under, or '(generic)'.

        Accepts paths that start with LayoutTests/platform/ either directly or
        under the SCM checkout root.
        """
        separator = self._filesystem.sep
        platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + separator + 'platform' + separator
        if filename.startswith(platform_dir):
            # Slice the prefix off instead of str.replace(), which would also
            # remove any later occurrence of the same text inside the path.
            return filename[len(platform_dir):].split(separator)[0]
        platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
        if filename.startswith(platform_dir):
            return filename[len(platform_dir):].split(separator)[0]
        return '(generic)'

    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        """Apply the optimized placement: delete baselines that changed or vanished, then write the new ones."""
        # Read one copy of the file contents per distinct result before anything is
        # deleted, so the contents are still available when the new files are written.
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)

        file_names = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) != result:
                file_names.append(self._join_directory(directory, baseline_name))
        if file_names:
            _log.debug(" Deleting:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug(" " + platform_dir)
            self._scm.delete_list(file_names)
        else:
            _log.debug(" (Nothing to delete)")

        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if file_names:
            _log.debug(" Adding:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug(" " + platform_dir)
            self._scm.add_list(file_names)
        else:
            _log.debug(" (Nothing to add)")

    def write_by_directory(self, results_by_directory, writer, indent):
        """Call writer once per directory (sorted) with '<indent><platform>: <first 6 chars of result>'."""
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        """Optimize the placement of one baseline name.

        Returns True when the placement is already optimal, there are no
        baselines, or the files were moved; returns False when the computed
        placement would change the result seen by some port.
        """
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug(" %s: (already optimal)" % basename)
                self.write_by_directory(results_by_directory, _log.debug, " ")
            else:
                _log.debug(" %s: (no baselines found)" % basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error(" %s: optimization failed" % basename)
            self.write_by_directory(results_by_directory, _log.warning, " ")
            return False

        _log.debug(" %s:" % basename)
        _log.debug(" Before: ")
        self.write_by_directory(results_by_directory, _log.debug, " ")
        _log.debug(" After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, " ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        """Delete the virtual root baseline when every port's non-virtual fallback yields the same result."""
        default_port = self._port_factory.get()
        virtual_root_expected_baseline_path = self._filesystem.join(default_port.layout_tests_dir(), baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port_name in self._port_names:
            directories = self._relative_baseline_search_paths(port_name, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                if results_by_directory[directory] != root_sha1:
                    return
                # Only the first directory that has a result matters for this port.
                break

        _log.debug("Deleting redundant virtual root expected result.")
        self._scm.delete(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        """Optimize baseline_name and, if it is a virtual test baseline, its non-virtual counterpart too.

        Returns the result of _optimize_subtree, combined with `|` when both
        the virtual and non-virtual paths are processed.
        """
        # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the non-virtual path.
        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
        # a visual representation of this.
        #
        # So, we can optimize the virtual path, then the virtual root and then the regular path.

        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._port_factory.get().lookup_virtual_test_base(baseline_name)
        if not non_virtual_baseline_name:
            return result

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        # NOTE(review): `|=` reports success if either pass succeeded, so a failure of the
        # first pass is hidden when the second succeeds. Left as-is; confirm whether callers
        # expect "all passes succeeded" (`&=`) before changing.
        result |= self._optimize_subtree(non_virtual_baseline_name)
        return result