Home | History | Annotate | Download | only in checkout
      1 # Copyright (C) 2011, Google Inc. All rights reserved.
      2 #
      3 # Redistribution and use in source and binary forms, with or without
      4 # modification, are permitted provided that the following conditions are
      5 # met:
      6 #
      7 #     * Redistributions of source code must retain the above copyright
      8 # notice, this list of conditions and the following disclaimer.
      9 #     * Redistributions in binary form must reproduce the above
     10 # copyright notice, this list of conditions and the following disclaimer
     11 # in the documentation and/or other materials provided with the
     12 # distribution.
     13 #     * Neither the name of Google Inc. nor the names of its
     14 # contributors may be used to endorse or promote products derived from
     15 # this software without specific prior written permission.
     16 #
     17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 import copy
     30 import logging
     31 
     32 from webkitpy.common.memoized import memoized
     33 
     34 _log = logging.getLogger(__name__)
     35 
     36 
     37 # FIXME: Should this function be somewhere more general?
     38 def _invert_dictionary(dictionary):
     39     inverted_dictionary = {}
     40     for key, value in dictionary.items():
     41         if inverted_dictionary.get(value):
     42             inverted_dictionary[value].append(key)
     43         else:
     44             inverted_dictionary[value] = [key]
     45     return inverted_dictionary
     46 
     47 
     48 class BaselineOptimizer(object):
     49     ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'
     50 
     51     def __init__(self, host, port_names):
     52         self._filesystem = host.filesystem
     53         self._port_factory = host.port_factory
     54         self._scm = host.scm()
     55         self._port_names = port_names
     56         # Only used by unittests.
     57         self.new_results_by_directory = []
     58 
     59     def _baseline_root(self, port, baseline_name):
     60         virtual_suite = port.lookup_virtual_suite(baseline_name)
     61         if virtual_suite:
     62             return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
     63         return self.ROOT_LAYOUT_TESTS_DIRECTORY
     64 
     65     def _baseline_search_path(self, port, baseline_name):
     66         virtual_suite = port.lookup_virtual_suite(baseline_name)
     67         if virtual_suite:
     68             return port.virtual_baseline_search_path(baseline_name)
     69         return port.baseline_search_path()
     70 
     71     @memoized
     72     def _relative_baseline_search_paths(self, port_name, baseline_name):
     73         port = self._port_factory.get(port_name)
     74         relative_paths = [self._filesystem.relpath(path, port.webkit_base()) for path in self._baseline_search_path(port, baseline_name)]
     75         return relative_paths + [self._baseline_root(port, baseline_name)]
     76 
     77     def _join_directory(self, directory, baseline_name):
     78         # This code is complicated because both the directory name and the baseline_name have the virtual
     79         # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name.
     80         # For example, virtual/softwarecompositing/foo-expected.png corresponds to compostiting/foo-expected.png and
     81         # the baseline directories are like platform/mac/virtual/softwarecompositing. So, to get the path
     82         # to the baseline in the platform directory, we need to append jsut foo-expected.png to the directory.
     83         virtual_suite = self._port_factory.get().lookup_virtual_suite(baseline_name)
     84         if virtual_suite:
     85             baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
     86         else:
     87             baseline_name_without_virtual = baseline_name
     88         return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)
     89 
     90     def read_results_by_directory(self, baseline_name):
     91         results_by_directory = {}
     92         directories = reduce(set.union, map(set, [self._relative_baseline_search_paths(port_name, baseline_name) for port_name in self._port_names]))
     93 
     94         for directory in directories:
     95             path = self._join_directory(directory, baseline_name)
     96             if self._filesystem.exists(path):
     97                 results_by_directory[directory] = self._filesystem.sha1(path)
     98         return results_by_directory
     99 
    100     def _results_by_port_name(self, results_by_directory, baseline_name):
    101         results_by_port_name = {}
    102         for port_name in self._port_names:
    103             for directory in self._relative_baseline_search_paths(port_name, baseline_name):
    104                 if directory in results_by_directory:
    105                     results_by_port_name[port_name] = results_by_directory[directory]
    106                     break
    107         return results_by_port_name
    108 
    109     @memoized
    110     def _directories_immediately_preceding_root(self, baseline_name):
    111         directories = set()
    112         for port_name in self._port_names:
    113             port = self._port_factory.get(port_name)
    114             directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], port.webkit_base())
    115             directories.add(directory)
    116         return directories
    117 
    118     def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
    119         # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
    120         # to a specific platform. As such, it's the only one where the baseline in fallback directories
    121         # immediately before it can be promoted up, i.e. if win and mac
    122         # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
    123         # All other baselines can only be removed if they're redundant with a baseline earlier
    124         # in the fallback order. They can never promoted up.
    125         directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)
    126 
    127         shared_result = None
    128         root_baseline_unused = False
    129         for directory in directories_immediately_preceding_root:
    130             this_result = new_results_by_directory.get(directory)
    131 
    132             # If any of these directories don't have a baseline, there's no optimization we can do.
    133             if not this_result:
    134                 return
    135 
    136             if not shared_result:
    137                 shared_result = this_result
    138             elif shared_result != this_result:
    139                 root_baseline_unused = True
    140 
    141         baseline_root = self._baseline_root(self._port_factory.get(), baseline_name)
    142 
    143         # The root baseline is unused if all the directories immediately preceding the root
    144         # have a baseline, but have different baselines, so the baselines can't be promoted up.
    145         if root_baseline_unused:
    146             if baseline_root in new_results_by_directory:
    147                 del new_results_by_directory[baseline_root]
    148             return
    149 
    150         new_results_by_directory[baseline_root] = shared_result
    151         for directory in directories_immediately_preceding_root:
    152             del new_results_by_directory[directory]
    153 
    154     def _find_optimal_result_placement(self, baseline_name):
    155         results_by_directory = self.read_results_by_directory(baseline_name)
    156         results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
    157         port_names_by_result = _invert_dictionary(results_by_port_name)
    158 
    159         new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
    160         self._optimize_result_for_root(new_results_by_directory, baseline_name)
    161 
    162         return results_by_directory, new_results_by_directory
    163 
    164     def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
    165         new_results_by_directory = copy.copy(results_by_directory)
    166         for port_name in self._port_names:
    167             current_result = results_by_port_name.get(port_name)
    168 
    169             # This happens if we're missing baselines for a port.
    170             if not current_result:
    171                 continue;
    172 
    173             fallback_path = self._relative_baseline_search_paths(port_name, baseline_name)
    174             current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
    175             for index in range(current_index + 1, len(fallback_path)):
    176                 new_directory = fallback_path[index]
    177                 if not new_directory in new_results_by_directory:
    178                     # No result for this baseline in this directory.
    179                     continue
    180                 elif new_results_by_directory[new_directory] == current_result:
    181                     # Result for new_directory are redundant with the result earlier in the fallback order.
    182                     if current_directory in new_results_by_directory:
    183                         del new_results_by_directory[current_directory]
    184                 else:
    185                     # The new_directory contains a different result, so stop trying to push results up.
    186                     break
    187 
    188         return new_results_by_directory
    189 
    190     def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
    191         for index, directory in enumerate(fallback_path):
    192             if directory in results_by_directory and (results_by_directory[directory] == current_result):
    193                 return index, directory
    194         assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory)
    195 
    196     def _platform(self, filename):
    197         platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep
    198         if filename.startswith(platform_dir):
    199             return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
    200         platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
    201         if filename.startswith(platform_dir):
    202             return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
    203         return '(generic)'
    204 
    205     def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
    206         data_for_result = {}
    207         for directory, result in results_by_directory.items():
    208             if not result in data_for_result:
    209                 source = self._join_directory(directory, baseline_name)
    210                 data_for_result[result] = self._filesystem.read_binary_file(source)
    211 
    212         file_names = []
    213         for directory, result in results_by_directory.items():
    214             if new_results_by_directory.get(directory) != result:
    215                 file_names.append(self._join_directory(directory, baseline_name))
    216         if file_names:
    217             _log.debug("    Deleting:")
    218             for platform_dir in sorted(self._platform(filename) for filename in file_names):
    219                 _log.debug("      " + platform_dir)
    220             self._scm.delete_list(file_names)
    221         else:
    222             _log.debug("    (Nothing to delete)")
    223 
    224         file_names = []
    225         for directory, result in new_results_by_directory.items():
    226             if results_by_directory.get(directory) != result:
    227                 destination = self._join_directory(directory, baseline_name)
    228                 self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
    229                 self._filesystem.write_binary_file(destination, data_for_result[result])
    230                 file_names.append(destination)
    231 
    232         if file_names:
    233             _log.debug("    Adding:")
    234             for platform_dir in sorted(self._platform(filename) for filename in file_names):
    235                 _log.debug("      " + platform_dir)
    236             self._scm.add_list(file_names)
    237         else:
    238             _log.debug("    (Nothing to add)")
    239 
    240     def write_by_directory(self, results_by_directory, writer, indent):
    241         for path in sorted(results_by_directory):
    242             writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))
    243 
    244     def _optimize_subtree(self, baseline_name):
    245         basename = self._filesystem.basename(baseline_name)
    246         results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)
    247 
    248         if new_results_by_directory == results_by_directory:
    249             if new_results_by_directory:
    250                 _log.debug("  %s: (already optimal)" % basename)
    251                 self.write_by_directory(results_by_directory, _log.debug, "    ")
    252             else:
    253                 _log.debug("  %s: (no baselines found)" % basename)
    254             # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
    255             self.new_results_by_directory.append(results_by_directory)
    256             return True
    257 
    258         if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
    259             # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
    260             # instead of committing incorrect baselines.
    261             _log.error("  %s: optimization failed" % basename)
    262             self.write_by_directory(results_by_directory, _log.warning, "      ")
    263             return False
    264 
    265         _log.debug("  %s:" % basename)
    266         _log.debug("    Before: ")
    267         self.write_by_directory(results_by_directory, _log.debug, "      ")
    268         _log.debug("    After: ")
    269         self.write_by_directory(new_results_by_directory, _log.debug, "      ")
    270 
    271         self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
    272         return True
    273 
    274     def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
    275         default_port = self._port_factory.get()
    276         virtual_root_expected_baseline_path = self._filesystem.join(default_port.layout_tests_dir(), baseline_name)
    277         if not self._filesystem.exists(virtual_root_expected_baseline_path):
    278             return
    279         root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)
    280 
    281         results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
    282         # See if all the immediate predecessors of the virtual root have the same expected result.
    283         for port_name in self._port_names:
    284             directories = self._relative_baseline_search_paths(port_name, non_virtual_baseline_name)
    285             for directory in directories:
    286                 if directory not in results_by_directory:
    287                     continue
    288                 if results_by_directory[directory] != root_sha1:
    289                     return
    290                 break
    291 
    292         _log.debug("Deleting redundant virtual root expected result.")
    293         self._scm.delete(virtual_root_expected_baseline_path)
    294 
    295     def optimize(self, baseline_name):
    296         # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the non-virtual path.
    297         # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
    298         # a visual representation of this.
    299         #
    300         # So, we can optimize the virtual path, then the virtual root and then the regular path.
    301 
    302         _log.debug("Optimizing regular fallback path.")
    303         result = self._optimize_subtree(baseline_name)
    304         non_virtual_baseline_name = self._port_factory.get().lookup_virtual_test_base(baseline_name)
    305         if not non_virtual_baseline_name:
    306             return result
    307 
    308         self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)
    309 
    310         _log.debug("Optimizing non-virtual fallback path.")
    311         result |= self._optimize_subtree(non_virtual_baseline_name)
    312         return result
    313