# Copyright (C) 2010 Google Inc. All rights reserved.
# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

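"""Model objects for aggregating and summarizing layout test results.

TestRunResults accumulates the results of a single test run, RunDetails
bundles the initial and retry runs together with an exit code, and
summarize_results() flattens everything into the dictionary that is
written out as full_results.json.
"""
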
import logging
import re
import signal
import time

from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models import test_failures


_log = logging.getLogger(__name__)

OK_EXIT_STATUS = 0

# This matches what the shell does on POSIX.
INTERRUPTED_EXIT_STATUS = signal.SIGINT + 128
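# (signal.SIGINT is 2 on POSIX, so this evaluates to 130.)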

# POSIX limits status codes to 0-255. Normally run-webkit-tests returns the number
# of tests that failed. These indicate exceptional conditions triggered by the
# script itself, so we count backwards from 255 (aka -1) to enumerate them.
#
# FIXME: crbug.com/357866. We really shouldn't return the number of failures
# in the exit code at all.
EARLY_EXIT_STATUS = 251
SYS_DEPS_EXIT_STATUS = 252
NO_TESTS_EXIT_STATUS = 253
NO_DEVICES_EXIT_STATUS = 254
UNEXPECTED_ERROR_EXIT_STATUS = 255

ERROR_CODES = (
    INTERRUPTED_EXIT_STATUS,
    EARLY_EXIT_STATUS,
    SYS_DEPS_EXIT_STATUS,
    NO_TESTS_EXIT_STATUS,
    NO_DEVICES_EXIT_STATUS,
    UNEXPECTED_ERROR_EXIT_STATUS,
)

# In order to avoid colliding with the above codes, we put a ceiling on
# the value returned by num_regressions.
MAX_FAILURES_EXIT_STATUS = 101
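# (A run with, say, 200 regressions is presumably capped to an exit status of
# 101 by the caller, so the failure count can never collide with the error
# codes above.)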


class TestRunException(Exception):
    def __init__(self, code, msg):
        self.code = code
        self.msg = msg


class TestRunResults(object):
    def __init__(self, expectations, num_tests):
        self.total = num_tests
        self.remaining = self.total
        self.expectations = expectations
        self.expected = 0
        self.expected_failures = 0
        self.unexpected = 0
        self.unexpected_failures = 0
        self.unexpected_crashes = 0
        self.unexpected_timeouts = 0
        self.tests_by_expectation = {}
        self.tests_by_timeline = {}
        self.results_by_name = {}  # Map of test name to the last result for the test.
        self.all_results = []  # All results from a run, including every iteration of every test.
        self.unexpected_results_by_name = {}
        self.failures_by_name = {}
        self.total_failures = 0
        self.expected_skips = 0
        for expectation in test_expectations.TestExpectations.EXPECTATIONS.values():
            self.tests_by_expectation[expectation] = set()
        for timeline in test_expectations.TestExpectations.TIMELINES.values():
            self.tests_by_timeline[timeline] = expectations.get_tests_with_timeline(timeline)
        self.slow_tests = set()
        self.interrupted = False
        self.keyboard_interrupted = False
        self.run_time = 0  # The wall clock time spent running the tests (layout_test_runner.run()).

    def add(self, test_result, expected, test_is_slow):
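        """Records a single TestResult and updates the aggregate counters.

        expected says whether the result matched its expectations;
        test_is_slow adds the test to the set of slow tests.
        """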
        result_type_for_stats = test_result.type
        if test_expectations.WONTFIX in self.expectations.model().get_expectations(test_result.test_name):
            result_type_for_stats = test_expectations.WONTFIX
        self.tests_by_expectation[result_type_for_stats].add(test_result.test_name)

        self.results_by_name[test_result.test_name] = test_result
        if test_result.type != test_expectations.SKIP:
            self.all_results.append(test_result)
        self.remaining -= 1
        if test_result.failures:
            self.total_failures += 1
            self.failures_by_name[test_result.test_name] = test_result.failures
        if expected:
            self.expected += 1
            if test_result.type == test_expectations.SKIP:
                self.expected_skips += 1
            elif test_result.type != test_expectations.PASS:
                self.expected_failures += 1
        else:
            self.unexpected_results_by_name[test_result.test_name] = test_result
            self.unexpected += 1
            if test_result.failures:
                self.unexpected_failures += 1
            if test_result.type == test_expectations.CRASH:
                self.unexpected_crashes += 1
            elif test_result.type == test_expectations.TIMEOUT:
                self.unexpected_timeouts += 1
        if test_is_slow:
            self.slow_tests.add(test_result.test_name)


class RunDetails(object):
    def __init__(self, exit_code, summarized_full_results=None, summarized_failing_results=None,
                 initial_results=None, retry_results=None, enabled_pixel_tests_in_retry=False):
        self.exit_code = exit_code
        self.summarized_full_results = summarized_full_results
        self.summarized_failing_results = summarized_failing_results
        self.initial_results = initial_results
        self.retry_results = retry_results
        self.enabled_pixel_tests_in_retry = enabled_pixel_tests_in_retry


def _interpret_test_failures(failures):
    test_dict = {}
    failure_types = [type(failure) for failure in failures]
    # FIXME: get rid of all these is_* values once there is a 1:1 map between
    # TestFailure type and test_expectations.EXPECTATION.
    if test_failures.FailureMissingAudio in failure_types:
        test_dict['is_missing_audio'] = True

    if test_failures.FailureMissingResult in failure_types:
        test_dict['is_missing_text'] = True

    if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
        test_dict['is_missing_image'] = True

    if test_failures.FailureTestHarnessAssertion in failure_types:
        test_dict['is_testharness_test'] = True

    return test_dict


def _chromium_commit_position(scm, path):
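    """Returns the Chromium commit position as a string (e.g. "123456" for a
    log entry containing "Cr-Commit-Position: refs/heads/master@{#123456}"),
    or "" if no Cr-Commit-Position footer is found."""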
    log = scm.most_recent_log_matching('Cr-Commit-Position:', path)
    match = re.search(r'^\s*Cr-Commit-Position:.*@\{#(?P<commit_position>\d+)\}', log, re.MULTILINE)
    if not match:
        return ""
    return str(match.group('commit_position'))


def summarize_results(port_obj, expectations, initial_results, retry_results,
                      enabled_pixel_tests_in_retry, only_include_failing=False):
    """Returns a dictionary containing a summary of the test runs, with the following fields:
        'version': A version indicator.
        'fixable': The number of fixable tests (NOW - PASS).
        'skipped': The number of skipped tests (NOW & SKIPPED).
        'num_regressions': The number of non-flaky failures.
        'num_flaky': The number of flaky failures.
        'num_passes': The number of unexpected passes.
        'tests': A dict of tests -> {'expected': '...', 'actual': '...'}.
    """
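    # A sketch of the returned structure (values here are purely illustrative):
    #   {
    #       'version': 3,
    #       'fixable': 2, 'skipped': 1, 'num_passes': 10, 'num_flaky': 0, 'num_regressions': 1,
    #       'num_failures_by_type': {'PASS': 10, 'CRASH': 1, ...},
    #       'tests': {'fast': {'html': {'test.html': {'expected': 'PASS', 'actual': 'CRASH', 'is_unexpected': True}}}},
    #       ...
    #   }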
    results = {}
    results['version'] = 3

    tbe = initial_results.tests_by_expectation
    tbt = initial_results.tests_by_timeline
    results['fixable'] = len(tbt[test_expectations.NOW] - tbe[test_expectations.PASS])
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['skipped'] = len(tbt[test_expectations.NOW] & tbe[test_expectations.SKIP])

    num_passes = 0
    num_flaky = 0
    num_regressions = 0
    keywords = {}
    for expectation_string, expectation_enum in test_expectations.TestExpectations.EXPECTATIONS.iteritems():
        keywords[expectation_enum] = expectation_string.upper()

    num_failures_by_type = {}
    for expectation in initial_results.tests_by_expectation:
        tests = initial_results.tests_by_expectation[expectation]
        if expectation != test_expectations.WONTFIX:
            tests &= tbt[test_expectations.NOW]
        num_failures_by_type[keywords[expectation]] = len(tests)
    # The number of failures by type.
    results['num_failures_by_type'] = num_failures_by_type

    tests = {}

    for test_name, result in initial_results.results_by_name.iteritems():
        expected = expectations.get_expectations_string(test_name)
        result_type = result.type
        actual = [keywords[result_type]]

        if only_include_failing and result_type == test_expectations.SKIP:
            continue

        if result_type == test_expectations.PASS:
            num_passes += 1
            if not result.has_stderr and only_include_failing:
                continue
        elif result_type != test_expectations.SKIP and test_name in initial_results.unexpected_results_by_name:
            if retry_results:
                if test_name not in retry_results.unexpected_results_by_name:
                    # The test failed unexpectedly at first, but ran as expected the second time -> flaky.
                    actual.extend(expectations.get_expectations_string(test_name).split(" "))
                    num_flaky += 1
                else:
                    retry_result_type = retry_results.unexpected_results_by_name[test_name].type
                    if retry_result_type == test_expectations.PASS:
                        # The test failed unexpectedly at first, then passed unexpectedly -> unexpected pass.
                        num_passes += 1
                        if not result.has_stderr and only_include_failing:
                            continue
                    else:
                        # The test failed unexpectedly both times -> regression.
                        num_regressions += 1
                        if keywords[retry_result_type] not in actual:
                            actual.append(keywords[retry_result_type])
            else:
                # The test failed unexpectedly, but we didn't do any retries -> regression.
                num_regressions += 1

        test_dict = {}

        rounded_run_time = round(result.test_run_time, 1)
        if rounded_run_time:
            test_dict['time'] = rounded_run_time

        if result.has_stderr:
            test_dict['has_stderr'] = True

        bugs = expectations.model().get_expectation_line(test_name).bugs
        if bugs:
            test_dict['bugs'] = bugs

        if result.reftest_type:
            test_dict.update(reftest_type=list(result.reftest_type))

        test_dict['expected'] = expected
        test_dict['actual'] = " ".join(actual)

        def is_expected(actual_result):
            return expectations.matches_an_expected_result(test_name, result_type,
                port_obj.get_option('pixel_tests') or result.reftest_type,
                port_obj.get_option('enable_sanitizer'))
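        # Note: is_expected() currently ignores its argument and always checks
        # the initial result_type, so every entry in actual gets the same answer.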

        # To avoid bloating the output results json too much, only add an entry for whether the failure is unexpected.
        if not all(is_expected(actual_result) for actual_result in actual):
            test_dict['is_unexpected'] = True

        test_dict.update(_interpret_test_failures(result.failures))

        if retry_results:
            retry_result = retry_results.unexpected_results_by_name.get(test_name)
            if retry_result:
                test_dict.update(_interpret_test_failures(retry_result.failures))

        if result.has_repaint_overlay:
            test_dict['has_repaint_overlay'] = True

        # Store test hierarchically by directory. e.g.
        # foo/bar/baz.html: test_dict
        # foo/bar/baz1.html: test_dict
        #
        # becomes
        # foo: {
        #     bar: {
        #         baz.html: test_dict,
        #         baz1.html: test_dict
        #     }
        # }
        parts = test_name.split('/')
        current_map = tests
        for i, part in enumerate(parts):
            if i == (len(parts) - 1):
                current_map[part] = test_dict
                break
            if part not in current_map:
                current_map[part] = {}
            current_map = current_map[part]

    results['tests'] = tests
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['num_passes'] = num_passes
    results['num_flaky'] = num_flaky
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['num_regressions'] = num_regressions
    results['interrupted'] = initial_results.interrupted  # Does results.html have enough information to compute this itself? (by checking total number of results vs. total number of tests?)
    results['layout_tests_dir'] = port_obj.layout_tests_dir()
    results['has_wdiff'] = port_obj.wdiff_available()
    results['has_pretty_patch'] = port_obj.pretty_patch_available()
    results['pixel_tests_enabled'] = port_obj.get_option('pixel_tests')
    results['seconds_since_epoch'] = int(time.time())
    results['build_number'] = port_obj.get_option('build_number')
    results['builder_name'] = port_obj.get_option('builder_name')

    # Don't do this by default since it takes >100ms.
    # It's only used for uploading data to the flakiness dashboard.
    results['chromium_revision'] = ''
    results['blink_revision'] = ''
    if port_obj.get_option('builder_name'):
        for (name, path) in port_obj.repository_paths():
            scm = port_obj.host.scm_for_path(path)
            rev = None
            if scm:
                if name.lower() == 'chromium':
                    rev = _chromium_commit_position(scm, path)
                else:
                    rev = scm.svn_revision(path)
            if rev:
                results[name.lower() + '_revision'] = rev
            else:
                _log.warn('Failed to determine svn revision for %s, '
                          'leaving "%s_revision" key blank in full_results.json.'
                          % (path, name))

    return results