Home | History | Annotate | Download | only in layout_package
      1 #!/usr/bin/env python
      2 # Copyright (C) 2010 Google Inc. All rights reserved.
      3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor (at] inf.u-szeged.hu), University of Szeged
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 """
     32 The TestRunner class runs a series of tests (TestType interface) against a set
     33 of test files.  If a test file fails a TestType, it returns a list TestFailure
     34 objects to the TestRunner.  The TestRunner then aggregates the TestFailures to
     35 create a final report.
     36 """
     37 
     38 from __future__ import with_statement
     39 
     40 import copy
     41 import errno
     42 import logging
     43 import math
     44 import Queue
     45 import random
     46 import sys
     47 import time
     48 
     49 from webkitpy.layout_tests.layout_package import json_layout_results_generator
     50 from webkitpy.layout_tests.layout_package import json_results_generator
     51 from webkitpy.layout_tests.layout_package import printing
     52 from webkitpy.layout_tests.layout_package import test_expectations
     53 from webkitpy.layout_tests.layout_package import test_failures
     54 from webkitpy.layout_tests.layout_package import test_results
     55 from webkitpy.layout_tests.layout_package import test_results_uploader
     56 from webkitpy.layout_tests.layout_package.result_summary import ResultSummary
     57 from webkitpy.layout_tests.layout_package.test_input import TestInput
     58 
     59 from webkitpy.thirdparty import simplejson
     60 from webkitpy.tool import grammar
     61 
     62 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
     63 
     64 # Builder base URL where we have the archived test results.
     65 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
     66 
     67 TestExpectationsFile = test_expectations.TestExpectationsFile
     68 
     69 
     70 def summarize_results(port_obj, expectations, result_summary, retry_summary, test_timings, only_unexpected):
     71     """Summarize any unexpected results as a dict.
     72 
     73     FIXME: split this data structure into a separate class?
     74 
     75     Args:
     76         port_obj: interface to port-specific hooks
     77         expectations: test_expectations.TestExpectations object
     78         result_summary: summary object from initial test runs
     79         retry_summary: summary object from final test run of retried tests
     80         test_timings: a list of TestResult objects which contain test runtimes in seconds
     81         only_unexpected: whether to return a summary only for the unexpected results
     82     Returns:
     83         A dictionary containing a summary of the unexpected results from the
     84         run, with the following fields:
     85         'version': a version indicator (1 in this version)
     86         'fixable': # of fixable tests (NOW - PASS)
     87         'skipped': # of skipped tests (NOW & SKIPPED)
     88         'num_regressions': # of non-flaky failures
     89         'num_flaky': # of flaky failures
     90         'num_passes': # of unexpected passes
     91         'tests': a dict of tests -> {'expected': '...', 'actual': '...', 'time_ms': ...}
     92     """
     93     results = {}
     94     results['version'] = 1
     95 
     96     test_timings_map = dict((test_result.filename, test_result.test_run_time) for test_result in test_timings)
     97 
     98     tbe = result_summary.tests_by_expectation
     99     tbt = result_summary.tests_by_timeline
    100     results['fixable'] = len(tbt[test_expectations.NOW] -
    101                                 tbe[test_expectations.PASS])
    102     results['skipped'] = len(tbt[test_expectations.NOW] &
    103                                 tbe[test_expectations.SKIP])
    104 
    105     num_passes = 0
    106     num_flaky = 0
    107     num_regressions = 0
    108     keywords = {}
    109     for expecation_string, expectation_enum in TestExpectationsFile.EXPECTATIONS.iteritems():
    110         keywords[expectation_enum] = expecation_string.upper()
    111 
    112     for modifier_string, modifier_enum in TestExpectationsFile.MODIFIERS.iteritems():
    113         keywords[modifier_enum] = modifier_string.upper()
    114 
    115     tests = {}
    116     original_results = result_summary.unexpected_results if only_unexpected else result_summary.results
    117 
    118     for filename, result in original_results.iteritems():
    119         # Note that if a test crashed in the original run, we ignore
    120         # whether or not it crashed when we retried it (if we retried it),
    121         # and always consider the result not flaky.
    122         test = port_obj.relative_test_filename(filename)
    123         expected = expectations.get_expectations_string(filename)
    124         result_type = result.type
    125         actual = [keywords[result_type]]
    126 
    127         if result_type == test_expectations.PASS:
    128             num_passes += 1
    129         elif result_type == test_expectations.CRASH:
    130             num_regressions += 1
    131         elif filename in result_summary.unexpected_results:
    132             if filename not in retry_summary.unexpected_results:
    133                 actual.extend(expectations.get_expectations_string(filename).split(" "))
    134                 num_flaky += 1
    135             else:
    136                 retry_result_type = retry_summary.unexpected_results[filename].type
    137                 if result_type != retry_result_type:
    138                     actual.append(keywords[retry_result_type])
    139                     num_flaky += 1
    140                 else:
    141                     num_regressions += 1
    142 
    143         tests[test] = {}
    144         tests[test]['expected'] = expected
    145         tests[test]['actual'] = " ".join(actual)
    146         # FIXME: Set this correctly once https://webkit.org/b/37739 is fixed
    147         # and only set it if there actually is stderr data.
    148         tests[test]['has_stderr'] = False
    149 
    150         failure_types = [type(f) for f in result.failures]
    151         if test_failures.FailureMissingAudio in failure_types:
    152             tests[test]['is_missing_audio'] = True
    153 
    154         if test_failures.FailureReftestMismatch in failure_types:
    155             tests[test]['is_reftest'] = True
    156 
    157         for f in result.failures:
    158             if 'is_reftest' in result.failures:
    159                 tests[test]['is_reftest'] = True
    160 
    161         if test_failures.FailureReftestMismatchDidNotOccur in failure_types:
    162             tests[test]['is_mismatch_reftest'] = True
    163 
    164         if test_failures.FailureMissingResult in failure_types:
    165             tests[test]['is_missing_text'] = True
    166 
    167         if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
    168             tests[test]['is_missing_image'] = True
    169 
    170         if filename in test_timings_map:
    171             time_seconds = test_timings_map[filename]
    172             tests[test]['time_ms'] = int(1000 * time_seconds)
    173 
    174     results['tests'] = tests
    175     results['num_passes'] = num_passes
    176     results['num_flaky'] = num_flaky
    177     results['num_regressions'] = num_regressions
    178     # FIXME: If non-chromium ports start using an expectations file,
    179     # we should make this check more robust.
    180     results['uses_expectations_file'] = port_obj.name().find('chromium') != -1
    181     results['layout_tests_dir'] = port_obj.layout_tests_dir()
    182     results['has_wdiff'] = port_obj.wdiff_available()
    183     results['has_pretty_patch'] = port_obj.pretty_patch_available()
    184 
    185     return results
    186 
    187 
    188 class TestRunInterruptedException(Exception):
    189     """Raised when a test run should be stopped immediately."""
    190     def __init__(self, reason):
    191         self.reason = reason
    192 
    193     def __reduce__(self):
    194         return self.__class__, (self.reason,)
    195 
    196 
    197 class TestRunner:
    198     """A class for managing running a series of tests on a series of layout
    199     test files."""
    200 
    201 
    202     # The per-test timeout in milliseconds, if no --time-out-ms option was
    203     # given to run_webkit_tests. This should correspond to the default timeout
    204     # in DumpRenderTree.
    205     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
    206 
    207     def __init__(self, port, options, printer):
    208         """Initialize test runner data structures.
    209 
    210         Args:
    211           port: an object implementing port-specific
    212           options: a dictionary of command line options
    213           printer: a Printer object to record updates to.
    214         """
    215         self._port = port
    216         self._fs = port._filesystem
    217         self._options = options
    218         self._printer = printer
    219         self._message_broker = None
    220 
    221         self.HTTP_SUBDIR = self._fs.join('', 'http', '')
    222         self.WEBSOCKET_SUBDIR = self._fs.join('', 'websocket', '')
    223         self.LAYOUT_TESTS_DIRECTORY = "LayoutTests" + self._fs.sep
    224 
    225 
    226         # disable wss server. need to install pyOpenSSL on buildbots.
    227         # self._websocket_secure_server = websocket_server.PyWebSocket(
    228         #        options.results_directory, use_tls=True, port=9323)
    229 
    230         # a set of test files, and the same tests as a list
    231         self._test_files = set()
    232         self._test_files_list = None
    233         self._result_queue = Queue.Queue()
    234         self._retrying = False
    235         self._results_directory = self._port.results_directory()
    236 
    237     def collect_tests(self, args, last_unexpected_results):
    238         """Find all the files to test.
    239 
    240         Args:
    241           args: list of test arguments from the command line
    242           last_unexpected_results: list of unexpected results to retest, if any
    243 
    244         """
    245         paths = self._strip_test_dir_prefixes(args)
    246         paths += last_unexpected_results
    247         if self._options.test_list:
    248             paths += self._strip_test_dir_prefixes(read_test_files(self._fs, self._options.test_list))
    249         self._test_files = self._port.tests(paths)
    250 
    251     def _strip_test_dir_prefixes(self, paths):
    252         return [self._strip_test_dir_prefix(path) for path in paths if path]
    253 
    254     def _strip_test_dir_prefix(self, path):
    255         if path.startswith(self.LAYOUT_TESTS_DIRECTORY):
    256             return path[len(self.LAYOUT_TESTS_DIRECTORY):]
    257         return path
    258 
    259     def lint(self):
    260         lint_failed = False
    261         for test_configuration in self._port.all_test_configurations():
    262             try:
    263                 self.lint_expectations(test_configuration)
    264             except test_expectations.ParseError:
    265                 lint_failed = True
    266                 self._printer.write("")
    267 
    268         if lint_failed:
    269             _log.error("Lint failed.")
    270             return -1
    271 
    272         _log.info("Lint succeeded.")
    273         return 0
    274 
    275     def lint_expectations(self, config):
    276         port = self._port
    277         test_expectations.TestExpectations(
    278             port,
    279             None,
    280             port.test_expectations(),
    281             config,
    282             self._options.lint_test_files,
    283             port.test_expectations_overrides())
    284 
    285     def parse_expectations(self):
    286         """Parse the expectations from the test_list files and return a data
    287         structure holding them. Throws an error if the test_list files have
    288         invalid syntax."""
    289         port = self._port
    290         self._expectations = test_expectations.TestExpectations(
    291             port,
    292             self._test_files,
    293             port.test_expectations(),
    294             port.test_configuration(),
    295             self._options.lint_test_files,
    296             port.test_expectations_overrides())
    297 
    298     # FIXME: This method is way too long and needs to be broken into pieces.
    299     def prepare_lists_and_print_output(self):
    300         """Create appropriate subsets of test lists and returns a
    301         ResultSummary object. Also prints expected test counts.
    302         """
    303 
    304         # Remove skipped - both fixable and ignored - files from the
    305         # top-level list of files to test.
    306         num_all_test_files = len(self._test_files)
    307         self._printer.print_expected("Found:  %d tests" %
    308                                      (len(self._test_files)))
    309         if not num_all_test_files:
    310             _log.critical('No tests to run.')
    311             return None
    312 
    313         skipped = set()
    314         if num_all_test_files > 1 and not self._options.force:
    315             skipped = self._expectations.get_tests_with_result_type(
    316                            test_expectations.SKIP)
    317             self._test_files -= skipped
    318 
    319         # Create a sorted list of test files so the subset chunk,
    320         # if used, contains alphabetically consecutive tests.
    321         self._test_files_list = list(self._test_files)
    322         if self._options.randomize_order:
    323             random.shuffle(self._test_files_list)
    324         else:
    325             self._test_files_list.sort()
    326 
    327         # If the user specifies they just want to run a subset of the tests,
    328         # just grab a subset of the non-skipped tests.
    329         if self._options.run_chunk or self._options.run_part:
    330             chunk_value = self._options.run_chunk or self._options.run_part
    331             test_files = self._test_files_list
    332             try:
    333                 (chunk_num, chunk_len) = chunk_value.split(":")
    334                 chunk_num = int(chunk_num)
    335                 assert(chunk_num >= 0)
    336                 test_size = int(chunk_len)
    337                 assert(test_size > 0)
    338             except:
    339                 _log.critical("invalid chunk '%s'" % chunk_value)
    340                 return None
    341 
    342             # Get the number of tests
    343             num_tests = len(test_files)
    344 
    345             # Get the start offset of the slice.
    346             if self._options.run_chunk:
    347                 chunk_len = test_size
    348                 # In this case chunk_num can be really large. We need
    349                 # to make the slave fit in the current number of tests.
    350                 slice_start = (chunk_num * chunk_len) % num_tests
    351             else:
    352                 # Validate the data.
    353                 assert(test_size <= num_tests)
    354                 assert(chunk_num <= test_size)
    355 
    356                 # To count the chunk_len, and make sure we don't skip
    357                 # some tests, we round to the next value that fits exactly
    358                 # all the parts.
    359                 rounded_tests = num_tests
    360                 if rounded_tests % test_size != 0:
    361                     rounded_tests = (num_tests + test_size -
    362                                      (num_tests % test_size))
    363 
    364                 chunk_len = rounded_tests / test_size
    365                 slice_start = chunk_len * (chunk_num - 1)
    366                 # It does not mind if we go over test_size.
    367 
    368             # Get the end offset of the slice.
    369             slice_end = min(num_tests, slice_start + chunk_len)
    370 
    371             files = test_files[slice_start:slice_end]
    372 
    373             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
    374                 (slice_end - slice_start), slice_start, slice_end, num_tests)
    375             self._printer.print_expected(tests_run_msg)
    376 
    377             # If we reached the end and we don't have enough tests, we run some
    378             # from the beginning.
    379             if slice_end - slice_start < chunk_len:
    380                 extra = chunk_len - (slice_end - slice_start)
    381                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
    382                             extra)
    383                 self._printer.print_expected(extra_msg)
    384                 tests_run_msg += "\n" + extra_msg
    385                 files.extend(test_files[0:extra])
    386             tests_run_filename = self._fs.join(self._results_directory, "tests_run.txt")
    387             self._fs.write_text_file(tests_run_filename, tests_run_msg)
    388 
    389             len_skip_chunk = int(len(files) * len(skipped) /
    390                                  float(len(self._test_files)))
    391             skip_chunk_list = list(skipped)[0:len_skip_chunk]
    392             skip_chunk = set(skip_chunk_list)
    393 
    394             # Update expectations so that the stats are calculated correctly.
    395             # We need to pass a list that includes the right # of skipped files
    396             # to ParseExpectations so that ResultSummary() will get the correct
    397             # stats. So, we add in the subset of skipped files, and then
    398             # subtract them back out.
    399             self._test_files_list = files + skip_chunk_list
    400             self._test_files = set(self._test_files_list)
    401 
    402             self.parse_expectations()
    403 
    404             self._test_files = set(files)
    405             self._test_files_list = files
    406         else:
    407             skip_chunk = skipped
    408 
    409         result_summary = ResultSummary(self._expectations,
    410             self._test_files | skip_chunk)
    411         self._print_expected_results_of_type(result_summary,
    412             test_expectations.PASS, "passes")
    413         self._print_expected_results_of_type(result_summary,
    414             test_expectations.FAIL, "failures")
    415         self._print_expected_results_of_type(result_summary,
    416             test_expectations.FLAKY, "flaky")
    417         self._print_expected_results_of_type(result_summary,
    418             test_expectations.SKIP, "skipped")
    419 
    420         if self._options.force:
    421             self._printer.print_expected('Running all tests, including '
    422                                          'skips (--force)')
    423         else:
    424             # Note that we don't actually run the skipped tests (they were
    425             # subtracted out of self._test_files, above), but we stub out the
    426             # results here so the statistics can remain accurate.
    427             for test in skip_chunk:
    428                 result = test_results.TestResult(test)
    429                 result.type = test_expectations.SKIP
    430                 result_summary.add(result, expected=True)
    431         self._printer.print_expected('')
    432 
    433         # Check to make sure we didn't filter out all of the tests.
    434         if not len(self._test_files):
    435             _log.info("All tests are being skipped")
    436             return None
    437 
    438         return result_summary
    439 
    440     def _get_dir_for_test_file(self, test_file):
    441         """Returns the highest-level directory by which to shard the given
    442         test file."""
    443         index = test_file.rfind(self._fs.sep + self.LAYOUT_TESTS_DIRECTORY)
    444 
    445         test_file = test_file[index + len(self.LAYOUT_TESTS_DIRECTORY):]
    446         test_file_parts = test_file.split(self._fs.sep, 1)
    447         directory = test_file_parts[0]
    448         test_file = test_file_parts[1]
    449 
    450         # The http tests are very stable on mac/linux.
    451         # TODO(ojan): Make the http server on Windows be apache so we can
    452         # turn shard the http tests there as well. Switching to apache is
    453         # what made them stable on linux/mac.
    454         return_value = directory
    455         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
    456                 and test_file.find(self._fs.sep) >= 0):
    457             test_file_parts = test_file.split(self._fs.sep, 1)
    458             directory = test_file_parts[0]
    459             return_value = self._fs.join(return_value, directory)
    460             test_file = test_file_parts[1]
    461 
    462         return return_value
    463 
    464     def _get_test_input_for_file(self, test_file):
    465         """Returns the appropriate TestInput object for the file. Mostly this
    466         is used for looking up the timeout value (in ms) to use for the given
    467         test."""
    468         if self._test_is_slow(test_file):
    469             return TestInput(test_file, self._options.slow_time_out_ms)
    470         return TestInput(test_file, self._options.time_out_ms)
    471 
    472     def _test_requires_lock(self, test_file):
    473         """Return True if the test needs to be locked when
    474         running multiple copies of NRWTs."""
    475         split_path = test_file.split(self._port._filesystem.sep)
    476         return 'http' in split_path or 'websocket' in split_path
    477 
    478     def _test_is_slow(self, test_file):
    479         return self._expectations.has_modifier(test_file,
    480                                                test_expectations.SLOW)
    481 
    482     def _shard_tests(self, test_files, use_real_shards):
    483         """Groups tests into batches.
    484         This helps ensure that tests that depend on each other (aka bad tests!)
    485         continue to run together as most cross-tests dependencies tend to
    486         occur within the same directory. If use_real_shards is False, we
    487         put each (non-HTTP/websocket) test into its own shard for maximum
    488         concurrency instead of trying to do any sort of real sharding.
    489 
    490         Return:
    491             A list of lists of TestInput objects.
    492         """
    493         # FIXME: when we added http locking, we changed how this works such
    494         # that we always lump all of the HTTP threads into a single shard.
    495         # That will slow down experimental-fully-parallel, but it's unclear
    496         # what the best alternative is completely revamping how we track
    497         # when to grab the lock.
    498 
    499         test_lists = []
    500         tests_to_http_lock = []
    501         if not use_real_shards:
    502             for test_file in test_files:
    503                 test_input = self._get_test_input_for_file(test_file)
    504                 if self._test_requires_lock(test_file):
    505                     tests_to_http_lock.append(test_input)
    506                 else:
    507                     test_lists.append((".", [test_input]))
    508         else:
    509             tests_by_dir = {}
    510             for test_file in test_files:
    511                 directory = self._get_dir_for_test_file(test_file)
    512                 test_input = self._get_test_input_for_file(test_file)
    513                 if self._test_requires_lock(test_file):
    514                     tests_to_http_lock.append(test_input)
    515                 else:
    516                     tests_by_dir.setdefault(directory, [])
    517                     tests_by_dir[directory].append(test_input)
    518             # Sort by the number of tests in the dir so that the ones with the
    519             # most tests get run first in order to maximize parallelization.
    520             # Number of tests is a good enough, but not perfect, approximation
    521             # of how long that set of tests will take to run. We can't just use
    522             # a PriorityQueue until we move to Python 2.6.
    523             for directory in tests_by_dir:
    524                 test_list = tests_by_dir[directory]
    525                 test_list_tuple = (directory, test_list)
    526                 test_lists.append(test_list_tuple)
    527             test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
    528 
    529         # Put the http tests first. There are only a couple hundred of them,
    530         # but each http test takes a very long time to run, so sorting by the
    531         # number of tests doesn't accurately capture how long they take to run.
    532         if tests_to_http_lock:
    533             test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
    534 
    535         return test_lists
    536 
    537     def _contains_tests(self, subdir):
    538         for test_file in self._test_files:
    539             if test_file.find(subdir) >= 0:
    540                 return True
    541         return False
    542 
    543     def _num_workers(self, num_shards):
    544         num_workers = min(int(self._options.child_processes), num_shards)
    545         driver_name = self._port.driver_name()
    546         if num_workers == 1:
    547             self._printer.print_config("Running 1 %s over %s" %
    548                 (driver_name, grammar.pluralize('shard', num_shards)))
    549         else:
    550             self._printer.print_config("Running %d %ss in parallel over %d shards" %
    551                 (num_workers, driver_name, num_shards))
    552         return num_workers
    553 
    554     def _run_tests(self, file_list, result_summary):
    555         """Runs the tests in the file_list.
    556 
    557         Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
    558             test_timings, individual_test_timings)
    559             interrupted is whether the run was interrupted
    560             keyboard_interrupted is whether the interruption was because someone
    561               typed Ctrl^C
    562             thread_timings is a list of dicts with the total runtime
    563               of each thread with 'name', 'num_tests', 'total_time' properties
    564             test_timings is a list of timings for each sharded subdirectory
    565               of the form [time, directory_name, num_tests]
    566             individual_test_timings is a list of run times for each test
    567               in the form {filename:filename, test_run_time:test_run_time}
    568             result_summary: summary object to populate with the results
    569         """
    570         raise NotImplementedError()
    571 
    572     def update(self):
    573         self.update_summary(self._current_result_summary)
    574 
    575     def _collect_timing_info(self, threads):
    576         test_timings = {}
    577         individual_test_timings = []
    578         thread_timings = []
    579 
    580         for thread in threads:
    581             thread_timings.append({'name': thread.getName(),
    582                                    'num_tests': thread.get_num_tests(),
    583                                    'total_time': thread.get_total_time()})
    584             test_timings.update(thread.get_test_group_timing_stats())
    585             individual_test_timings.extend(thread.get_test_results())
    586 
    587         return (thread_timings, test_timings, individual_test_timings)
    588 
    589     def needs_http(self):
    590         """Returns whether the test runner needs an HTTP server."""
    591         return self._contains_tests(self.HTTP_SUBDIR)
    592 
    593     def needs_websocket(self):
    594         """Returns whether the test runner needs a WEBSOCKET server."""
    595         return self._contains_tests(self.WEBSOCKET_SUBDIR)
    596 
    597     def set_up_run(self):
    598         """Configures the system to be ready to run tests.
    599 
    600         Returns a ResultSummary object if we should continue to run tests,
    601         or None if we should abort.
    602 
    603         """
    604         # This must be started before we check the system dependencies,
    605         # since the helper may do things to make the setup correct.
    606         self._printer.print_update("Starting helper ...")
    607         self._port.start_helper()
    608 
    609         # Check that the system dependencies (themes, fonts, ...) are correct.
    610         if not self._options.nocheck_sys_deps:
    611             self._printer.print_update("Checking system dependencies ...")
    612             if not self._port.check_sys_deps(self.needs_http()):
    613                 self._port.stop_helper()
    614                 return None
    615 
    616         if self._options.clobber_old_results:
    617             self._clobber_old_results()
    618 
    619         # Create the output directory if it doesn't already exist.
    620         self._port.maybe_make_directory(self._results_directory)
    621 
    622         self._port.setup_test_run()
    623 
    624         self._printer.print_update("Preparing tests ...")
    625         result_summary = self.prepare_lists_and_print_output()
    626         if not result_summary:
    627             return None
    628 
    629         return result_summary
    630 
    631     def run(self, result_summary):
    632         """Run all our tests on all our test files.
    633 
    634         For each test file, we run each test type. If there are any failures,
    635         we collect them for reporting.
    636 
    637         Args:
    638           result_summary: a summary object tracking the test results.
    639 
    640         Return:
    641           The number of unexpected results (0 == success)
    642         """
    643         # gather_test_files() must have been called first to initialize us.
    644         # If we didn't find any files to test, we've errored out already in
    645         # prepare_lists_and_print_output().
    646         assert(len(self._test_files))
    647 
    648         start_time = time.time()
    649 
    650         interrupted, keyboard_interrupted, thread_timings, test_timings, \
    651             individual_test_timings = (
    652             self._run_tests(self._test_files_list, result_summary))
    653 
    654         # We exclude the crashes from the list of results to retry, because
    655         # we want to treat even a potentially flaky crash as an error.
    656         failures = self._get_failures(result_summary, include_crashes=False)
    657         retry_summary = result_summary
    658         while (len(failures) and self._options.retry_failures and
    659             not self._retrying and not interrupted):
    660             _log.info('')
    661             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
    662             _log.info('')
    663             self._retrying = True
    664             retry_summary = ResultSummary(self._expectations, failures.keys())
    665             # Note that we intentionally ignore the return value here.
    666             self._run_tests(failures.keys(), retry_summary)
    667             failures = self._get_failures(retry_summary, include_crashes=True)
    668 
    669         end_time = time.time()
    670 
    671         self._print_timing_statistics(end_time - start_time,
    672                                       thread_timings, test_timings,
    673                                       individual_test_timings,
    674                                       result_summary)
    675 
    676         self._print_result_summary(result_summary)
    677 
    678         sys.stdout.flush()
    679         sys.stderr.flush()
    680 
    681         self._printer.print_one_line_summary(result_summary.total,
    682                                              result_summary.expected,
    683                                              result_summary.unexpected)
    684 
    685         unexpected_results = summarize_results(self._port,
    686             self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=True)
    687         self._printer.print_unexpected_results(unexpected_results)
    688 
    689         # FIXME: remove record_results. It's just used for testing. There's no need
    690         # for it to be a commandline argument.
    691         if (self._options.record_results and not self._options.dry_run and
    692             not keyboard_interrupted):
    693             # Write the same data to log files and upload generated JSON files
    694             # to appengine server.
    695             summarized_results = summarize_results(self._port,
    696                 self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=False)
    697             self._upload_json_files(unexpected_results, summarized_results, result_summary,
    698                                     individual_test_timings)
    699 
    700         # Write the summary to disk (results.html) and display it if requested.
    701         if not self._options.dry_run:
    702             self._copy_results_html_file()
    703             if self._options.show_results:
    704                 self._show_results_html_file(result_summary)
    705 
    706         # Now that we've completed all the processing we can, we re-raise
    707         # a KeyboardInterrupt if necessary so the caller can handle it.
    708         if keyboard_interrupted:
    709             raise KeyboardInterrupt
    710 
    711         # Ignore flaky failures and unexpected passes so we don't turn the
    712         # bot red for those.
    713         return unexpected_results['num_regressions']
    714 
    715     def clean_up_run(self):
    716         """Restores the system after we're done running tests."""
    717 
    718         _log.debug("flushing stdout")
    719         sys.stdout.flush()
    720         _log.debug("flushing stderr")
    721         sys.stderr.flush()
    722         _log.debug("stopping helper")
    723         self._port.stop_helper()
    724 
    725     def update_summary(self, result_summary):
    726         """Update the summary and print results with any completed tests."""
    727         while True:
    728             try:
    729                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
    730             except Queue.Empty:
    731                 return
    732 
    733             self._update_summary_with_result(result_summary, result)
    734 
    735     def _update_summary_with_result(self, result_summary, result):
    736         expected = self._expectations.matches_an_expected_result(
    737             result.filename, result.type, self._options.pixel_tests)
    738         result_summary.add(result, expected)
    739         exp_str = self._expectations.get_expectations_string(
    740             result.filename)
    741         got_str = self._expectations.expectation_to_string(result.type)
    742         self._printer.print_test_result(result, expected, exp_str, got_str)
    743         self._printer.print_progress(result_summary, self._retrying,
    744                                         self._test_files_list)
    745 
    746         def interrupt_if_at_failure_limit(limit, count, message):
    747             if limit and count >= limit:
    748                 raise TestRunInterruptedException(message % count)
    749 
    750         interrupt_if_at_failure_limit(
    751             self._options.exit_after_n_failures,
    752             result_summary.unexpected_failures,
    753             "Aborting run since %d failures were reached")
    754         interrupt_if_at_failure_limit(
    755             self._options.exit_after_n_crashes_or_timeouts,
    756             result_summary.unexpected_crashes_or_timeouts,
    757             "Aborting run since %d crashes or timeouts were reached")
    758 
    759     def _clobber_old_results(self):
    760         # Just clobber the actual test results directories since the other
    761         # files in the results directory are explicitly used for cross-run
    762         # tracking.
    763         self._printer.print_update("Clobbering old results in %s" %
    764                                    self._results_directory)
    765         layout_tests_dir = self._port.layout_tests_dir()
    766         possible_dirs = self._port.test_dirs()
    767         for dirname in possible_dirs:
    768             if self._fs.isdir(self._fs.join(layout_tests_dir, dirname)):
    769                 self._fs.rmtree(self._fs.join(self._results_directory, dirname))
    770 
    771     def _get_failures(self, result_summary, include_crashes):
    772         """Filters a dict of results and returns only the failures.
    773 
    774         Args:
    775           result_summary: the results of the test run
    776           include_crashes: whether crashes are included in the output.
    777             We use False when finding the list of failures to retry
    778             to see if the results were flaky. Although the crashes may also be
    779             flaky, we treat them as if they aren't so that they're not ignored.
    780         Returns:
    781           a dict of files -> results
    782         """
    783         failed_results = {}
    784         for test, result in result_summary.unexpected_results.iteritems():
    785             if (result.type == test_expectations.PASS or
    786                 result.type == test_expectations.CRASH and not include_crashes):
    787                 continue
    788             failed_results[test] = result.type
    789 
    790         return failed_results
    791 
    792     def _char_for_result(self, result):
    793         result = result.lower()
    794         if result in TestExpectationsFile.EXPECTATIONS:
    795             result_enum_value = TestExpectationsFile.EXPECTATIONS[result]
    796         else:
    797             result_enum_value = TestExpectationsFile.MODIFIERS[result]
    798         return json_layout_results_generator.JSONLayoutResultsGenerator.FAILURE_TO_CHAR[result_enum_value]
    799 
    800     def _upload_json_files(self, unexpected_results, summarized_results, result_summary,
    801                            individual_test_timings):
    802         """Writes the results of the test run as JSON files into the results
    803         dir and upload the files to the appengine server.
    804 
    805         There are three different files written into the results dir:
    806           unexpected_results.json: A short list of any unexpected results.
    807             This is used by the buildbots to display results.
    808           expectations.json: This is used by the flakiness dashboard.
    809           results.json: A full list of the results - used by the flakiness
    810             dashboard and the aggregate results dashboard.
    811 
    812         Args:
    813           unexpected_results: dict of unexpected results
    814           summarized_results: dict of results
    815           result_summary: full summary object
    816           individual_test_timings: list of test times (used by the flakiness
    817             dashboard).
    818         """
    819         _log.debug("Writing JSON files in %s." % self._results_directory)
    820 
    821         unexpected_json_path = self._fs.join(self._results_directory, "unexpected_results.json")
    822         json_results_generator.write_json(self._fs, unexpected_results, unexpected_json_path)
    823 
    824         full_results_path = self._fs.join(self._results_directory, "full_results.json")
    825         json_results_generator.write_json(self._fs, summarized_results, full_results_path)
    826 
    827         # Write a json file of the test_expectations.txt file for the layout
    828         # tests dashboard.
    829         expectations_path = self._fs.join(self._results_directory, "expectations.json")
    830         expectations_json = \
    831             self._expectations.get_expectations_json_for_all_platforms()
    832         self._fs.write_text_file(expectations_path,
    833                                  u"ADD_EXPECTATIONS(%s);" % expectations_json)
    834 
    835         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
    836             self._port, self._options.builder_name, self._options.build_name,
    837             self._options.build_number, self._results_directory,
    838             BUILDER_BASE_URL, individual_test_timings,
    839             self._expectations, result_summary, self._test_files_list,
    840             self._options.test_results_server,
    841             "layout-tests",
    842             self._options.master_name)
    843 
    844         _log.debug("Finished writing JSON files.")
    845 
    846         json_files = ["expectations.json", "incremental_results.json", "full_results.json"]
    847 
    848         generator.upload_json_files(json_files)
    849 
    850     def _print_config(self):
    851         """Prints the configuration for the test run."""
    852         p = self._printer
    853         p.print_config("Using port '%s'" % self._port.name())
    854         p.print_config("Test configuration: %s" % self._port.test_configuration())
    855         p.print_config("Placing test results in %s" % self._results_directory)
    856         if self._options.new_baseline:
    857             p.print_config("Placing new baselines in %s" %
    858                            self._port.baseline_path())
    859         p.print_config("Using %s build" % self._options.configuration)
    860         if self._options.pixel_tests:
    861             p.print_config("Pixel tests enabled")
    862         else:
    863             p.print_config("Pixel tests disabled")
    864 
    865         p.print_config("Regular timeout: %s, slow test timeout: %s" %
    866                        (self._options.time_out_ms,
    867                         self._options.slow_time_out_ms))
    868 
    869         p.print_config('Command line: ' +
    870                        ' '.join(self._port.driver_cmd_line()))
    871         p.print_config("Worker model: %s" % self._options.worker_model)
    872         p.print_config("")
    873 
    874     def _print_expected_results_of_type(self, result_summary,
    875                                         result_type, result_type_str):
    876         """Print the number of the tests in a given result class.
    877 
    878         Args:
    879           result_summary - the object containing all the results to report on
    880           result_type - the particular result type to report in the summary.
    881           result_type_str - a string description of the result_type.
    882         """
    883         tests = self._expectations.get_tests_with_result_type(result_type)
    884         now = result_summary.tests_by_timeline[test_expectations.NOW]
    885         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
    886 
    887         # We use a fancy format string in order to print the data out in a
    888         # nicely-aligned table.
    889         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
    890                   % (self._num_digits(now), self._num_digits(wontfix)))
    891         self._printer.print_expected(fmtstr %
    892             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
    893 
    894     def _num_digits(self, num):
    895         """Returns the number of digits needed to represent the length of a
    896         sequence."""
    897         ndigits = 1
    898         if len(num):
    899             ndigits = int(math.log10(len(num))) + 1
    900         return ndigits
    901 
    902     def _print_timing_statistics(self, total_time, thread_timings,
    903                                directory_test_timings, individual_test_timings,
    904                                result_summary):
    905         """Record timing-specific information for the test run.
    906 
    907         Args:
    908           total_time: total elapsed time (in seconds) for the test run
    909           thread_timings: wall clock time each thread ran for
    910           directory_test_timings: timing by directory
    911           individual_test_timings: timing by file
    912           result_summary: summary object for the test run
    913         """
    914         self._printer.print_timing("Test timing:")
    915         self._printer.print_timing("  %6.2f total testing time" % total_time)
    916         self._printer.print_timing("")
    917         self._printer.print_timing("Thread timing:")
    918         cuml_time = 0
    919         for t in thread_timings:
    920             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
    921                   (t['name'], t['num_tests'], t['total_time']))
    922             cuml_time += t['total_time']
    923         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
    924               (cuml_time, cuml_time / int(self._options.child_processes)))
    925         self._printer.print_timing("")
    926 
    927         self._print_aggregate_test_statistics(individual_test_timings)
    928         self._print_individual_test_times(individual_test_timings,
    929                                           result_summary)
    930         self._print_directory_timings(directory_test_timings)
    931 
    932     def _print_aggregate_test_statistics(self, individual_test_timings):
    933         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
    934         Args:
    935           individual_test_timings: List of TestResults for all tests.
    936         """
    937         times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
    938         self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):",
    939                                                 times_for_dump_render_tree)
    940 
    941     def _print_individual_test_times(self, individual_test_timings,
    942                                   result_summary):
    943         """Prints the run times for slow, timeout and crash tests.
    944         Args:
    945           individual_test_timings: List of TestStats for all tests.
    946           result_summary: summary object for test run
    947         """
    948         # Reverse-sort by the time spent in DumpRenderTree.
    949         individual_test_timings.sort(lambda a, b:
    950             cmp(b.test_run_time, a.test_run_time))
    951 
    952         num_printed = 0
    953         slow_tests = []
    954         timeout_or_crash_tests = []
    955         unexpected_slow_tests = []
    956         for test_tuple in individual_test_timings:
    957             filename = test_tuple.filename
    958             is_timeout_crash_or_slow = False
    959             if self._test_is_slow(filename):
    960                 is_timeout_crash_or_slow = True
    961                 slow_tests.append(test_tuple)
    962 
    963             if filename in result_summary.failures:
    964                 result = result_summary.results[filename].type
    965                 if (result == test_expectations.TIMEOUT or
    966                     result == test_expectations.CRASH):
    967                     is_timeout_crash_or_slow = True
    968                     timeout_or_crash_tests.append(test_tuple)
    969 
    970             if (not is_timeout_crash_or_slow and
    971                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
    972                 num_printed = num_printed + 1
    973                 unexpected_slow_tests.append(test_tuple)
    974 
    975         self._printer.print_timing("")
    976         self._print_test_list_timing("%s slowest tests that are not "
    977             "marked as SLOW and did not timeout/crash:" %
    978             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
    979         self._printer.print_timing("")
    980         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
    981         self._printer.print_timing("")
    982         self._print_test_list_timing("Tests that timed out or crashed:",
    983                                      timeout_or_crash_tests)
    984         self._printer.print_timing("")
    985 
    986     def _print_test_list_timing(self, title, test_list):
    987         """Print timing info for each test.
    988 
    989         Args:
    990           title: section heading
    991           test_list: tests that fall in this section
    992         """
    993         if self._printer.disabled('slowest'):
    994             return
    995 
    996         self._printer.print_timing(title)
    997         for test_tuple in test_list:
    998             filename = test_tuple.filename[len(
    999                 self._port.layout_tests_dir()) + 1:]
   1000             filename = filename.replace('\\', '/')
   1001             test_run_time = round(test_tuple.test_run_time, 1)
   1002             self._printer.print_timing("  %s took %s seconds" %
   1003                                        (filename, test_run_time))
   1004 
   1005     def _print_directory_timings(self, directory_test_timings):
   1006         """Print timing info by directory for any directories that
   1007         take > 10 seconds to run.
   1008 
   1009         Args:
   1010           directory_test_timing: time info for each directory
   1011         """
   1012         timings = []
   1013         for directory in directory_test_timings:
   1014             num_tests, time_for_directory = directory_test_timings[directory]
   1015             timings.append((round(time_for_directory, 1), directory,
   1016                             num_tests))
   1017         timings.sort()
   1018 
   1019         self._printer.print_timing("Time to process slowest subdirectories:")
   1020         min_seconds_to_print = 10
   1021         for timing in timings:
   1022             if timing[0] > min_seconds_to_print:
   1023                 self._printer.print_timing(
   1024                     "  %s took %s seconds to run %s tests." % (timing[1],
   1025                     timing[0], timing[2]))
   1026         self._printer.print_timing("")
   1027 
   1028     def _print_statistics_for_test_timings(self, title, timings):
   1029         """Prints the median, mean and standard deviation of the values in
   1030         timings.
   1031 
   1032         Args:
   1033           title: Title for these timings.
   1034           timings: A list of floats representing times.
   1035         """
   1036         self._printer.print_timing(title)
   1037         timings.sort()
   1038 
   1039         num_tests = len(timings)
   1040         if not num_tests:
   1041             return
   1042         percentile90 = timings[int(.9 * num_tests)]
   1043         percentile99 = timings[int(.99 * num_tests)]
   1044 
   1045         if num_tests % 2 == 1:
   1046             median = timings[((num_tests - 1) / 2) - 1]
   1047         else:
   1048             lower = timings[num_tests / 2 - 1]
   1049             upper = timings[num_tests / 2]
   1050             median = (float(lower + upper)) / 2
   1051 
   1052         mean = sum(timings) / num_tests
   1053 
   1054         for time in timings:
   1055             sum_of_deviations = math.pow(time - mean, 2)
   1056 
   1057         std_deviation = math.sqrt(sum_of_deviations / num_tests)
   1058         self._printer.print_timing("  Median:          %6.3f" % median)
   1059         self._printer.print_timing("  Mean:            %6.3f" % mean)
   1060         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
   1061         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
   1062         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
   1063         self._printer.print_timing("")
   1064 
   1065     def _print_result_summary(self, result_summary):
   1066         """Print a short summary about how many tests passed.
   1067 
   1068         Args:
   1069           result_summary: information to log
   1070         """
   1071         failed = len(result_summary.failures)
   1072         skipped = len(
   1073             result_summary.tests_by_expectation[test_expectations.SKIP])
   1074         total = result_summary.total
   1075         passed = total - failed - skipped
   1076         pct_passed = 0.0
   1077         if total > 0:
   1078             pct_passed = float(passed) * 100 / total
   1079 
   1080         self._printer.print_actual("")
   1081         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
   1082                      (passed, total, pct_passed))
   1083         self._printer.print_actual("")
   1084         self._print_result_summary_entry(result_summary,
   1085             test_expectations.NOW, "Tests to be fixed")
   1086 
   1087         self._printer.print_actual("")
   1088         self._print_result_summary_entry(result_summary,
   1089             test_expectations.WONTFIX,
   1090             "Tests that will only be fixed if they crash (WONTFIX)")
   1091         self._printer.print_actual("")
   1092 
   1093     def _print_result_summary_entry(self, result_summary, timeline,
   1094                                     heading):
   1095         """Print a summary block of results for a particular timeline of test.
   1096 
   1097         Args:
   1098           result_summary: summary to print results for
   1099           timeline: the timeline to print results for (NOT, WONTFIX, etc.)
   1100           heading: a textual description of the timeline
   1101         """
   1102         total = len(result_summary.tests_by_timeline[timeline])
   1103         not_passing = (total -
   1104            len(result_summary.tests_by_expectation[test_expectations.PASS] &
   1105                result_summary.tests_by_timeline[timeline]))
   1106         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
   1107 
   1108         for result in TestExpectationsFile.EXPECTATION_ORDER:
   1109             if result == test_expectations.PASS:
   1110                 continue
   1111             results = (result_summary.tests_by_expectation[result] &
   1112                        result_summary.tests_by_timeline[timeline])
   1113             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
   1114             if not_passing and len(results):
   1115                 pct = len(results) * 100.0 / not_passing
   1116                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
   1117                     (len(results), desc[len(results) != 1], pct))
   1118 
   1119     def _copy_results_html_file(self):
   1120         base_dir = self._port.path_from_webkit_base('Tools', 'Scripts', 'webkitpy', 'layout_tests', 'layout_package')
   1121         results_file = self._fs.join(base_dir, 'json_results.html')
   1122         # FIXME: What should we do if this doesn't exist (e.g., in unit tests)?
   1123         if self._fs.exists(results_file):
   1124             self._fs.copyfile(results_file, self._fs.join(self._results_directory, "results.html"))
   1125 
   1126     def _show_results_html_file(self, result_summary):
   1127         """Shows the results.html page."""
   1128         if self._options.full_results_html:
   1129             test_files = result_summary.failures.keys()
   1130         else:
   1131             unexpected_failures = self._get_failures(result_summary, include_crashes=True)
   1132             test_files = unexpected_failures.keys()
   1133 
   1134         if not len(test_files):
   1135             return
   1136 
   1137         results_filename = self._fs.join(self._results_directory, "results.html")
   1138         self._port.show_results_html_file(results_filename)
   1139 
   1140 
   1141 def read_test_files(fs, files):
   1142     tests = []
   1143     for file in files:
   1144         try:
   1145             file_contents = fs.read_text_file(file).split('\n')
   1146             for line in file_contents:
   1147                 line = test_expectations.strip_comments(line)
   1148                 if line:
   1149                     tests.append(line)
   1150         except IOError, e:
   1151             if e.errno == errno.ENOENT:
   1152                 _log.critical('')
   1153                 _log.critical('--test-list file "%s" not found' % file)
   1154             raise
   1155     return tests
   1156