Home | History | Annotate | Download | only in dynamic_suite
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import abc
      6 import datetime
      7 import difflib
      8 import functools
      9 import hashlib
     10 import logging
     11 import operator
     12 import os
     13 import re
     14 import sys
     15 import warnings
     16 
     17 import common
     18 
     19 from autotest_lib.frontend.afe.json_rpc import proxy
     20 from autotest_lib.client.common_lib import enum
     21 from autotest_lib.client.common_lib import error
     22 from autotest_lib.client.common_lib import global_config
     23 from autotest_lib.client.common_lib import priorities
     24 from autotest_lib.client.common_lib import time_utils
     25 from autotest_lib.client.common_lib import utils
     26 from autotest_lib.frontend.afe import model_attributes
     27 from autotest_lib.frontend.afe.json_rpc import proxy
     28 from autotest_lib.server.cros import provision
     29 from autotest_lib.server.cros.dynamic_suite import constants
     30 from autotest_lib.server.cros.dynamic_suite import control_file_getter
     31 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     32 from autotest_lib.server.cros.dynamic_suite import job_status
     33 from autotest_lib.server.cros.dynamic_suite import suite_common
     34 from autotest_lib.server.cros.dynamic_suite import tools
     35 from autotest_lib.server.cros.dynamic_suite.job_status import Status
     36 
     37 try:
     38     from chromite.lib import boolparse_lib
     39     from chromite.lib import cros_logging as logging
     40 except ImportError:
     41     print 'Unable to import chromite.'
     42     print 'This script must be either:'
     43     print '  - Be run in the chroot.'
     44     print '  - (not yet supported) be run after running '
     45     print '    ../utils/build_externals.py'
     46 
     47 _FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
     48                     'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
     49                     'sanity', 'push_to_prod']
     50 _AUTOTEST_DIR = global_config.global_config.get_config_value(
     51         'SCHEDULER', 'drone_installation_directory')
     52 
     53 
     54 class RetryHandler(object):
     55     """Maintain retry information.
     56 
     57     @var _retry_map: A dictionary that stores retry history.
     58             The key is afe job id. The value is a dictionary.
     59             {job_id: {'state':RetryHandler.States, 'retry_max':int}}
     60             - state:
     61                 The retry state of a job.
     62                 NOT_ATTEMPTED:
     63                     We haven't done anything about the job.
     64                 ATTEMPTED:
     65                     We've made an attempt to schedule a retry job. The
     66                     scheduling may or may not be successful, e.g.
     67                     it might encounter an rpc error. Note failure
     68                     in scheduling a retry is different from a retry job failure.
     69                     For each job, we only attempt to schedule a retry once.
     70                     For example, assume we have a test with JOB_RETRIES=5 and
     71                     its second retry job failed. When we attempt to create
     72                     a third retry job to retry the second, we hit an rpc
     73                     error. In such case, we will give up on all following
     74                     retries.
     75                 RETRIED:
     76                     A retry job has already been successfully
     77                     scheduled.
     78             - retry_max:
     79                 The maximum of times the job can still
     80                 be retried, taking into account retries
     81                 that have occurred.
     82     @var _retry_level: A retry might be triggered only if the result
     83             is worse than the level.
     84     @var _max_retries: Maximum retry limit at suite level.
     85                      Regardless how many times each individual test
     86                      has been retried, the total number of retries happening in
     87                      the suite can't exceed _max_retries.
     88     """
     89 
     90     States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
     91                        start_value=1, step=1)
     92 
     93     def __init__(self, initial_jobs_to_tests, retry_level='WARN',
     94                  max_retries=None):
     95         """Initialize RetryHandler.
     96 
     97         @param initial_jobs_to_tests: A dictionary that maps a job id to
     98                 a ControlData object. This dictionary should contain
     99                 jobs that are originally scheduled by the suite.
    100         @param retry_level: A retry might be triggered only if the result is
    101                 worse than the level.
    102         @param max_retries: Integer, maxmium total retries allowed
    103                                   for the suite. Default to None, no max.
    104         """
    105         self._retry_map = {}
    106         self._retry_level = retry_level
    107         self._max_retries = (max_retries
    108                              if max_retries is not None else sys.maxint)
    109         for job_id, test in initial_jobs_to_tests.items():
    110             if test.job_retries > 0:
    111                 self._add_job(new_job_id=job_id,
    112                               retry_max=test.job_retries)
    113             else:
    114                 logging.debug("Test %s has no retries", test.name)
    115 
    116 
    117     def _add_job(self, new_job_id, retry_max):
    118         """Add a newly-created job to the retry map.
    119 
    120         @param new_job_id: The afe_job_id of a newly created job.
    121         @param retry_max: The maximum of times that we could retry
    122                           the test if the job fails.
    123 
    124         @raises ValueError if new_job_id is already in retry map.
    125 
    126         """
    127         if new_job_id in self._retry_map:
    128             raise ValueError('add_job called when job is already in retry map.')
    129 
    130         self._retry_map[new_job_id] = {
    131                 'state': self.States.NOT_ATTEMPTED,
    132                 'retry_max': retry_max}
    133 
    134 
    135     def _suite_max_reached(self):
    136         """Return whether maximum retry limit for a suite has been reached."""
    137         return self._max_retries <= 0
    138 
    139 
    140     def add_retry(self, old_job_id, new_job_id):
    141         """Record a retry.
    142 
    143         Update retry map with the retry information.
    144 
    145         @param old_job_id: The afe_job_id of the job that is retried.
    146         @param new_job_id: The afe_job_id of the retry job.
    147 
    148         @raises KeyError if old_job_id isn't in the retry map.
    149         @raises ValueError if we have already retried or made an attempt
    150                 to retry the old job.
    151 
    152         """
    153         old_record = self._retry_map[old_job_id]
    154         if old_record['state'] != self.States.NOT_ATTEMPTED:
    155             raise ValueError(
    156                     'We have already retried or attempted to retry job %d' %
    157                     old_job_id)
    158         old_record['state'] = self.States.RETRIED
    159         self._add_job(new_job_id=new_job_id,
    160                       retry_max=old_record['retry_max'] - 1)
    161         self._max_retries -= 1
    162 
    163 
    164     def set_attempted(self, job_id):
    165         """Set the state of the job to ATTEMPTED.
    166 
    167         @param job_id: afe_job_id of a job.
    168 
    169         @raises KeyError if job_id isn't in the retry map.
    170         @raises ValueError if the current state is not NOT_ATTEMPTED.
    171 
    172         """
    173         current_state = self._retry_map[job_id]['state']
    174         if current_state != self.States.NOT_ATTEMPTED:
    175             # We are supposed to retry or attempt to retry each job
    176             # only once. Raise an error if this is not the case.
    177             raise ValueError('Unexpected state transition: %s -> %s' %
    178                              (self.States.get_string(current_state),
    179                               self.States.get_string(self.States.ATTEMPTED)))
    180         else:
    181             self._retry_map[job_id]['state'] = self.States.ATTEMPTED
    182 
    183 
    184     def has_following_retry(self, result):
    185         """Check whether there will be a following retry.
    186 
    187         We have the following cases for a given job id (result.id),
    188         - no retry map entry -> retry not required, no following retry
    189         - has retry map entry:
    190             - already retried -> has following retry
    191             - has not retried
    192                 (this branch can be handled by checking should_retry(result))
    193                 - retry_max == 0 --> the last retry job, no more retry
    194                 - retry_max > 0
    195                    - attempted, but has failed in scheduling a
    196                      following retry due to rpc error  --> no more retry
    197                    - has not attempped --> has following retry if test failed.
    198 
    199         @param result: A result, encapsulating the status of the job.
    200 
    201         @returns: True, if there will be a following retry.
    202                   False otherwise.
    203 
    204         """
    205         return (result.test_executed
    206                 and result.id in self._retry_map
    207                 and (self._retry_map[result.id]['state'] == self.States.RETRIED
    208                      or self._should_retry(result)))
    209 
    210 
    211     def _should_retry(self, result):
    212         """Check whether we should retry a job based on its result.
    213 
    214         We will retry the job that corresponds to the result
    215         when all of the following are true.
    216         a) The test was actually executed, meaning that if
    217            a job was aborted before it could ever reach the state
    218            of 'Running', the job will not be retried.
    219         b) The result is worse than |self._retry_level| which
    220            defaults to 'WARN'.
    221         c) The test requires retry, i.e. the job has an entry in the retry map.
    222         d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
    223            Note that if a test has JOB_RETRIES=5, and the second time
    224            it was retried it hit an rpc error, we will give up on
    225            all following retries.
    226         e) The job has not reached its retry max, i.e. retry_max > 0
    227 
    228         @param result: A result, encapsulating the status of the job.
    229 
    230         @returns: True if we should retry the job.
    231 
    232         """
    233         return (
    234             result.test_executed
    235             and result.id in self._retry_map
    236             and not self._suite_max_reached()
    237             and result.is_worse_than(
    238                 job_status.Status(self._retry_level, '', 'reason'))
    239             and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
    240             and self._retry_map[result.id]['retry_max'] > 0
    241         )
    242 
    243     def _should_retry_local_job(self, job_id):
    244         """Check whether we should retry a job based on information available
    245         for a local job without a Result object.
    246 
    247         We will retry the job that corresponds to the result
    248         when all of the following are true.
    249         a) The test requires retry, i.e. the job has an entry in the retry map.
    250         b) We haven't made any retry attempt yet for this job, i.e.
    251            state == NOT_ATTEMPTED
    252            If the job is aborted,  we will give up on all following retries,
    253            regardless of max_retries.
    254         c) The job has not reached its retry max, i.e. retry_max > 0
    255 
    256         @param job_id: the id for the job, to look up relevant information.
    257 
    258         @returns: True if we should retry the job.
    259 
    260         """
    261         if self._suite_max_reached():
    262             logging.debug('suite max_retries reached, not retrying.')
    263             return False
    264         if job_id not in self._retry_map:
    265             logging.debug('job_id not in retry map, not retrying.')
    266             return False
    267         if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED:
    268             logging.debug("job state was %s not 'Not Attempted', not retrying",
    269                           self._retry_map[job_id]['state'])
    270             return False
    271         if self._retry_map[job_id]['retry_max'] <= 0:
    272             logging.debug('test-level retries exhausted, not retrying')
    273             return False
    274         return True
    275 
    276 
    277     def job_present(self, job_id):
    278         """Check whether a job id present in the retry map.
    279 
    280         @param job_id: afe_job_id of a job.
    281 
    282         @returns: A True if the job is present, False if not.
    283         """
    284         return bool(self._retry_map.get(job_id))
    285 
    286 
    287 
    288     def get_retry_max(self, job_id):
    289         """Get the maximum times the job can still be retried.
    290 
    291         @param job_id: afe_job_id of a job.
    292 
    293         @returns: An int, representing the maximum times the job can still be
    294                   retried.
    295         @raises KeyError if job_id isn't in the retry map.
    296 
    297         """
    298         return self._retry_map[job_id]['retry_max']
    299 
    300 
    301 class _SuiteChildJobCreator(object):
    302     """Create test jobs for a suite."""
    303 
    304     def __init__(
    305             self,
    306             tag,
    307             builds,
    308             board,
    309             afe=None,
    310             max_runtime_mins=24*60,
    311             timeout_mins=24*60,
    312             suite_job_id=None,
    313             ignore_deps=False,
    314             extra_deps=(),
    315             priority=priorities.Priority.DEFAULT,
    316             offload_failures_only=False,
    317             test_source_build=None,
    318             job_keyvals=None,
    319     ):
    320         """
    321         Constructor
    322 
    323         @param tag: a string with which to tag jobs run in this suite.
    324         @param builds: the builds on which we're running this suite.
    325         @param board: the board on which we're running this suite.
    326         @param afe: an instance of AFE as defined in server/frontend.py.
    327         @param max_runtime_mins: Maximum suite runtime, in minutes.
    328         @param timeout_mins: Maximum job lifetime, in minutes.
    329         @param suite_job_id: Job id that will act as parent id to all sub jobs.
    330                              Default: None
    331         @param ignore_deps: True if jobs should ignore the DEPENDENCIES
    332                             attribute and skip applying of dependency labels.
    333                             (Default:False)
    334         @param extra_deps: A list of strings which are the extra DEPENDENCIES
    335                            to add to each test being scheduled.
    336         @param priority: Integer priority level.  Higher is more important.
    337         @param offload_failures_only: Only enable gs_offloading for failed
    338                                       jobs.
    339         @param test_source_build: Build that contains the server-side test code.
    340         @param job_keyvals: General job keyvals to be inserted into keyval file,
    341                             which will be used by tko/parse later.
    342         """
    343         self._tag = tag
    344         self._builds = builds
    345         self._board = board
    346         self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
    347                                                          delay_sec=10,
    348                                                          debug=False)
    349         self._max_runtime_mins = max_runtime_mins
    350         self._timeout_mins = timeout_mins
    351         self._suite_job_id = suite_job_id
    352         self._ignore_deps = ignore_deps
    353         self._extra_deps = tuple(extra_deps)
    354         self._priority = priority
    355         self._offload_failures_only = offload_failures_only
    356         self._test_source_build = test_source_build
    357         self._job_keyvals = job_keyvals
    358 
    359 
    360     @property
    361     def cros_build(self):
    362         """Return the CrOS build or the first build in the builds dict."""
    363         # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
    364         # sure what the implications of this are, but it's probably not a
    365         # good thing.
    366         return self._builds.get(provision.CROS_VERSION_PREFIX,
    367                                 self._builds.values()[0])
    368 
    369 
    370     def create_job(self, test, retry_for=None):
    371         """
    372         Thin wrapper around frontend.AFE.create_job().
    373 
    374         @param test: ControlData object for a test to run.
    375         @param retry_for: If the to-be-created job is a retry for an
    376                           old job, the afe_job_id of the old job will
    377                           be passed in as |retry_for|, which will be
    378                           recorded in the new job's keyvals.
    379         @returns: A frontend.Job object with an added test_name member.
    380                   test_name is used to preserve the higher level TEST_NAME
    381                   name of the job.
    382         """
    383         # For a system running multiple suites which share tests, the priority
    384         # overridden may lead to unexpected scheduling order that adds extra
    385         # provision jobs.
    386         test_priority = self._priority
    387         if utils.is_moblab():
    388             test_priority = max(self._priority, test.priority)
    389 
    390         reboot_before = (model_attributes.RebootBefore.NEVER if test.fast
    391                          else None)
    392 
    393         test_obj = self._afe.create_job(
    394             control_file=test.text,
    395             name=tools.create_job_name(
    396                     self._test_source_build or self.cros_build,
    397                     self._tag,
    398                     test.name),
    399             control_type=test.test_type.capitalize(),
    400             meta_hosts=[self._board]*test.sync_count,
    401             dependencies=self._create_job_deps(test),
    402             keyvals=self._create_keyvals_for_test_job(test, retry_for),
    403             max_runtime_mins=self._max_runtime_mins,
    404             timeout_mins=self._timeout_mins,
    405             parent_job_id=self._suite_job_id,
    406             reboot_before=reboot_before,
    407             run_reset=not test.fast,
    408             priority=test_priority,
    409             synch_count=test.sync_count,
    410             require_ssp=test.require_ssp)
    411 
    412         test_obj.test_name = test.name
    413         return test_obj
    414 
    415 
    416     def _create_job_deps(self, test):
    417         """Create job deps list for a test job.
    418 
    419         @returns: A list of dependency strings.
    420         """
    421         if self._ignore_deps:
    422             job_deps = []
    423         else:
    424             job_deps = list(test.dependencies)
    425         job_deps.extend(self._extra_deps)
    426         return job_deps
    427 
    428 
    429     def _create_keyvals_for_test_job(self, test, retry_for=None):
    430         """Create keyvals dict for creating a test job.
    431 
    432         @param test: ControlData object for a test to run.
    433         @param retry_for: If the to-be-created job is a retry for an
    434                           old job, the afe_job_id of the old job will
    435                           be passed in as |retry_for|, which will be
    436                           recorded in the new job's keyvals.
    437         @returns: A keyvals dict for creating the test job.
    438         """
    439         keyvals = {
    440             constants.JOB_BUILD_KEY: self.cros_build,
    441             constants.JOB_SUITE_KEY: self._tag,
    442             constants.JOB_EXPERIMENTAL_KEY: test.experimental,
    443             constants.JOB_BUILDS_KEY: self._builds
    444         }
    445         # test_source_build is saved to job_keyvals so scheduler can retrieve
    446         # the build name from database when compiling autoserv commandline.
    447         # This avoid a database change to add a new field in afe_jobs.
    448         #
    449         # Only add `test_source_build` to job keyvals if the build is different
    450         # from the CrOS build or the job uses more than one build, e.g., both
    451         # firmware and CrOS will be updated in the dut.
    452         # This is for backwards compatibility, so the update Autotest code can
    453         # compile an autoserv command line to run in a SSP container using
    454         # previous builds.
    455         if (self._test_source_build and
    456             (self.cros_build != self._test_source_build or
    457              len(self._builds) > 1)):
    458             keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \
    459                     self._test_source_build
    460             for prefix, build in self._builds.iteritems():
    461                 if prefix == provision.FW_RW_VERSION_PREFIX:
    462                     keyvals[constants.FWRW_BUILD]= build
    463                 elif prefix == provision.FW_RO_VERSION_PREFIX:
    464                     keyvals[constants.FWRO_BUILD] = build
    465         # Add suite job id to keyvals so tko parser can read it from keyval
    466         # file.
    467         if self._suite_job_id:
    468             keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
    469         # We drop the old job's id in the new job's keyval file so that
    470         # later our tko parser can figure out the retry relationship and
    471         # invalidate the results of the old job in tko database.
    472         if retry_for:
    473             keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
    474         if self._offload_failures_only:
    475             keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
    476         if self._job_keyvals:
    477             for key in constants.INHERITED_KEYVALS:
    478                 if key in self._job_keyvals:
    479                     keyvals[key] = self._job_keyvals[key]
    480         return keyvals
    481 
    482 
    483 class _ControlFileRetriever(object):
    484     """Retrieves control files.
    485 
    486     This returns control data instances, unlike control file getters
    487     which simply return the control file text contents.
    488     """
    489 
    490     def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
    491                  test_args=None):
    492         """Initialize instance.
    493 
    494         @param cf_getter: a control_file_getter.ControlFileGetter used to list
    495                and fetch the content of control files
    496         @param forgiving_parser: If False, will raise ControlVariableExceptions
    497                                  if any are encountered when parsing control
    498                                  files. Note that this can raise an exception
    499                                  for syntax errors in unrelated files, because
    500                                  we parse them before applying the predicate.
    501         @param run_prod_code: If true, the retrieved tests will run the test
    502                               code that lives in prod aka the test code
    503                               currently on the lab servers by disabling
    504                               SSP for the discovered tests.
    505         @param test_args: A dict of args to be seeded in test control file under
    506                           the name |args_dict|.
    507         """
    508         self._cf_getter = cf_getter
    509         self._forgiving_parser = forgiving_parser
    510         self._run_prod_code = run_prod_code
    511         self._test_args = test_args
    512 
    513 
    514     def retrieve_for_test(self, test_name):
    515         """Retrieve a test's control data.
    516 
    517         This ignores forgiving_parser because we cannot return a
    518         forgiving value.
    519 
    520         @param test_name: Name of test to retrieve.
    521 
    522         @raises ControlVariableException: There is a syntax error in a
    523                                           control file.
    524 
    525         @returns a ControlData object
    526         """
    527         return suite_common.retrieve_control_data_for_test(
    528                 self._cf_getter, test_name)
    529 
    530 
    531     def retrieve_for_suite(self, suite_name=''):
    532         """Scan through all tests and find all tests.
    533 
    534         @param suite_name: If specified, this method will attempt to restrain
    535                            the search space to just this suite's control files.
    536 
    537         @raises ControlVariableException: If forgiving_parser is False and there
    538                                           is a syntax error in a control file.
    539 
    540         @returns a dictionary of ControlData objects that based on given
    541                  parameters.
    542         """
    543         tests = suite_common.retrieve_for_suite(
    544                 self._cf_getter, suite_name, self._forgiving_parser,
    545                 self._test_args)
    546         if self._run_prod_code:
    547             for test in tests.itervalues():
    548                 test.require_ssp = False
    549 
    550         return tests
    551 
    552 
    553 def list_all_suites(build, devserver, cf_getter=None):
    554     """
    555     Parses all ControlData objects with a SUITE tag and extracts all
    556     defined suite names.
    557 
    558     @param build: the build on which we're running this suite.
    559     @param devserver: the devserver which contains the build.
    560     @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
    561                       using DevServerGetter.
    562 
    563     @return list of suites
    564     """
    565     if cf_getter is None:
    566         cf_getter = _create_ds_getter(build, devserver)
    567 
    568     suites = set()
    569     predicate = lambda t: True
    570     for test in find_and_parse_tests(cf_getter, predicate):
    571         suites.update(test.suite_tag_parts)
    572     return list(suites)
    573 
    574 
    575 def test_file_similarity_predicate(test_file_pattern):
    576     """Returns predicate that gets the similarity based on a test's file
    577     name pattern.
    578 
    579     Builds a predicate that takes in a parsed control file (a ControlData)
    580     and returns a tuple of (file path, ratio), where ratio is the
    581     similarity between the test file name and the given test_file_pattern.
    582 
    583     @param test_file_pattern: regular expression (string) to match against
    584                               control file names.
    585     @return a callable that takes a ControlData and and returns a tuple of
    586             (file path, ratio), where ratio is the similarity between the
    587             test file name and the given test_file_pattern.
    588     """
    589     return lambda t: ((None, 0) if not hasattr(t, 'path') else
    590             (t.path, difflib.SequenceMatcher(a=t.path,
    591                                              b=test_file_pattern).ratio()))
    592 
    593 
    594 def test_name_similarity_predicate(test_name):
    595     """Returns predicate that matched based on a test's name.
    596 
    597     Builds a predicate that takes in a parsed control file (a ControlData)
    598     and returns a tuple of (test name, ratio), where ratio is the similarity
    599     between the test name and the given test_name.
    600 
    601     @param test_name: the test name to base the predicate on.
    602     @return a callable that takes a ControlData and returns a tuple of
    603             (test name, ratio), where ratio is the similarity between the
    604             test name and the given test_name.
    605     """
    606     return lambda t: ((None, 0) if not hasattr(t, 'name') else
    607             (t.name,
    608              difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
    609 
    610 
    611 def matches_attribute_expression_predicate(test_attr_boolstr):
    612     """Returns predicate that matches based on boolean expression of
    613     attributes.
    614 
    615     Builds a predicate that takes in a parsed control file (a ControlData)
    616     ans returns True if the test attributes satisfy the given attribute
    617     boolean expression.
    618 
    619     @param test_attr_boolstr: boolean expression of the attributes to be
    620                               test, like 'system:all and interval:daily'.
    621 
    622     @return a callable that takes a ControlData and returns True if the test
    623             attributes satisfy the given boolean expression.
    624     """
    625     return lambda t: boolparse_lib.BoolstrResult(
    626         test_attr_boolstr, t.attributes)
    627 
    628 
    629 def test_file_matches_pattern_predicate(test_file_pattern):
    630     """Returns predicate that matches based on a test's file name pattern.
    631 
    632     Builds a predicate that takes in a parsed control file (a ControlData)
    633     and returns True if the test's control file name matches the given
    634     regular expression.
    635 
    636     @param test_file_pattern: regular expression (string) to match against
    637                               control file names.
    638     @return a callable that takes a ControlData and and returns
    639             True if control file name matches the pattern.
    640     """
    641     return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
    642                                                      t.path)
    643 
    644 
    645 def test_name_matches_pattern_predicate(test_name_pattern):
    646     """Returns predicate that matches based on a test's name pattern.
    647 
    648     Builds a predicate that takes in a parsed control file (a ControlData)
    649     and returns True if the test name matches the given regular expression.
    650 
    651     @param test_name_pattern: regular expression (string) to match against
    652                               test names.
    653     @return a callable that takes a ControlData and returns
    654             True if the name fields matches the pattern.
    655     """
    656     return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
    657                                                      t.name)
    658 
    659 
    660 def test_name_equals_predicate(test_name):
    661     """Returns predicate that matched based on a test's name.
    662 
    663     Builds a predicate that takes in a parsed control file (a ControlData)
    664     and returns True if the test name is equal to |test_name|.
    665 
    666     @param test_name: the test name to base the predicate on.
    667     @return a callable that takes a ControlData and looks for |test_name|
    668             in that ControlData's name.
    669     """
    670     return lambda t: hasattr(t, 'name') and test_name == t.name
    671 
    672 
    673 def name_in_tag_similarity_predicate(name):
    674     """Returns predicate that takes a control file and gets the similarity
    675     of the suites in the control file and the given name.
    676 
    677     Builds a predicate that takes in a parsed control file (a ControlData)
    678     and returns a list of tuples of (suite name, ratio), where suite name
    679     is each suite listed in the control file, and ratio is the similarity
    680     between each suite and the given name.
    681 
    682     @param name: the suite name to base the predicate on.
    683     @return a callable that takes a ControlData and returns a list of tuples
    684             of (suite name, ratio), where suite name is each suite listed in
    685             the control file, and ratio is the similarity between each suite
    686             and the given name.
    687     """
    688     return lambda t: [(suite,
    689                        difflib.SequenceMatcher(a=suite, b=name).ratio())
    690                       for suite in t.suite_tag_parts] or [(None, 0)]
    691 
    692 
    693 def name_in_tag_predicate(name):
    694     """Returns predicate that takes a control file and looks for |name|.
    695 
    696     Builds a predicate that takes in a parsed control file (a ControlData)
    697     and returns True if the SUITE tag is present and contains |name|.
    698 
    699     @param name: the suite name to base the predicate on.
    700     @return a callable that takes a ControlData and looks for |name| in that
    701             ControlData object's suite member.
    702     """
    703     return suite_common.name_in_tag_predicate(name)
    704 
    705 
    706 def create_fs_getter(autotest_dir):
    707     """
    708     @param autotest_dir: the place to find autotests.
    709     @return a FileSystemGetter instance that looks under |autotest_dir|.
    710     """
    711     # currently hard-coded places to look for tests.
    712     subpaths = ['server/site_tests', 'client/site_tests',
    713                 'server/tests', 'client/tests']
    714     directories = [os.path.join(autotest_dir, p) for p in subpaths]
    715     return control_file_getter.FileSystemGetter(directories)
    716 
    717 
    718 def _create_ds_getter(build, devserver):
    719     """
    720     @param build: the build on which we're running this suite.
    721     @param devserver: the devserver which contains the build.
    722     @return a FileSystemGetter instance that looks under |autotest_dir|.
    723     """
    724     return control_file_getter.DevServerGetter(build, devserver)
    725 
    726 
    727 def _non_experimental_tests_predicate(test_data):
    728     """Test predicate for non-experimental tests."""
    729     return not test_data.experimental
    730 
    731 
    732 def find_and_parse_tests(cf_getter, predicate, suite_name='',
    733                          add_experimental=False, forgiving_parser=True,
    734                          run_prod_code=False, test_args=None):
    735     """
    736     Function to scan through all tests and find eligible tests.
    737 
    738     Search through all tests based on given cf_getter, suite_name,
    739     add_experimental and forgiving_parser, return the tests that match
    740     given predicate.
    741 
    742     @param cf_getter: a control_file_getter.ControlFileGetter used to list
    743            and fetch the content of control files
    744     @param predicate: a function that should return True when run over a
    745            ControlData representation of a control file that should be in
    746            this Suite.
    747     @param suite_name: If specified, this method will attempt to restrain
    748                        the search space to just this suite's control files.
    749     @param add_experimental: add tests with experimental attribute set.
    750     @param forgiving_parser: If False, will raise ControlVariableExceptions
    751                              if any are encountered when parsing control
    752                              files. Note that this can raise an exception
    753                              for syntax errors in unrelated files, because
    754                              we parse them before applying the predicate.
    755     @param run_prod_code: If true, the suite will run the test code that
    756                           lives in prod aka the test code currently on the
    757                           lab servers by disabling SSP for the discovered
    758                           tests.
    759     @param test_args: A dict of args to be seeded in test control file.
    760 
    761     @raises ControlVariableException: If forgiving_parser is False and there
    762                                       is a syntax error in a control file.
    763 
    764     @return list of ControlData objects that should be run, with control
    765             file text added in |text| attribute. Results are sorted based
    766             on the TIME setting in control file, slowest test comes first.
    767     """
    768     logging.debug('Getting control file list for suite: %s', suite_name)
    769     retriever = _ControlFileRetriever(cf_getter,
    770                                       forgiving_parser=forgiving_parser,
    771                                       run_prod_code=run_prod_code,
    772                                       test_args=test_args)
    773     tests = retriever.retrieve_for_suite(suite_name)
    774     if not add_experimental:
    775         predicate = _ComposedPredicate([predicate,
    776                                         _non_experimental_tests_predicate])
    777     return suite_common.filter_tests(tests, predicate)
    778 
    779 
    780 def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    781     """
    782     Function to scan through all tests and find possible tests.
    783 
    784     Search through all tests based on given cf_getter, suite_name,
    785     add_experimental and forgiving_parser. Use the given predicate to
    786     calculate the similarity and return the top 10 matches.
    787 
    788     @param cf_getter: a control_file_getter.ControlFileGetter used to list
    789            and fetch the content of control files
    790     @param predicate: a function that should return a tuple of (name, ratio)
    791            when run over a ControlData representation of a control file that
    792            should be in this Suite. `name` is the key to be compared, e.g.,
    793            a suite name or test name. `ratio` is a value between [0,1]
    794            indicating the similarity of `name` and the value to be compared.
    795     @param suite_name: If specified, this method will attempt to restrain
    796                        the search space to just this suite's control files.
    797     @param count: Number of suggestions to return, default to 10.
    798 
    799     @return list of top names that similar to the given test, sorted by
    800             match ratio.
    801     """
    802     logging.debug('Getting control file list for suite: %s', suite_name)
    803     tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name)
    804     logging.debug('Parsed %s control files.', len(tests))
    805     similarities = {}
    806     for test in tests.itervalues():
    807         ratios = predicate(test)
    808         # Some predicates may return a list of tuples, e.g.,
    809         # name_in_tag_similarity_predicate. Convert all returns to a list.
    810         if not isinstance(ratios, list):
    811             ratios = [ratios]
    812         for name, ratio in ratios:
    813             similarities[name] = ratio
    814     return [s[0] for s in
    815             sorted(similarities.items(), key=operator.itemgetter(1),
    816                    reverse=True)][:count]
    817 
    818 
    819 def _deprecated_suite_method(func):
    820     """Decorator for deprecated Suite static methods.
    821 
    822     TODO(ayatane): This is used to decorate functions that are called as
    823     static methods on Suite.
    824     """
    825     @functools.wraps(func)
    826     def wrapper(*args, **kwargs):
    827         """Wraps |func| for warning."""
    828         warnings.warn('Calling method "%s" from Suite is deprecated' %
    829                       func.__name__)
    830         return func(*args, **kwargs)
    831     return staticmethod(wrapper)
    832 
    833 
    834 class _BaseSuite(object):
    835     """
    836     A suite of tests, defined by some predicate over control file variables.
    837 
    838     Given a place to search for control files a predicate to match the desired
    839     tests, can gather tests and fire off jobs to run them, and then wait for
    840     results.
    841 
    842     @var _predicate: a function that should return True when run over a
    843          ControlData representation of a control file that should be in
    844          this Suite.
    845     @var _tag: a string with which to tag jobs run in this suite.
    846     @var _builds: the builds on which we're running this suite.
    847     @var _afe: an instance of AFE as defined in server/frontend.py.
    848     @var _tko: an instance of TKO as defined in server/frontend.py.
    849     @var _jobs: currently scheduled jobs, if any.
    850     @var _jobs_to_tests: a dictionary that maps job ids to tests represented
    851                          ControlData objects.
    852     @var _retry: a bool value indicating whether jobs should be retried on
    853                  failure.
    854     @var _retry_handler: a RetryHandler object.
    855 
    856     """
    857 
    858 
    859     def __init__(
    860             self,
    861             tests,
    862             tag,
    863             builds,
    864             board,
    865             afe=None,
    866             tko=None,
    867             pool=None,
    868             results_dir=None,
    869             max_runtime_mins=24*60,
    870             timeout_mins=24*60,
    871             file_bugs=False,
    872             suite_job_id=None,
    873             ignore_deps=False,
    874             extra_deps=None,
    875             priority=priorities.Priority.DEFAULT,
    876             wait_for_results=True,
    877             job_retry=False,
    878             max_retries=sys.maxint,
    879             offload_failures_only=False,
    880             test_source_build=None,
    881             job_keyvals=None,
    882             child_dependencies=(),
    883             result_reporter=None,
    884     ):
    885         """Initialize instance.
    886 
    887         @param tests: Iterable of tests to run.
    888         @param tag: a string with which to tag jobs run in this suite.
    889         @param builds: the builds on which we're running this suite.
    890         @param board: the board on which we're running this suite.
    891         @param afe: an instance of AFE as defined in server/frontend.py.
    892         @param tko: an instance of TKO as defined in server/frontend.py.
    893         @param pool: Specify the pool of machines to use for scheduling
    894                 purposes.
    895         @param results_dir: The directory where the job can write results to.
    896                             This must be set if you want job_id of sub-jobs
    897                             list in the job keyvals.
    898         @param max_runtime_mins: Maximum suite runtime, in minutes.
    899         @param timeout: Maximum job lifetime, in hours.
    900         @param suite_job_id: Job id that will act as parent id to all sub jobs.
    901                              Default: None
    902         @param ignore_deps: True if jobs should ignore the DEPENDENCIES
    903                             attribute and skip applying of dependency labels.
    904                             (Default:False)
    905         @param extra_deps: A list of strings which are the extra DEPENDENCIES
    906                            to add to each test being scheduled.
    907         @param priority: Integer priority level.  Higher is more important.
    908         @param wait_for_results: Set to False to run the suite job without
    909                                  waiting for test jobs to finish. Default is
    910                                  True.
    911         @param job_retry: A bool value indicating whether jobs should be retried
    912                           on failure. If True, the field 'JOB_RETRIES' in
    913                           control files will be respected. If False, do not
    914                           retry.
    915         @param max_retries: Maximum retry limit at suite level.
    916                             Regardless how many times each individual test
    917                             has been retried, the total number of retries
    918                             happening in the suite can't exceed _max_retries.
    919                             Default to sys.maxint.
    920         @param offload_failures_only: Only enable gs_offloading for failed
    921                                       jobs.
    922         @param test_source_build: Build that contains the server-side test code.
    923         @param job_keyvals: General job keyvals to be inserted into keyval file,
    924                             which will be used by tko/parse later.
    925         @param child_dependencies: (optional) list of dependency strings
    926                 to be added as dependencies to child jobs.
    927         @param result_reporter: A _ResultReporter instance to report results. If
    928                 None, an _EmailReporter will be created.
    929         """
    930 
    931         self.tests = list(tests)
    932         self._tag = tag
    933         self._builds = builds
    934         self._results_dir = results_dir
    935         self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
    936                                                          delay_sec=10,
    937                                                          debug=False)
    938         self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
    939                                                          delay_sec=10,
    940                                                          debug=False)
    941         self._jobs = []
    942         self._jobs_to_tests = {}
    943 
    944         self._file_bugs = file_bugs
    945         self._suite_job_id = suite_job_id
    946         self._job_retry=job_retry
    947         self._max_retries = max_retries
    948         # RetryHandler to be initialized in schedule()
    949         self._retry_handler = None
    950         self.wait_for_results = wait_for_results
    951         self._job_keyvals = job_keyvals
    952         if result_reporter is None:
    953             self._result_reporter = _EmailReporter(self)
    954         else:
    955             self._result_reporter = result_reporter
    956 
    957         if extra_deps is None:
    958             extra_deps = []
    959         extra_deps.append(board)
    960         if pool:
    961             extra_deps.append(pool)
    962         extra_deps.extend(child_dependencies)
    963         self._dependencies = tuple(extra_deps)
    964 
    965         self._job_creator = _SuiteChildJobCreator(
    966             tag=tag,
    967             builds=builds,
    968             board=board,
    969             afe=afe,
    970             max_runtime_mins=max_runtime_mins,
    971             timeout_mins=timeout_mins,
    972             suite_job_id=suite_job_id,
    973             ignore_deps=ignore_deps,
    974             extra_deps=extra_deps,
    975             priority=priority,
    976             offload_failures_only=offload_failures_only,
    977             test_source_build=test_source_build,
    978             job_keyvals=job_keyvals,
    979         )
    980 
    981 
    982     def _schedule_test(self, record, test, retry_for=None):
    983         """Schedule a single test and return the job.
    984 
    985         Schedule a single test by creating a job, and then update relevant
    986         data structures that are used to keep track of all running jobs.
    987 
    988         Emits a TEST_NA status log entry if it failed to schedule the test due
    989         to NoEligibleHostException or a non-existent board label.
    990 
    991         Returns a frontend.Job object if the test is successfully scheduled.
    992         If scheduling failed due to NoEligibleHostException or a non-existent
    993         board label, returns None.
    994 
    995         @param record: A callable to use for logging.
    996                        prototype: record(base_job.status_log_entry)
    997         @param test: ControlData for a test to run.
    998         @param retry_for: If we are scheduling a test to retry an
    999                           old job, the afe_job_id of the old job
   1000                           will be passed in as |retry_for|.
   1001 
   1002         @returns: A frontend.Job object or None
   1003         """
   1004         msg = 'Scheduling %s' % test.name
   1005         if retry_for:
   1006             msg = msg + ', to retry afe job %d' % retry_for
   1007         logging.debug(msg)
   1008         begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
   1009         try:
   1010             job = self._job_creator.create_job(test, retry_for=retry_for)
   1011         except (error.NoEligibleHostException, proxy.ValidationError) as e:
   1012             if (isinstance(e, error.NoEligibleHostException)
   1013                 or (isinstance(e, proxy.ValidationError)
   1014                     and _is_nonexistent_board_error(e))):
   1015                 # Treat a dependency on a non-existent board label the same as
   1016                 # a dependency on a board that exists, but for which there's no
   1017                 # hardware.
   1018                 logging.debug('%s not applicable for this board/pool. '
   1019                               'Emitting TEST_NA.', test.name)
   1020                 Status('TEST_NA', test.name,
   1021                        'Skipping:  test not supported on this board/pool.',
   1022                        begin_time_str=begin_time_str).record_all(record)
   1023                 return None
   1024             else:
   1025                 raise e
   1026         except (error.RPCException, proxy.JSONRPCException):
   1027             if retry_for:
   1028                 # Mark that we've attempted to retry the old job.
   1029                 logging.debug("RPC exception occurred")
   1030                 self._retry_handler.set_attempted(job_id=retry_for)
   1031             raise
   1032         else:
   1033             self._jobs.append(job)
   1034             self._jobs_to_tests[job.id] = test
   1035             if retry_for:
   1036                 # A retry job was just created, record it.
   1037                 self._retry_handler.add_retry(
   1038                         old_job_id=retry_for, new_job_id=job.id)
   1039                 retry_count = (test.job_retries -
   1040                                self._retry_handler.get_retry_max(job.id))
   1041                 logging.debug('Job %d created to retry job %d. '
   1042                               'Have retried for %d time(s)',
   1043                               job.id, retry_for, retry_count)
   1044             self._remember_job_keyval(job)
   1045             return job
   1046 
   1047     def schedule(self, record):
   1048         """
   1049         Schedule jobs using |self._afe|.
   1050 
   1051         frontend.Job objects representing each scheduled job will be put in
   1052         |self._jobs|.
   1053 
   1054         @param record: A callable to use for logging.
   1055                        prototype: record(base_job.status_log_entry)
   1056         @returns: The number of tests that were scheduled.
   1057         """
   1058         scheduled_test_names = []
   1059         logging.debug('Discovered %d tests.', len(self.tests))
   1060 
   1061         Status('INFO', 'Start %s' % self._tag).record_result(record)
   1062         try:
   1063             # Write job_keyvals into keyval file.
   1064             if self._job_keyvals:
   1065                 utils.write_keyval(self._results_dir, self._job_keyvals)
   1066 
   1067             # TODO(crbug.com/730885): This is a hack to protect tests that are
   1068             # not usually retried from getting hit by a provision error when run
   1069             # as part of a suite. Remove this hack once provision is separated
   1070             # out in its own suite.
   1071             self._bump_up_test_retries(self.tests)
   1072             for test in self.tests:
   1073                 scheduled_job = self._schedule_test(record, test)
   1074                 if scheduled_job is not None:
   1075                     scheduled_test_names.append(test.name)
   1076 
   1077             # Write the num of scheduled tests and name of them to keyval file.
   1078             logging.debug('Scheduled %d tests, writing the total to keyval.',
   1079                           len(scheduled_test_names))
   1080             utils.write_keyval(
   1081                 self._results_dir,
   1082                 self._make_scheduled_tests_keyvals(scheduled_test_names))
   1083         except Exception:
   1084             logging.exception('Exception while scheduling suite')
   1085             Status('FAIL', self._tag,
   1086                    'Exception while scheduling suite').record_result(record)
   1087 
   1088         if self._job_retry:
   1089             logging.debug("Initializing RetryHandler for suite %s.", self._tag)
   1090             self._retry_handler = RetryHandler(
   1091                     initial_jobs_to_tests=self._jobs_to_tests,
   1092                     max_retries=self._max_retries)
   1093             logging.debug("retry map created: %s ",
   1094                           self._retry_handler._retry_map)
   1095         else:
   1096             logging.info("Will not retry jobs from suite %s.", self._tag)
   1097         return len(scheduled_test_names)
   1098 
   1099 
   1100     def _bump_up_test_retries(self, tests):
   1101         """Bump up individual test retries to match suite retry options."""
   1102         if not self._job_retry:
   1103             return
   1104 
   1105         for test in tests:
   1106             # We do honor if a test insists on JOB_RETRIES = 0.
   1107             if test.job_retries is None:
   1108                 logging.debug(
   1109                         'Test %s did not request retries, but suite requires '
   1110                         'retries. Bumping retries up to 1. '
   1111                         '(See crbug.com/730885)',
   1112                         test.name)
   1113                 test.job_retries = 1
   1114 
   1115 
   1116     def _make_scheduled_tests_keyvals(self, scheduled_test_names):
   1117         """Make a keyvals dict to write for scheduled test names.
   1118 
   1119         @param scheduled_test_names: A list of scheduled test name strings.
   1120 
   1121         @returns: A keyvals dict.
   1122         """
   1123         return {
   1124             constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
   1125             constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
   1126         }
   1127 
   1128 
   1129     def _should_report(self, result):
   1130         """
   1131         Returns True if this failure requires to be reported.
   1132 
   1133         @param result: A result, encapsulating the status of the failed job.
   1134         @return: True if we should report this failure.
   1135         """
   1136         return (self._file_bugs and result.test_executed and
   1137                 not result.is_testna() and
   1138                 result.is_worse_than(job_status.Status('GOOD', '', 'reason')))
   1139 
   1140 
   1141     def _has_retry(self, result):
   1142         """
   1143         Return True if this result gets to retry.
   1144 
   1145         @param result: A result, encapsulating the status of the failed job.
   1146         @return: bool
   1147         """
   1148         return (self._job_retry
   1149                 and self._retry_handler.has_following_retry(result))
   1150 
   1151 
   1152     def wait(self, record):
   1153         """
   1154         Polls for the job statuses, using |record| to print status when each
   1155         completes.
   1156 
   1157         @param record: callable that records job status.
   1158                  prototype:
   1159                    record(base_job.status_log_entry)
   1160         """
   1161         waiter = job_status.JobResultWaiter(self._afe, self._tko)
   1162         try:
   1163             if self._suite_job_id:
   1164                 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
   1165             else:
   1166                 logging.warning('Unknown suite_job_id, falling back to less '
   1167                                 'efficient results_generator.')
   1168                 jobs = self._jobs
   1169             waiter.add_jobs(jobs)
   1170             for result in waiter.wait_for_results():
   1171                 self._handle_result(result=result, record=record, waiter=waiter)
   1172                 if self._finished_waiting():
   1173                     break
   1174         except Exception:  # pylint: disable=W0703
   1175             logging.exception('Exception waiting for results')
   1176             Status('FAIL', self._tag,
   1177                    'Exception waiting for results').record_result(record)
   1178 
   1179 
   1180     def _finished_waiting(self):
   1181         """Return whether the suite is finished waiting for child jobs."""
   1182         return False
   1183 
   1184 
   1185     def _handle_result(self, result, record, waiter):
   1186         """
   1187         Handle a test job result.
   1188 
   1189         @param result: Status instance for job.
   1190         @param record: callable that records job status.
   1191                  prototype:
   1192                    record(base_job.status_log_entry)
   1193         @param waiter: JobResultsWaiter instance.
   1194 
   1195         @instance_param _result_reporter: _ResultReporter instance.
   1196         """
   1197         self._record_result(result, record)
   1198         rescheduled = False
   1199         if self._job_retry and self._retry_handler._should_retry(result):
   1200             rescheduled = self._retry_result(result, record, waiter)
   1201         # TODO (crbug.com/751428): If the suite times out before a retry could
   1202         # finish, we would lose the chance to report errors from the original
   1203         # job.
   1204         if self._has_retry(result) and rescheduled:
   1205              return
   1206 
   1207         if self._should_report(result):
   1208             self._result_reporter.report(result)
   1209 
   1210     def _record_result(self, result, record):
   1211         """
   1212         Record a test job result.
   1213 
   1214         @param result: Status instance for job.
   1215         @param record: callable that records job status.
   1216                  prototype:
   1217                    record(base_job.status_log_entry)
   1218         """
   1219         result.record_all(record)
   1220         self._remember_job_keyval(result)
   1221 
   1222 
   1223     def _retry_result(self, result, record, waiter):
   1224         """
   1225         Retry a test job result.
   1226 
   1227         @param result: Status instance for job.
   1228         @param record: callable that records job status.
   1229                  prototype:
   1230                    record(base_job.status_log_entry)
   1231         @param waiter: JobResultsWaiter instance.
   1232         @returns: True if a job was scheduled for retry, False otherwise.
   1233         """
   1234         test = self._jobs_to_tests[result.id]
   1235         try:
   1236             # It only takes effect for CQ retriable job:
   1237             #   1) in first try, test.fast=True.
   1238             #   2) in second try, test will be run in normal mode, so reset
   1239             #       test.fast=False.
   1240             test.fast = False
   1241             new_job = self._schedule_test(
   1242                     record=record, test=test, retry_for=result.id)
   1243         except (error.RPCException, proxy.JSONRPCException) as e:
   1244             logging.error('Failed to schedule test: %s, Reason: %s',
   1245                           test.name, e)
   1246             return False
   1247         else:
   1248             waiter.add_job(new_job)
   1249             return bool(new_job)
   1250 
   1251     @property
   1252     def jobs(self):
   1253         """Give a copy of the associated jobs
   1254 
   1255         @returns: array of jobs"""
   1256         return [job for job in self._jobs]
   1257 
   1258 
   1259     @property
   1260     def _should_file_bugs(self):
   1261         """Return whether bugs should be filed.
   1262 
   1263         @returns: bool
   1264         """
   1265         # File bug when failure is one of the _FILE_BUG_SUITES,
   1266         # otherwise send an email to the owner anc cc.
   1267         return self._tag in _FILE_BUG_SUITES
   1268 
   1269 
   1270     def abort(self):
   1271         """
   1272         Abort all scheduled test jobs.
   1273         """
   1274         if self._jobs:
   1275             job_ids = [job.id for job in self._jobs]
   1276             self._afe.run('abort_host_queue_entries', job__id__in=job_ids)
   1277 
   1278 
   1279     def _remember_job_keyval(self, job):
   1280         """
   1281         Record provided job as a suite job keyval, for later referencing.
   1282 
   1283         @param job: some representation of a job that has the attributes:
   1284                     id, test_name, and owner
   1285         """
   1286         if self._results_dir and job.id and job.owner and job.test_name:
   1287             job_id_owner = '%s-%s' % (job.id, job.owner)
   1288             logging.debug('Adding job keyval for %s=%s',
   1289                           job.test_name, job_id_owner)
   1290             utils.write_keyval(
   1291                 self._results_dir,
   1292                 {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
   1293 
   1294 
   1295 class Suite(_BaseSuite):
   1296     """
   1297     A suite of tests, defined by some predicate over control file variables.
   1298 
   1299     Given a place to search for control files a predicate to match the desired
   1300     tests, can gather tests and fire off jobs to run them, and then wait for
   1301     results.
   1302 
   1303     @var _predicate: a function that should return True when run over a
   1304          ControlData representation of a control file that should be in
   1305          this Suite.
   1306     @var _tag: a string with which to tag jobs run in this suite.
   1307     @var _builds: the builds on which we're running this suite.
   1308     @var _afe: an instance of AFE as defined in server/frontend.py.
   1309     @var _tko: an instance of TKO as defined in server/frontend.py.
   1310     @var _jobs: currently scheduled jobs, if any.
   1311     @var _jobs_to_tests: a dictionary that maps job ids to tests represented
   1312                          ControlData objects.
   1313     @var _cf_getter: a control_file_getter.ControlFileGetter
   1314     @var _retry: a bool value indicating whether jobs should be retried on
   1315                  failure.
   1316     @var _retry_handler: a RetryHandler object.
   1317 
   1318     """
   1319 
   1320     # TODO(ayatane): These methods are kept on the Suite class for
   1321     # backward compatibility.
   1322     find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
   1323     find_possible_tests = _deprecated_suite_method(find_possible_tests)
   1324     create_fs_getter = _deprecated_suite_method(create_fs_getter)
   1325     name_in_tag_predicate = _deprecated_suite_method(
   1326             suite_common.name_in_tag_predicate)
   1327     name_in_tag_similarity_predicate = _deprecated_suite_method(
   1328             name_in_tag_similarity_predicate)
   1329     test_name_equals_predicate = _deprecated_suite_method(
   1330             test_name_equals_predicate)
   1331     test_name_matches_pattern_predicate = _deprecated_suite_method(
   1332             test_name_matches_pattern_predicate)
   1333     test_file_matches_pattern_predicate = _deprecated_suite_method(
   1334             test_file_matches_pattern_predicate)
   1335     matches_attribute_expression_predicate = _deprecated_suite_method(
   1336             matches_attribute_expression_predicate)
   1337     test_name_similarity_predicate = _deprecated_suite_method(
   1338             test_name_similarity_predicate)
   1339     test_file_similarity_predicate = _deprecated_suite_method(
   1340             test_file_similarity_predicate)
   1341     list_all_suites = _deprecated_suite_method(list_all_suites)
   1342     get_test_source_build = _deprecated_suite_method(
   1343             suite_common.get_test_source_build)
   1344 
   1345 
   1346     @classmethod
   1347     def create_from_predicates(cls, predicates, builds, board, devserver,
   1348                                cf_getter=None, name='ad_hoc_suite',
   1349                                run_prod_code=False, **dargs):
   1350         """
   1351         Create a Suite using a given predicate test filters.
   1352 
   1353         Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
   1354         |autotest_dir| and will schedule them using |afe|.  Pulls control files
   1355         from the default dev server. Results will be pulled from |tko| upon
   1356         completion.
   1357 
   1358         @param predicates: A list of callables that accept ControlData
   1359                            representations of control files. A test will be
   1360                            included in suite if all callables in this list
   1361                            return True on the given control file.
   1362         @param builds: the builds on which we're running this suite. It's a
   1363                        dictionary of version_prefix:build.
   1364         @param board: the board on which we're running this suite.
   1365         @param devserver: the devserver which contains the build.
   1366         @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
   1367                           using DevServerGetter.
   1368         @param name: name of suite. Defaults to 'ad_hoc_suite'
   1369         @param run_prod_code: If true, the suite will run the tests that
   1370                               lives in prod aka the test code currently on the
   1371                               lab servers.
   1372         @param **dargs: Any other Suite constructor parameters, as described
   1373                         in Suite.__init__ docstring.
   1374         @return a Suite instance.
   1375         """
   1376         if cf_getter is None:
   1377             if run_prod_code:
   1378                 cf_getter = create_fs_getter(_AUTOTEST_DIR)
   1379             else:
   1380                 build = suite_common.get_test_source_build(builds, **dargs)
   1381                 cf_getter = _create_ds_getter(build, devserver)
   1382 
   1383         return cls(predicates,
   1384                    name, builds, board, cf_getter, run_prod_code, **dargs)
   1385 
   1386 
   1387     @classmethod
   1388     def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
   1389                          **dargs):
   1390         """
   1391         Create a Suite using a predicate based on the SUITE control file var.
   1392 
   1393         Makes a predicate based on |name| and uses it to instantiate a Suite
   1394         that looks for tests in |autotest_dir| and will schedule them using
   1395         |afe|.  Pulls control files from the default dev server.
   1396         Results will be pulled from |tko| upon completion.
   1397 
   1398         @param name: a value of the SUITE control file variable to search for.
   1399         @param builds: the builds on which we're running this suite. It's a
   1400                        dictionary of version_prefix:build.
   1401         @param board: the board on which we're running this suite.
   1402         @param devserver: the devserver which contains the build.
   1403         @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
   1404                           using DevServerGetter.
   1405         @param **dargs: Any other Suite constructor parameters, as described
   1406                         in Suite.__init__ docstring.
   1407         @return a Suite instance.
   1408         """
   1409         if cf_getter is None:
   1410             build = suite_common.get_test_source_build(builds, **dargs)
   1411             cf_getter = _create_ds_getter(build, devserver)
   1412 
   1413         return cls([suite_common.name_in_tag_predicate(name)],
   1414                    name, builds, board, cf_getter, **dargs)
   1415 
   1416 
   1417     def __init__(
   1418             self,
   1419             predicates,
   1420             tag,
   1421             builds,
   1422             board,
   1423             cf_getter,
   1424             run_prod_code=False,
   1425             afe=None,
   1426             tko=None,
   1427             pool=None,
   1428             results_dir=None,
   1429             max_runtime_mins=24*60,
   1430             timeout_mins=24*60,
   1431             file_bugs=False,
   1432             suite_job_id=None,
   1433             ignore_deps=False,
   1434             extra_deps=None,
   1435             priority=priorities.Priority.DEFAULT,
   1436             forgiving_parser=True,
   1437             wait_for_results=True,
   1438             job_retry=False,
   1439             max_retries=sys.maxint,
   1440             offload_failures_only=False,
   1441             test_source_build=None,
   1442             job_keyvals=None,
   1443             test_args=None,
   1444             child_dependencies=(),
   1445             result_reporter=None,
   1446     ):
   1447         """
   1448         Constructor
   1449 
   1450         @param predicates: A list of callables that accept ControlData
   1451                            representations of control files. A test will be
   1452                            included in suite if all callables in this list
   1453                            return True on the given control file.
   1454         @param tag: a string with which to tag jobs run in this suite.
   1455         @param builds: the builds on which we're running this suite.
   1456         @param board: the board on which we're running this suite.
   1457         @param cf_getter: a control_file_getter.ControlFileGetter
   1458         @param afe: an instance of AFE as defined in server/frontend.py.
   1459         @param tko: an instance of TKO as defined in server/frontend.py.
   1460         @param pool: Specify the pool of machines to use for scheduling
   1461                 purposes.
   1462         @param run_prod_code: If true, the suite will run the test code that
   1463                               lives in prod aka the test code currently on the
   1464                               lab servers.
   1465         @param results_dir: The directory where the job can write results to.
   1466                             This must be set if you want job_id of sub-jobs
   1467                             list in the job keyvals.
   1468         @param max_runtime_mins: Maximum suite runtime, in minutes.
   1469         @param timeout: Maximum job lifetime, in hours.
   1470         @param suite_job_id: Job id that will act as parent id to all sub jobs.
   1471                              Default: None
   1472         @param ignore_deps: True if jobs should ignore the DEPENDENCIES
   1473                             attribute and skip applying of dependency labels.
   1474                             (Default:False)
   1475         @param extra_deps: A list of strings which are the extra DEPENDENCIES
   1476                            to add to each test being scheduled.
   1477         @param priority: Integer priority level.  Higher is more important.
   1478         @param wait_for_results: Set to False to run the suite job without
   1479                                  waiting for test jobs to finish. Default is
   1480                                  True.
   1481         @param job_retry: A bool value indicating whether jobs should be retried
   1482                           on failure. If True, the field 'JOB_RETRIES' in
   1483                           control files will be respected. If False, do not
   1484                           retry.
   1485         @param max_retries: Maximum retry limit at suite level.
   1486                             Regardless how many times each individual test
   1487                             has been retried, the total number of retries
   1488                             happening in the suite can't exceed _max_retries.
   1489                             Default to sys.maxint.
   1490         @param offload_failures_only: Only enable gs_offloading for failed
   1491                                       jobs.
   1492         @param test_source_build: Build that contains the server-side test code.
   1493         @param job_keyvals: General job keyvals to be inserted into keyval file,
   1494                             which will be used by tko/parse later.
   1495         @param test_args: A dict of args passed all the way to each individual
   1496                           test that will be actually ran.
   1497         @param child_dependencies: (optional) list of dependency strings
   1498                 to be added as dependencies to child jobs.
   1499         @param result_reporter: A _ResultReporter instance to report results. If
   1500                 None, an _EmailReporter will be created.
   1501         """
   1502         tests = find_and_parse_tests(
   1503                 cf_getter,
   1504                 _ComposedPredicate(predicates),
   1505                 tag,
   1506                 forgiving_parser=forgiving_parser,
   1507                 run_prod_code=run_prod_code,
   1508                 test_args=test_args,
   1509         )
   1510         super(Suite, self).__init__(
   1511                 tests=tests,
   1512                 tag=tag,
   1513                 builds=builds,
   1514                 board=board,
   1515                 afe=afe,
   1516                 tko=tko,
   1517                 pool=pool,
   1518                 results_dir=results_dir,
   1519                 max_runtime_mins=max_runtime_mins,
   1520                 timeout_mins=timeout_mins,
   1521                 file_bugs=file_bugs,
   1522                 suite_job_id=suite_job_id,
   1523                 ignore_deps=ignore_deps,
   1524                 extra_deps=extra_deps,
   1525                 priority=priority,
   1526                 wait_for_results=wait_for_results,
   1527                 job_retry=job_retry,
   1528                 max_retries=max_retries,
   1529                 offload_failures_only=offload_failures_only,
   1530                 test_source_build=test_source_build,
   1531                 job_keyvals=job_keyvals,
   1532                 child_dependencies=child_dependencies,
   1533                 result_reporter=result_reporter,
   1534         )
   1535 
   1536 
   1537 class ProvisionSuite(_BaseSuite):
   1538     """
   1539     A suite for provisioning DUTs.
   1540 
   1541     This is done by creating dummy_Pass tests.
   1542     """
   1543 
   1544 
   1545     def __init__(
   1546             self,
   1547             tag,
   1548             builds,
   1549             board,
   1550             devserver,
   1551             num_required,
   1552             num_max=float('inf'),
   1553             cf_getter=None,
   1554             run_prod_code=False,
   1555             test_args=None,
   1556             test_source_build=None,
   1557             **kwargs):
   1558         """
   1559         Constructor
   1560 
   1561         @param tag: a string with which to tag jobs run in this suite.
   1562         @param builds: the builds on which we're running this suite.
   1563         @param board: the board on which we're running this suite.
   1564         @param devserver: the devserver which contains the build.
   1565         @param num_required: number of tests that must pass.  This is
   1566                              capped by the number of tests that are run.
   1567         @param num_max: max number of tests to make.  By default there
   1568                         is no cap, a test is created for each eligible host.
   1569         @param cf_getter: a control_file_getter.ControlFileGetter.
   1570         @param test_args: A dict of args passed all the way to each individual
   1571                           test that will be actually ran.
   1572         @param test_source_build: Build that contains the server-side test code.
   1573         @param kwargs: Various keyword arguments passed to
   1574                        _BaseSuite constructor.
   1575         """
   1576         super(ProvisionSuite, self).__init__(
   1577                 tests=[],
   1578                 tag=tag,
   1579                 builds=builds,
   1580                 board=board,
   1581                 **kwargs)
   1582         self._num_successful = 0
   1583         self._num_required = 0
   1584         self.tests = []
   1585 
   1586         static_deps = [dep for dep in self._dependencies
   1587                        if not provision.Provision.acts_on(dep)]
   1588         if 'pool:suites' in static_deps:
   1589             logging.info('Provision suite is disabled on suites pool')
   1590             return
   1591         logging.debug('Looking for hosts matching %r', static_deps)
   1592         hosts = self._afe.get_hosts(
   1593                 invalid=False, multiple_labels=static_deps)
   1594         logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts))
   1595         available_hosts = [h for h in hosts if h.is_available()]
   1596         logging.debug('Found %d available hosts for ProvisionSuite',
   1597                       len(available_hosts))
   1598         dummy_test = _load_dummy_test(
   1599                 builds, devserver, cf_getter,
   1600                 run_prod_code, test_args, test_source_build)
   1601         self.tests = [dummy_test] * min(len(available_hosts), num_max)
   1602         logging.debug('Made %d tests for ProvisionSuite', len(self.tests))
   1603         self._num_required = min(num_required, len(self.tests))
   1604         logging.debug('Expecting %d tests to pass for ProvisionSuite',
   1605                       self._num_required)
   1606 
   1607     def _handle_result(self, result, record, waiter):
   1608         super(ProvisionSuite, self)._handle_result(result, record, waiter)
   1609         if result.is_good():
   1610             self._num_successful += 1
   1611 
   1612     def _finished_waiting(self):
   1613         return self._num_successful >= self._num_required
   1614 
   1615 
   1616 def _load_dummy_test(
   1617         builds,
   1618         devserver,
   1619         cf_getter=None,
   1620         run_prod_code=False,
   1621         test_args=None,
   1622         test_source_build=None):
   1623     """
   1624     Load and return the dummy pass test.
   1625 
   1626     @param builds: the builds on which we're running this suite.
   1627     @param devserver: the devserver which contains the build.
   1628     @param cf_getter: a control_file_getter.ControlFileGetter.
   1629     @param test_args: A dict of args passed all the way to each individual
   1630                       test that will be actually ran.
   1631     @param test_source_build: Build that contains the server-side test code.
   1632     """
   1633     if cf_getter is None:
   1634         if run_prod_code:
   1635             cf_getter = create_fs_getter(_AUTOTEST_DIR)
   1636         else:
   1637             build = suite_common.get_test_source_build(
   1638                     builds, test_source_build=test_source_build)
   1639             devserver.stage_artifacts(image=build,
   1640                                       artifacts=['control_files'])
   1641             cf_getter = _create_ds_getter(build, devserver)
   1642     retriever = _ControlFileRetriever(cf_getter,
   1643                                       run_prod_code=run_prod_code,
   1644                                       test_args=test_args)
   1645     return retriever.retrieve_for_test('dummy_Pass')
   1646 
   1647 
   1648 class _ComposedPredicate(object):
   1649     """Return the composition of the predicates.
   1650 
   1651     Predicates are functions that take a test control data object and
   1652     return True of that test is to be included.  The returned
   1653     predicate's set is the intersection of all of the input predicates'
   1654     sets (it returns True if all predicates return True).
   1655     """
   1656 
   1657     def __init__(self, predicates):
   1658         """Initialize instance.
   1659 
   1660         @param predicates: Iterable of predicates.
   1661         """
   1662         self._predicates = list(predicates)
   1663 
   1664     def __repr__(self):
   1665         return '{cls}({this._predicates!r})'.format(
   1666             cls=type(self).__name__,
   1667             this=self,
   1668         )
   1669 
   1670     def __call__(self, control_data_):
   1671         return all(f(control_data_) for f in self._predicates)
   1672 
   1673 
   1674 def _is_nonexistent_board_error(e):
   1675     """Return True if error is caused by nonexistent board label.
   1676 
   1677     As of this writing, the particular case we want looks like this:
   1678 
   1679      1) e.problem_keys is a dictionary
   1680      2) e.problem_keys['meta_hosts'] exists as the only key
   1681         in the dictionary.
   1682      3) e.problem_keys['meta_hosts'] matches this pattern:
   1683         "Label "board:.*" not found"
   1684 
   1685     We check for conditions 1) and 2) on the
   1686     theory that they're relatively immutable.
   1687     We don't check condition 3) because it seems
   1688     likely to be a maintenance burden, and for the
   1689     times when we're wrong, being right shouldn't
   1690     matter enough (we _hope_).
   1691 
   1692     @param e: proxy.ValidationError instance
   1693     @returns: boolean
   1694     """
   1695     return (isinstance(e.problem_keys, dict)
   1696             and len(e.problem_keys) == 1
   1697             and 'meta_hosts' in e.problem_keys)
   1698 
   1699 
   1700 class _ResultReporter(object):
   1701     """Abstract base class for reporting test results.
   1702 
   1703     Usually, this is used to report test failures.
   1704     """
   1705 
   1706     __metaclass__ = abc.ABCMeta
   1707 
   1708     @abc.abstractmethod
   1709     def report(self, result):
   1710         """Report test result.
   1711 
   1712         @param result: Status instance for job.
   1713         """
   1714 
   1715 
   1716 class _EmailReporter(_ResultReporter):
   1717     """Class that emails based on test failures."""
   1718 
   1719     # TODO(akeshet): Document what |bug_template| is actually supposed to come
   1720     # from, and rename it to something unrelated to "bugs" which are no longer
   1721     # relevant now that this is purely an email sender.
   1722     def __init__(self, suite, bug_template=None):
   1723         self._suite = suite
   1724         self._bug_template = bug_template or {}
   1725 
   1726     def _get_test_bug(self, result):
   1727         """Get TestBug for the given result.
   1728 
   1729         @param result: Status instance for a test job.
   1730         @returns: TestBug instance.
   1731         """
   1732         # reporting modules have dependency on external packages, e.g., httplib2
   1733         # Such dependency can cause issue to any module tries to import suite.py
   1734         # without building site-packages first. Since the reporting modules are
   1735         # only used in this function, move the imports here avoid the
   1736         # requirement of building site packages to use other functions in this
   1737         # module.
   1738         from autotest_lib.server.cros.dynamic_suite import reporting
   1739 
   1740         job_views = self._suite._tko.run('get_detailed_test_views',
   1741                                          afe_job_id=result.id)
   1742         return reporting.TestBug(self._suite._job_creator.cros_build,
   1743                 utils.get_chrome_version(job_views),
   1744                 self._suite._tag,
   1745                 result)
   1746 
   1747     def _get_bug_template(self, result):
   1748         """Get BugTemplate for test job.
   1749 
   1750         @param result: Status instance for job.
   1751         @param bug_template: A template dictionary specifying the default bug
   1752                              filing options for failures in this suite.
   1753         @returns: BugTemplate instance
   1754         """
   1755         # reporting modules have dependency on external packages, e.g., httplib2
   1756         # Such dependency can cause issue to any module tries to import suite.py
   1757         # without building site-packages first. Since the reporting modules are
   1758         # only used in this function, move the imports here avoid the
   1759         # requirement of building site packages to use other functions in this
   1760         # module.
   1761         from autotest_lib.server.cros.dynamic_suite import reporting_utils
   1762 
   1763         # Try to merge with bug template in test control file.
   1764         template = reporting_utils.BugTemplate(self._bug_template)
   1765         try:
   1766             test_data = self._suite._jobs_to_tests[result.id]
   1767             return template.finalize_bug_template(
   1768                     test_data.bug_template)
   1769         except AttributeError:
   1770             # Test control file does not have bug template defined.
   1771             return template.bug_template
   1772         except reporting_utils.InvalidBugTemplateException as e:
   1773             logging.error('Merging bug templates failed with '
   1774                           'error: %s An empty bug template will '
   1775                           'be used.', e)
   1776             return {}
   1777 
   1778     def report(self, result):
   1779         # reporting modules have dependency on external
   1780         # packages, e.g., httplib2 Such dependency can cause
   1781         # issue to any module tries to import suite.py without
   1782         # building site-packages first. Since the reporting
   1783         # modules are only used in this function, move the
   1784         # imports here avoid the requirement of building site
   1785         # packages to use other functions in this module.
   1786         from autotest_lib.server.cros.dynamic_suite import reporting
   1787 
   1788         reporting.send_email(
   1789                 self._get_test_bug(result),
   1790                 self._get_bug_template(result))
   1791