Home | History | Annotate | Download | only in dynamic_suite
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import datetime
      6 import difflib
      7 import hashlib
      8 import logging
      9 import operator
     10 import os
     11 import re
     12 import sys
     13 
     14 import common
     15 
     16 from autotest_lib.frontend.afe.json_rpc import proxy
     17 from autotest_lib.client.common_lib import control_data
     18 from autotest_lib.client.common_lib import enum
     19 from autotest_lib.client.common_lib import error
     20 from autotest_lib.client.common_lib import global_config
     21 from autotest_lib.client.common_lib import priorities
     22 from autotest_lib.client.common_lib import site_utils
     23 from autotest_lib.client.common_lib import time_utils
     24 from autotest_lib.client.common_lib import utils
     25 from autotest_lib.frontend.afe.json_rpc import proxy
     26 from autotest_lib.server.cros import provision
     27 from autotest_lib.server.cros.dynamic_suite import constants
     28 from autotest_lib.server.cros.dynamic_suite import control_file_getter
     29 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     30 from autotest_lib.server.cros.dynamic_suite import job_status
     31 from autotest_lib.server.cros.dynamic_suite import tools
     32 from autotest_lib.server.cros.dynamic_suite.job_status import Status
     33 
# chromite is only available inside the chroot (see the messages below);
# boolparse_lib backs matches_attribute_expression_predicate, and
# cros_logging deliberately rebinds the stdlib `logging` imported above.
try:
    from chromite.lib import boolparse_lib
    from chromite.lib import cros_logging as logging
except ImportError:
    print 'Unable to import chromite.'
    print 'This script must be either:'
    print '  - Be run in the chroot.'
    print '  - (not yet supported) be run after running '
    print '    ../utils/build_externals.py'

# Suites listed here opt in to bug filing; NOTE(review): the consumer of
# this list is outside this chunk -- confirm against the bug-filing path.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Autotest installation directory on the drones, read from global config.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
# Config switch (default False) -- presumably enables fetching control
# files from the dev server in one batched call; confirm with the getter.
ENABLE_CONTROLS_IN_BATCH = global_config.global_config.get_config_value(
        'CROS', 'enable_getting_controls_in_batch', type=bool, default=False)
     51 
     52 class RetryHandler(object):
     53     """Maintain retry information.
     54 
     55     @var _retry_map: A dictionary that stores retry history.
     56             The key is afe job id. The value is a dictionary.
     57             {job_id: {'state':RetryHandler.States, 'retry_max':int}}
     58             - state:
     59                 The retry state of a job.
     60                 NOT_ATTEMPTED:
     61                     We haven't done anything about the job.
     62                 ATTEMPTED:
     63                     We've made an attempt to schedule a retry job. The
     64                     scheduling may or may not be successful, e.g.
     65                     it might encounter an rpc error. Note failure
     66                     in scheduling a retry is different from a retry job failure.
     67                     For each job, we only attempt to schedule a retry once.
     68                     For example, assume we have a test with JOB_RETRIES=5 and
     69                     its second retry job failed. When we attempt to create
     70                     a third retry job to retry the second, we hit an rpc
     71                     error. In such case, we will give up on all following
     72                     retries.
     73                 RETRIED:
     74                     A retry job has already been successfully
     75                     scheduled.
     76             - retry_max:
     77                 The maximum of times the job can still
     78                 be retried, taking into account retries
     79                 that have occurred.
     80     @var _retry_level: A retry might be triggered only if the result
     81             is worse than the level.
     82     @var _max_retries: Maximum retry limit at suite level.
     83                      Regardless how many times each individual test
     84                      has been retried, the total number of retries happening in
     85                      the suite can't exceed _max_retries.
     86     """
     87 
     88     States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
     89                        start_value=1, step=1)
     90 
     91     def __init__(self, initial_jobs_to_tests, retry_level='WARN',
     92                  max_retries=None):
     93         """Initialize RetryHandler.
     94 
     95         @param initial_jobs_to_tests: A dictionary that maps a job id to
     96                 a ControlData object. This dictionary should contain
     97                 jobs that are originally scheduled by the suite.
     98         @param retry_level: A retry might be triggered only if the result is
     99                 worse than the level.
    100         @param max_retries: Integer, maxmium total retries allowed
    101                                   for the suite. Default to None, no max.
    102         """
    103         self._retry_map = {}
    104         self._retry_level = retry_level
    105         self._max_retries = (max_retries
    106                              if max_retries is not None else sys.maxint)
    107         for job_id, test in initial_jobs_to_tests.items():
    108             if test.job_retries > 0:
    109                 self._add_job(new_job_id=job_id,
    110                               retry_max=test.job_retries)
    111 
    112 
    113     def _add_job(self, new_job_id, retry_max):
    114         """Add a newly-created job to the retry map.
    115 
    116         @param new_job_id: The afe_job_id of a newly created job.
    117         @param retry_max: The maximum of times that we could retry
    118                           the test if the job fails.
    119 
    120         @raises ValueError if new_job_id is already in retry map.
    121 
    122         """
    123         if new_job_id in self._retry_map:
    124             raise ValueError('add_job called when job is already in retry map.')
    125 
    126         self._retry_map[new_job_id] = {
    127                 'state': self.States.NOT_ATTEMPTED,
    128                 'retry_max': retry_max}
    129 
    130 
    131     def _suite_max_reached(self):
    132         """Return whether maximum retry limit for a suite has been reached."""
    133         return self._max_retries <= 0
    134 
    135 
    136     def add_retry(self, old_job_id, new_job_id):
    137         """Record a retry.
    138 
    139         Update retry map with the retry information.
    140 
    141         @param old_job_id: The afe_job_id of the job that is retried.
    142         @param new_job_id: The afe_job_id of the retry job.
    143 
    144         @raises KeyError if old_job_id isn't in the retry map.
    145         @raises ValueError if we have already retried or made an attempt
    146                 to retry the old job.
    147 
    148         """
    149         old_record = self._retry_map[old_job_id]
    150         if old_record['state'] != self.States.NOT_ATTEMPTED:
    151             raise ValueError(
    152                     'We have already retried or attempted to retry job %d' %
    153                     old_job_id)
    154         old_record['state'] = self.States.RETRIED
    155         self._add_job(new_job_id=new_job_id,
    156                       retry_max=old_record['retry_max'] - 1)
    157         self._max_retries -= 1
    158 
    159 
    160     def set_attempted(self, job_id):
    161         """Set the state of the job to ATTEMPTED.
    162 
    163         @param job_id: afe_job_id of a job.
    164 
    165         @raises KeyError if job_id isn't in the retry map.
    166         @raises ValueError if the current state is not NOT_ATTEMPTED.
    167 
    168         """
    169         current_state = self._retry_map[job_id]['state']
    170         if current_state != self.States.NOT_ATTEMPTED:
    171             # We are supposed to retry or attempt to retry each job
    172             # only once. Raise an error if this is not the case.
    173             raise ValueError('Unexpected state transition: %s -> %s' %
    174                              (self.States.get_string(current_state),
    175                               self.States.get_string(self.States.ATTEMPTED)))
    176         else:
    177             self._retry_map[job_id]['state'] = self.States.ATTEMPTED
    178 
    179 
    180     def has_following_retry(self, result):
    181         """Check whether there will be a following retry.
    182 
    183         We have the following cases for a given job id (result.id),
    184         - no retry map entry -> retry not required, no following retry
    185         - has retry map entry:
    186             - already retried -> has following retry
    187             - has not retried
    188                 (this branch can be handled by checking should_retry(result))
    189                 - retry_max == 0 --> the last retry job, no more retry
    190                 - retry_max > 0
    191                    - attempted, but has failed in scheduling a
    192                      following retry due to rpc error  --> no more retry
    193                    - has not attempped --> has following retry if test failed.
    194 
    195         @param result: A result, encapsulating the status of the job.
    196 
    197         @returns: True, if there will be a following retry.
    198                   False otherwise.
    199 
    200         """
    201         return (result.test_executed
    202                 and result.id in self._retry_map
    203                 and (self._retry_map[result.id]['state'] == self.States.RETRIED
    204                      or self._should_retry(result)))
    205 
    206 
    207     def _should_retry(self, result):
    208         """Check whether we should retry a job based on its result.
    209 
    210         This method only makes sense when called by has_following_retry().
    211 
    212         We will retry the job that corresponds to the result
    213         when all of the following are true.
    214         a) The test was actually executed, meaning that if
    215            a job was aborted before it could ever reach the state
    216            of 'Running', the job will not be retried.
    217         b) The result is worse than |self._retry_level| which
    218            defaults to 'WARN'.
    219         c) The test requires retry, i.e. the job has an entry in the retry map.
    220         d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
    221            Note that if a test has JOB_RETRIES=5, and the second time
    222            it was retried it hit an rpc error, we will give up on
    223            all following retries.
    224         e) The job has not reached its retry max, i.e. retry_max > 0
    225 
    226         @param result: A result, encapsulating the status of the job.
    227 
    228         @returns: True if we should retry the job.
    229 
    230         """
    231         assert result.test_executed
    232         assert result.id in self._retry_map
    233         return (
    234             not self._suite_max_reached()
    235             and result.is_worse_than(
    236                 job_status.Status(self._retry_level, '', 'reason'))
    237             and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
    238             and self._retry_map[result.id]['retry_max'] > 0
    239         )
    240 
    241 
    242     def get_retry_max(self, job_id):
    243         """Get the maximum times the job can still be retried.
    244 
    245         @param job_id: afe_job_id of a job.
    246 
    247         @returns: An int, representing the maximum times the job can still be
    248                   retried.
    249         @raises KeyError if job_id isn't in the retry map.
    250 
    251         """
    252         return self._retry_map[job_id]['retry_max']
    253 
    254 
    255 class _DynamicSuiteDiscoverer(object):
    256     """Test discoverer for dynamic suite tests."""
    257 
    258 
    259     def __init__(self, tests, add_experimental=True):
    260         """Initialize instance.
    261 
    262         @param tests: iterable of tests (ControlData objects)
    263         @param add_experimental: schedule experimental tests as well, or not.
    264         """
    265         self._tests = list(tests)
    266         self._add_experimental = add_experimental
    267 
    268 
    269     def discover_tests(self):
    270         """Return a list of tests to be scheduled for this suite.
    271 
    272         @returns: list of tests (ControlData objects)
    273         """
    274         tests = self.stable_tests
    275         if self._add_experimental:
    276             for test in self.unstable_tests:
    277                 if not test.name.startswith(constants.EXPERIMENTAL_PREFIX):
    278                     test.name = constants.EXPERIMENTAL_PREFIX + test.name
    279                 tests.append(test)
    280         return tests
    281 
    282 
    283     @property
    284     def stable_tests(self):
    285         """Non-experimental tests.
    286 
    287         @returns: list
    288         """
    289         return filter(lambda t: not t.experimental, self._tests)
    290 
    291 
    292     @property
    293     def unstable_tests(self):
    294         """Experimental tests.
    295 
    296         @returns: list
    297         """
    298         return filter(lambda t: t.experimental, self._tests)
    299 
    300 
    301 class Suite(object):
    302     """
    303     A suite of tests, defined by some predicate over control file variables.
    304 
    305     Given a place to search for control files a predicate to match the desired
    306     tests, can gather tests and fire off jobs to run them, and then wait for
    307     results.
    308 
    309     @var _predicate: a function that should return True when run over a
    310          ControlData representation of a control file that should be in
    311          this Suite.
    312     @var _tag: a string with which to tag jobs run in this suite.
    313     @var _builds: the builds on which we're running this suite.
    314     @var _afe: an instance of AFE as defined in server/frontend.py.
    315     @var _tko: an instance of TKO as defined in server/frontend.py.
    316     @var _jobs: currently scheduled jobs, if any.
    317     @var _jobs_to_tests: a dictionary that maps job ids to tests represented
    318                          ControlData objects.
    319     @var _cf_getter: a control_file_getter.ControlFileGetter
    320     @var _retry: a bool value indicating whether jobs should be retried on
    321                  failure.
    322     @var _retry_handler: a RetryHandler object.
    323 
    324     """
    325 
    326 
    @staticmethod
    def _create_ds_getter(build, devserver):
        """Create a getter that fetches control files from a devserver.

        @param build: the build on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @return a DevServerGetter instance for |build| on |devserver|.
        """
        # The original docstring claimed a FileSystemGetter looking under
        # |autotest_dir| -- that was a copy-paste error; this returns a
        # DevServerGetter.
        return control_file_getter.DevServerGetter(build, devserver)
    335 
    336 
    @staticmethod
    def create_fs_getter(autotest_dir):
        """Create a getter that searches the local filesystem for tests.

        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # Currently hard-coded places to look for tests.
        subpaths = ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests')
        directories = [os.path.join(autotest_dir, subpath)
                       for subpath in subpaths]
        return control_file_getter.FileSystemGetter(directories)
    348 
    349 
    350     @staticmethod
    351     def name_in_tag_predicate(name):
    352         """Returns predicate that takes a control file and looks for |name|.
    353 
    354         Builds a predicate that takes in a parsed control file (a ControlData)
    355         and returns True if the SUITE tag is present and contains |name|.
    356 
    357         @param name: the suite name to base the predicate on.
    358         @return a callable that takes a ControlData and looks for |name| in that
    359                 ControlData object's suite member.
    360         """
    361         return lambda t: name in t.suite_tag_parts
    362 
    363 
    364     @staticmethod
    365     def name_in_tag_similarity_predicate(name):
    366         """Returns predicate that takes a control file and gets the similarity
    367         of the suites in the control file and the given name.
    368 
    369         Builds a predicate that takes in a parsed control file (a ControlData)
    370         and returns a list of tuples of (suite name, ratio), where suite name
    371         is each suite listed in the control file, and ratio is the similarity
    372         between each suite and the given name.
    373 
    374         @param name: the suite name to base the predicate on.
    375         @return a callable that takes a ControlData and returns a list of tuples
    376                 of (suite name, ratio), where suite name is each suite listed in
    377                 the control file, and ratio is the similarity between each suite
    378                 and the given name.
    379         """
    380         return lambda t: [(suite,
    381                            difflib.SequenceMatcher(a=suite, b=name).ratio())
    382                           for suite in t.suite_tag_parts] or [(None, 0)]
    383 
    384 
    385     @staticmethod
    386     def test_name_equals_predicate(test_name):
    387         """Returns predicate that matched based on a test's name.
    388 
    389         Builds a predicate that takes in a parsed control file (a ControlData)
    390         and returns True if the test name is equal to |test_name|.
    391 
    392         @param test_name: the test name to base the predicate on.
    393         @return a callable that takes a ControlData and looks for |test_name|
    394                 in that ControlData's name.
    395         """
    396         return lambda t: hasattr(t, 'name') and test_name == t.name
    397 
    398 
    399     @staticmethod
    400     def test_name_matches_pattern_predicate(test_name_pattern):
    401         """Returns predicate that matches based on a test's name pattern.
    402 
    403         Builds a predicate that takes in a parsed control file (a ControlData)
    404         and returns True if the test name matches the given regular expression.
    405 
    406         @param test_name_pattern: regular expression (string) to match against
    407                                   test names.
    408         @return a callable that takes a ControlData and returns
    409                 True if the name fields matches the pattern.
    410         """
    411         return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
    412                                                          t.name)
    413 
    414 
    415     @staticmethod
    416     def test_file_matches_pattern_predicate(test_file_pattern):
    417         """Returns predicate that matches based on a test's file name pattern.
    418 
    419         Builds a predicate that takes in a parsed control file (a ControlData)
    420         and returns True if the test's control file name matches the given
    421         regular expression.
    422 
    423         @param test_file_pattern: regular expression (string) to match against
    424                                   control file names.
    425         @return a callable that takes a ControlData and and returns
    426                 True if control file name matches the pattern.
    427         """
    428         return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
    429                                                          t.path)
    430 
    431 
    @staticmethod
    def matches_attribute_expression_predicate(test_attr_boolstr):
        """Return a predicate over a boolean expression of test attributes.

        The returned callable takes a parsed control file (a ControlData)
        and returns True if the test's attributes satisfy the given
        attribute boolean expression, evaluated by chromite's
        boolparse_lib.

        @param test_attr_boolstr: boolean expression over attributes,
                                  e.g. 'system:all and interval:daily'.

        @return a callable that takes a ControlData and returns True if
                the test attributes satisfy the boolean expression.
        """
        def attributes_match(test):
            return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                               test.attributes)
        return attributes_match
    449 
    450     @staticmethod
    451     def test_name_similarity_predicate(test_name):
    452         """Returns predicate that matched based on a test's name.
    453 
    454         Builds a predicate that takes in a parsed control file (a ControlData)
    455         and returns a tuple of (test name, ratio), where ratio is the similarity
    456         between the test name and the given test_name.
    457 
    458         @param test_name: the test name to base the predicate on.
    459         @return a callable that takes a ControlData and returns a tuple of
    460                 (test name, ratio), where ratio is the similarity between the
    461                 test name and the given test_name.
    462         """
    463         return lambda t: ((None, 0) if not hasattr(t, 'name') else
    464                 (t.name,
    465                  difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
    466 
    467 
    468     @staticmethod
    469     def test_file_similarity_predicate(test_file_pattern):
    470         """Returns predicate that gets the similarity based on a test's file
    471         name pattern.
    472 
    473         Builds a predicate that takes in a parsed control file (a ControlData)
    474         and returns a tuple of (file path, ratio), where ratio is the
    475         similarity between the test file name and the given test_file_pattern.
    476 
    477         @param test_file_pattern: regular expression (string) to match against
    478                                   control file names.
    479         @return a callable that takes a ControlData and and returns a tuple of
    480                 (file path, ratio), where ratio is the similarity between the
    481                 test file name and the given test_file_pattern.
    482         """
    483         return lambda t: ((None, 0) if not hasattr(t, 'path') else
    484                 (t.path, difflib.SequenceMatcher(a=t.path,
    485                                                  b=test_file_pattern).ratio()))
    486 
    487 
    @classmethod
    def list_all_suites(cls, build, devserver, cf_getter=None):
        """List every suite name defined by any control file of a build.

        Parses all ControlData objects with a SUITE tag and collects the
        suite names they declare.

        @param build: the build on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults
                          to using DevServerGetter.

        @return list of suites
        """
        if cf_getter is None:
            cf_getter = cls._create_ds_getter(build, devserver)

        suites = set()
        # Match every control file; we only want the suite tags.
        for test in cls.find_and_parse_tests(cf_getter, lambda t: True,
                                             add_experimental=True):
            suites.update(test.suite_tag_parts)
        return list(suites)
    510 
    511 
    512     @staticmethod
    513     def get_test_source_build(builds, **dargs):
    514         """Get the build of test code.
    515 
    516         Get the test source build from arguments. If parameter
    517         `test_source_build` is set and has a value, return its value. Otherwise
    518         returns the ChromeOS build name if it exists. If ChromeOS build is not
    519         specified either, raise SuiteArgumentException.
    520 
    521         @param builds: the builds on which we're running this suite. It's a
    522                        dictionary of version_prefix:build.
    523         @param **dargs: Any other Suite constructor parameters, as described
    524                         in Suite.__init__ docstring.
    525 
    526         @return: The build contains the test code.
    527         @raise: SuiteArgumentException if both test_source_build and ChromeOS
    528                 build are not specified.
    529 
    530         """
    531         if dargs.get('test_source_build', None):
    532             return dargs['test_source_build']
    533         test_source_build = builds.get(provision.CROS_VERSION_PREFIX, None)
    534         if not test_source_build:
    535             raise error.SuiteArgumentException(
    536                     'test_source_build must be specified if CrOS build is not '
    537                     'specified.')
    538         return test_source_build
    539 
    540 
    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """Create a Suite from the given predicate test filters.

        Uses the supplied predicate(s) to instantiate a Suite. Looks for
        tests in |autotest_dir| and will schedule them using |afe|. Pulls
        control files from the default dev server. Results will be pulled
        from |tko| upon completion.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test is
                           included in the suite if all callables in this
                           list return True on its control file.
        @param builds: the builds on which we're running this suite. It's
                       a dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults
                          to using DevServerGetter.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              live in prod, aka the test code currently
                              on the lab servers.
        @param **dargs: Any other Suite constructor parameters, as
                        described in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            if run_prod_code:
                # Prod: read control files from the local installation.
                cf_getter = cls.create_fs_getter(_AUTOTEST_DIR)
            else:
                source_build = cls.get_test_source_build(builds, **dargs)
                cf_getter = cls._create_ds_getter(source_build, devserver)

        return cls(predicates, name, builds, board, cf_getter,
                   run_prod_code, **dargs)
    580 
    581 
    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """Create a Suite using a predicate on the SUITE control file var.

        Builds a predicate from |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them
        using |afe|. Pulls control files from the default dev server.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search
                     for.
        @param builds: the builds on which we're running this suite. It's
                       a dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults
                          to using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as
                        described in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            source_build = cls.get_test_source_build(builds, **dargs)
            cf_getter = cls._create_ds_getter(source_build, devserver)

        predicate = cls.name_in_tag_predicate(name)
        return cls([predicate], name, builds, board, cf_getter, **dargs)
    610 
    611 
    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            file_experimental_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None
    ):
        """
        Constructor

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in the suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if failures in this suite should have bugs
                          filed (or failure emails sent) for them.
        @param file_experimental_bugs: True if experimental-test failures
                                       should also be reported.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: If False, control file parse failures raise
                                 ControlVariableException instead of being
                                 skipped (passed through to
                                 find_and_parse_tests).
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be
                          retried on failure. If True, the field 'JOB_RETRIES'
                          in control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually run.
        """
        if extra_deps is None:
            extra_deps = []

        self._tag = tag
        self._builds = builds
        self._board = board
        self._cf_getter = cf_getter
        self._results_dir = results_dir
        # Fall back to retrying RPC proxies when no AFE/TKO instance is given.
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []
        self._jobs_to_tests = {}
        # A control file is included only if *every* predicate accepts it.
        self.tests = self.find_and_parse_tests(
                self._cf_getter,
                lambda control_data: all(f(control_data) for f in predicates),
                self._tag,
                add_experimental=True,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )

        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._file_bugs = file_bugs
        self._file_experimental_bugs = file_experimental_bugs
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        self._extra_deps = extra_deps
        self._priority = priority
        self._job_retry=job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals
        self._test_args = test_args
    736 
    737 
    @property
    def _cros_build(self):
        """Return the CrOS build, or an arbitrary build if none is present.

        @returns: the build string keyed by provision.CROS_VERSION_PREFIX in
                  |self._builds|, falling back to the first value.
        """
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        fallback_build = self._builds.values()[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, fallback_build)
    746 
    747 
    def _create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        job_name = tools.create_job_name(
                self._test_source_build or self._cros_build,
                self._tag,
                test.name)
        new_job = self._afe.create_job(
            control_file=test.text,
            name=job_name,
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board] * test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            test_retry=test.retries,
            priority=self._priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)
        # Stash the high-level test name on the job object for reporting.
        new_job.test_name = test.name
        return new_job
    781 
    782 
    783     def _create_job_deps(self, test):
    784         """Create job deps list for a test job.
    785 
    786         @returns: A list of dependency strings.
    787         """
    788         if self._ignore_deps:
    789             job_deps = []
    790         else:
    791             job_deps = list(test.dependencies)
    792         job_deps.extend(self._extra_deps)
    793         if self._pool:
    794             job_deps.append(self._pool)
    795         job_deps.append(self._board)
    796         return job_deps
    797 
    798 
    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Build the keyvals dict used when creating one test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self._cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build goes into job_keyvals so the scheduler can read
        # the build name back from the database when compiling the autoserv
        # command line, avoiding a schema change to afe_jobs.
        #
        # It is only recorded when it differs from the CrOS build or when the
        # job uses more than one build (e.g. both firmware and CrOS get
        # updated on the dut) -- this keeps backwards compatibility, so
        # updated Autotest code can compile an autoserv command line to run
        # in a SSP container using previous builds.
        if (self._test_source_build
                and (self._cros_build != self._test_source_build
                     or len(self._builds) > 1)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = (
                    self._test_source_build)
            for version_prefix, build in self._builds.iteritems():
                if version_prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif version_prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build
        # The suite job id lets the tko parser link this job to its parent
        # via the keyval file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # Recording the old job's id lets the tko parser reconstruct the
        # retry relationship and invalidate the old job's results in the tko
        # database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        return keyvals
    847 
    848 
    def _schedule_test(self, record, test, retry_for=None, ignore_errors=False):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.  If ignore_errors is True, all unknown
        errors return None, otherwise the errors are raised as-is.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.
        @param ignore_errors: If True, when an rpc error occurs, ignore
                             the error and will return None.
                             If False, rpc errors will be raised.

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            if (isinstance(e, error.NoEligibleHostException)
                or (isinstance(e, proxy.ValidationError)
                    and _is_nonexistent_board_error(e))):
                # Treat a dependency on a non-existent board label the same as
                # a dependency on a board that exists, but for which there's no
                # hardware.
                logging.debug('%s not applicable for this board/pool. '
                              'Emitting TEST_NA.', test.name)
                Status('TEST_NA', test.name,
                       'Skipping:  test not supported on this board/pool.',
                       begin_time_str=begin_time_str).record_all(record)
                return None
            else:
                # Bare raise preserves the original traceback; `raise e`
                # would rebuild it from here and hide the real failure site.
                raise
        except (error.RPCException, proxy.JSONRPCException) as e:
            if retry_for:
                # Mark that we've attempted to retry the old job.
                self._retry_handler.set_attempted(job_id=retry_for)

            if ignore_errors:
                logging.error('Failed to schedule test: %s, Reason: %s',
                              test.name, e)
                return None
            else:
                # Bare raise preserves the original traceback (see above).
                raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job
    922 
    923 
    def schedule(self, record, add_experimental=True):
        #pylint: disable-msg=C0111
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects for every job scheduled here end up in
        |self._jobs|.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param add_experimental: schedule experimental tests as well, or not.
        @returns: The number of tests that were scheduled.
        """
        scheduled_names = []
        discoverer = _DynamicSuiteDiscoverer(
                tests=self.tests,
                add_experimental=add_experimental)
        logging.debug('Discovered %d stable tests.',
                      len(discoverer.stable_tests))
        logging.debug('Discovered %d unstable tests.',
                      len(discoverer.unstable_tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Persist general job keyvals first so tko/parse can pick them up.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            for test in discoverer.discover_tests():
                if self._schedule_test(record, test) is not None:
                    scheduled_names.append(test.name)

            # Record how many tests were scheduled, and their names.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_names))
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        # The RetryHandler needs the final job->test map, so build it last.
        if self._job_retry:
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
        return len(scheduled_names)
    973 
    974 
    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Build the keyvals recording which tests got scheduled.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict.
        """
        keyvals = {}
        keyvals[constants.SCHEDULED_TEST_COUNT_KEY] = len(scheduled_test_names)
        keyvals[constants.SCHEDULED_TEST_NAMES_KEY] = repr(scheduled_test_names)
        return keyvals
    986 
    987 
    def _should_report(self, result):
        """
        Return True if this failure requires to be reported.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        if self._has_retry(result):
            # A retry job will cover this one; don't report it now.
            return False

        experimental = (
            constants.EXPERIMENTAL_PREFIX in result._test_name or
            constants.EXPERIMENTAL_PREFIX in result._job_name)

        return (self._file_bugs and result.test_executed and
                (not experimental or self._file_experimental_bugs) and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))
   1006 
   1007 
   1008     def _has_retry(self, result):
   1009         """
   1010         Return True if this result gets to retry.
   1011 
   1012         @param result: A result, encapsulating the status of the failed job.
   1013         @return: bool
   1014         """
   1015         return (self._job_retry
   1016                 and self._retry_handler.has_following_retry(result))
   1017 
   1018 
   1019     def wait(self, record, bug_template=None):
   1020         """
   1021         Polls for the job statuses, using |record| to print status when each
   1022         completes.
   1023 
   1024         @param record: callable that records job status.
   1025                  prototype:
   1026                    record(base_job.status_log_entry)
   1027         @param bug_template: A template dictionary specifying the default bug
   1028                              filing options for failures in this suite.
   1029         """
   1030         # reporting modules have dependency on external packages, e.g., httplib2
   1031         # Such dependency can cause issue to any module tries to import suite.py
   1032         # without building site-packages first. Since the reporting modules are
   1033         # only used in this function, move the imports here avoid the
   1034         # requirement of building site packages to use other functions in this
   1035         # module.
   1036         from autotest_lib.server.cros.dynamic_suite import reporting
   1037 
   1038         if bug_template is None:
   1039             bug_template = {}
   1040 
   1041         if self._file_bugs:
   1042             bug_reporter = reporting.Reporter()
   1043         else:
   1044             bug_reporter = reporting.NullReporter()
   1045         try:
   1046             if self._suite_job_id:
   1047                 results_generator = job_status.wait_for_child_results(
   1048                         self._afe, self._tko, self._suite_job_id)
   1049             else:
   1050                 logging.warning('Unknown suite_job_id, falling back to less '
   1051                                 'efficient results_generator.')
   1052                 results_generator = job_status.wait_for_results(self._afe,
   1053                                                                 self._tko,
   1054                                                                 self._jobs)
   1055             for result in results_generator:
   1056                 self._record_result(
   1057                     result=result,
   1058                     record=record,
   1059                     results_generator=results_generator,
   1060                     bug_reporter=bug_reporter,
   1061                     bug_template=bug_template)
   1062 
   1063         except Exception:  # pylint: disable=W0703
   1064             logging.exception('Exception waiting for results')
   1065             Status('FAIL', self._tag,
   1066                    'Exception waiting for results').record_result(record)
   1067 
   1068 
    def _record_result(self, result, record, results_generator, bug_reporter,
                         bug_template):
        """
        Record a single test job result.

        Logs the result, schedules a retry if one is due (feeding the new job
        back into |results_generator| so it is also waited on), and files a
        bug or sends an email for reportable failures.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param results_generator: Results generator for sending job retries.
        @param bug_reporter: Reporter instance for reporting bugs.
        @param bug_template: A template dictionary specifying the default bug
                             filing options for failures in this suite.
        """
        result.record_all(record)
        self._remember_job_keyval(result)

        if self._has_retry(result):
            new_job = self._schedule_test(
                    record=record, test=self._jobs_to_tests[result.id],
                    retry_for=result.id, ignore_errors=True)
            # Hand the retry job to the generator so its result is also
            # waited on; new_job is None if scheduling the retry failed.
            if new_job:
                results_generator.send([new_job])

        # TODO (fdeng): If the suite times out before a retry could
        # finish, we would lose the chance to file a bug for the
        # original job.
        if self._should_report(result):
            if self._should_file_bugs:
                self._file_bug(result, bug_reporter, bug_template)
            else:
                # reporting modules have dependency on external
                # packages, e.g., httplib2 Such dependency can cause
                # issue to any module tries to import suite.py without
                # building site-packages first. Since the reporting
                # modules are only used in this function, move the
                # imports here avoid the requirement of building site
                # packages to use other functions in this module.
                from autotest_lib.server.cros.dynamic_suite import reporting

                reporting.send_email(
                        self._get_test_bug(result),
                        self._get_bug_template(result, bug_template))
   1112 
   1113 
   1114     def _get_bug_template(self, result, bug_template):
   1115         """Get BugTemplate for test job.
   1116 
   1117         @param result: Status instance for job.
   1118         @param bug_template: A template dictionary specifying the default bug
   1119                              filing options for failures in this suite.
   1120         @returns: BugTemplate instance
   1121         """
   1122         # reporting modules have dependency on external packages, e.g., httplib2
   1123         # Such dependency can cause issue to any module tries to import suite.py
   1124         # without building site-packages first. Since the reporting modules are
   1125         # only used in this function, move the imports here avoid the
   1126         # requirement of building site packages to use other functions in this
   1127         # module.
   1128         from autotest_lib.server.cros.dynamic_suite import reporting_utils
   1129 
   1130         # Try to merge with bug template in test control file.
   1131         template = reporting_utils.BugTemplate(bug_template)
   1132         try:
   1133             test_data = self._jobs_to_tests[result.id]
   1134             return template.finalize_bug_template(
   1135                     test_data.bug_template)
   1136         except AttributeError:
   1137             # Test control file does not have bug template defined.
   1138             return template.bug_template
   1139         except reporting_utils.InvalidBugTemplateException as e:
   1140             logging.error('Merging bug templates failed with '
   1141                           'error: %s An empty bug template will '
   1142                           'be used.', e)
   1143             return {}
   1144 
   1145 
   1146     def _get_test_bug(self, result):
   1147         """Get TestBug for the given result.
   1148 
   1149         @param result: Status instance for a test job.
   1150         @returns: TestBug instance.
   1151         """
   1152         # reporting modules have dependency on external packages, e.g., httplib2
   1153         # Such dependency can cause issue to any module tries to import suite.py
   1154         # without building site-packages first. Since the reporting modules are
   1155         # only used in this function, move the imports here avoid the
   1156         # requirement of building site packages to use other functions in this
   1157         # module.
   1158         from autotest_lib.server.cros.dynamic_suite import reporting
   1159 
   1160         job_views = self._tko.run('get_detailed_test_views',
   1161                                   afe_job_id=result.id)
   1162         return reporting.TestBug(self._cros_build,
   1163                 site_utils.get_chrome_version(job_views),
   1164                 self._tag,
   1165                 result)
   1166 
   1167 
   1168     @property
   1169     def _should_file_bugs(self):
   1170         """Return whether bugs should be filed.
   1171 
   1172         @returns: bool
   1173         """
   1174         # File bug when failure is one of the _FILE_BUG_SUITES,
   1175         # otherwise send an email to the owner anc cc.
   1176         return self._tag in _FILE_BUG_SUITES
   1177 
   1178 
   1179     def _file_bug(self, result, bug_reporter, bug_template):
   1180         """File a bug for a test job result.
   1181 
   1182         @param result: Status instance for job.
   1183         @param bug_reporter: Reporter instance for reporting bugs.
   1184         @param bug_template: A template dictionary specifying the default bug
   1185                              filing options for failures in this suite.
   1186         """
   1187         bug_id, bug_count = bug_reporter.report(
   1188                 self._get_test_bug(result),
   1189                 self._get_bug_template(result, bug_template))
   1190 
   1191         # We use keyvals to communicate bugs filed with run_suite.
   1192         if bug_id is not None:
   1193             bug_keyvals = tools.create_bug_keyvals(
   1194                     result.id, result.test_name,
   1195                     (bug_id, bug_count))
   1196             try:
   1197                 utils.write_keyval(self._results_dir,
   1198                                    bug_keyvals)
   1199             except ValueError:
   1200                 logging.error('Unable to log bug keyval for:%s',
   1201                               result.test_name)
   1202 
   1203 
   1204     def abort(self):
   1205         """
   1206         Abort all scheduled test jobs.
   1207         """
   1208         if self._jobs:
   1209             job_ids = [job.id for job in self._jobs]
   1210             self._afe.run('abort_host_queue_entries', job__id__in=job_ids)
   1211 
   1212 
   1213     def _remember_job_keyval(self, job):
   1214         """
   1215         Record provided job as a suite job keyval, for later referencing.
   1216 
   1217         @param job: some representation of a job that has the attributes:
   1218                     id, test_name, and owner
   1219         """
   1220         if self._results_dir and job.id and job.owner and job.test_name:
   1221             job_id_owner = '%s-%s' % (job.id, job.owner)
   1222             logging.debug('Adding job keyval for %s=%s',
   1223                           job.test_name, job_id_owner)
   1224             utils.write_keyval(
   1225                 self._results_dir,
   1226                 {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
   1227 
   1228 
   1229     @staticmethod
   1230     def _find_all_tests(cf_getter, suite_name='', add_experimental=False,
   1231                         forgiving_parser=True, run_prod_code=False,
   1232                         test_args=None):
   1233         """
   1234         Function to scan through all tests and find all tests.
   1235 
   1236         When this method is called with a file system ControlFileGetter, or
   1237         enable_controls_in_batch is set as false, this function will looks at
   1238         control files returned by cf_getter.get_control_file_list() for tests.
   1239 
   1240         If cf_getter is a File system ControlFileGetter, it performs a full
   1241         parse of the root directory associated with the getter. This is the
   1242         case when it's invoked from suite_preprocessor.
   1243 
   1244         If cf_getter is a devserver getter it looks up the suite_name in a
   1245         suite to control file map generated at build time, and parses the
   1246         relevant control files alone. This lookup happens on the devserver,
   1247         so as far as this method is concerned, both cases are equivalent. If
   1248         enable_controls_in_batch is switched on, this function will call
   1249         cf_getter.get_suite_info() to get a dict of control files and contents
   1250         in batch.
   1251 
   1252         @param cf_getter: a control_file_getter.ControlFileGetter used to list
   1253                and fetch the content of control files
   1254         @param suite_name: If specified, this method will attempt to restrain
   1255                            the search space to just this suite's control files.
   1256         @param add_experimental: add tests with experimental attribute set.
   1257         @param forgiving_parser: If False, will raise ControlVariableExceptions
   1258                                  if any are encountered when parsing control
   1259                                  files. Note that this can raise an exception
   1260                                  for syntax errors in unrelated files, because
   1261                                  we parse them before applying the predicate.
   1262         @param run_prod_code: If true, the suite will run the test code that
   1263                               lives in prod aka the test code currently on the
   1264                               lab servers by disabling SSP for the discovered
   1265                               tests.
   1266         @param test_args: A dict of args to be seeded in test control file under
   1267                           the name |args_dict|.
   1268 
   1269         @raises ControlVariableException: If forgiving_parser is False and there
   1270                                           is a syntax error in a control file.
   1271 
   1272         @returns a dictionary of ControlData objects that based on given
   1273                  parameters.
   1274         """
   1275         logging.debug('Getting control file list for suite: %s', suite_name)
   1276         tests = {}
   1277         use_batch = (ENABLE_CONTROLS_IN_BATCH and hasattr(
   1278                 cf_getter, '_dev_server'))
   1279         if use_batch:
   1280             suite_info = cf_getter.get_suite_info(suite_name=suite_name)
   1281             files = suite_info.keys()
   1282         else:
   1283             files = cf_getter.get_control_file_list(suite_name=suite_name)
   1284 
   1285 
   1286         logging.debug('Parsing control files ...')
   1287         matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
   1288         for file in filter(lambda f: not matcher.match(f), files):
   1289             if use_batch:
   1290                 text = suite_info[file]
   1291             else:
   1292                 text = cf_getter.get_control_file_contents(file)
   1293             # Seed test_args into the control file.
   1294             if test_args:
   1295                 text = tools.inject_vars(test_args, text)
   1296             try:
   1297                 found_test = control_data.parse_control_string(
   1298                         text, raise_warnings=True, path=file)
   1299                 if not add_experimental and found_test.experimental:
   1300                     continue
   1301                 found_test.text = text
   1302                 if run_prod_code:
   1303                     found_test.require_ssp = False
   1304                 tests[file] = found_test
   1305             except control_data.ControlVariableException, e:
   1306                 if not forgiving_parser:
   1307                     msg = "Failed parsing %s\n%s" % (file, e)
   1308                     raise control_data.ControlVariableException(msg)
   1309                 logging.warning("Skipping %s\n%s", file, e)
   1310             except Exception, e:
   1311                 logging.error("Bad %s\n%s", file, e)
   1312         return tests
   1313 
   1314 
   1315     @classmethod
   1316     def find_and_parse_tests(cls, cf_getter, predicate, suite_name='',
   1317                              add_experimental=False, forgiving_parser=True,
   1318                              run_prod_code=False, test_args=None):
   1319         """
   1320         Function to scan through all tests and find eligible tests.
   1321 
   1322         Search through all tests based on given cf_getter, suite_name,
   1323         add_experimental and forgiving_parser, return the tests that match
   1324         given predicate.
   1325 
   1326         @param cf_getter: a control_file_getter.ControlFileGetter used to list
   1327                and fetch the content of control files
   1328         @param predicate: a function that should return True when run over a
   1329                ControlData representation of a control file that should be in
   1330                this Suite.
   1331         @param suite_name: If specified, this method will attempt to restrain
   1332                            the search space to just this suite's control files.
   1333         @param add_experimental: add tests with experimental attribute set.
   1334         @param forgiving_parser: If False, will raise ControlVariableExceptions
   1335                                  if any are encountered when parsing control
   1336                                  files. Note that this can raise an exception
   1337                                  for syntax errors in unrelated files, because
   1338                                  we parse them before applying the predicate.
   1339         @param run_prod_code: If true, the suite will run the test code that
   1340                               lives in prod aka the test code currently on the
   1341                               lab servers by disabling SSP for the discovered
   1342                               tests.
   1343         @param test_args: A dict of args to be seeded in test control file.
   1344 
   1345         @raises ControlVariableException: If forgiving_parser is False and there
   1346                                           is a syntax error in a control file.
   1347 
   1348         @return list of ControlData objects that should be run, with control
   1349                 file text added in |text| attribute. Results are sorted based
   1350                 on the TIME setting in control file, slowest test comes first.
   1351         """
   1352         tests = cls._find_all_tests(cf_getter, suite_name, add_experimental,
   1353                                     forgiving_parser,
   1354                                     run_prod_code=run_prod_code,
   1355                                     test_args=test_args)
   1356         logging.debug('Parsed %s control files.', len(tests))
   1357         tests = [test for test in tests.itervalues() if predicate(test)]
   1358         tests.sort(key=lambda t:
   1359                    control_data.ControlData.get_test_time_index(t.time),
   1360                    reverse=True)
   1361         return tests
   1362 
   1363 
   1364     @classmethod
   1365     def find_possible_tests(cls, cf_getter, predicate, suite_name='', count=10):
   1366         """
   1367         Function to scan through all tests and find possible tests.
   1368 
   1369         Search through all tests based on given cf_getter, suite_name,
   1370         add_experimental and forgiving_parser. Use the given predicate to
   1371         calculate the similarity and return the top 10 matches.
   1372 
   1373         @param cf_getter: a control_file_getter.ControlFileGetter used to list
   1374                and fetch the content of control files
   1375         @param predicate: a function that should return a tuple of (name, ratio)
   1376                when run over a ControlData representation of a control file that
   1377                should be in this Suite. `name` is the key to be compared, e.g.,
   1378                a suite name or test name. `ratio` is a value between [0,1]
   1379                indicating the similarity of `name` and the value to be compared.
   1380         @param suite_name: If specified, this method will attempt to restrain
   1381                            the search space to just this suite's control files.
   1382         @param count: Number of suggestions to return, default to 10.
   1383 
   1384         @return list of top names that similar to the given test, sorted by
   1385                 match ratio.
   1386         """
   1387         tests = cls._find_all_tests(cf_getter, suite_name,
   1388                                     add_experimental=True,
   1389                                     forgiving_parser=True)
   1390         logging.debug('Parsed %s control files.', len(tests))
   1391         similarities = {}
   1392         for test in tests.itervalues():
   1393             ratios = predicate(test)
   1394             # Some predicates may return a list of tuples, e.g.,
   1395             # name_in_tag_similarity_predicate. Convert all returns to a list.
   1396             if not isinstance(ratios, list):
   1397                 ratios = [ratios]
   1398             for name, ratio in ratios:
   1399                 similarities[name] = ratio
   1400         return [s[0] for s in
   1401                 sorted(similarities.items(), key=operator.itemgetter(1),
   1402                        reverse=True)][:count]
   1403 
   1404 
   1405 def _is_nonexistent_board_error(e):
   1406     """Return True if error is caused by nonexistent board label.
   1407 
   1408     As of this writing, the particular case we want looks like this:
   1409 
   1410      1) e.problem_keys is a dictionary
   1411      2) e.problem_keys['meta_hosts'] exists as the only key
   1412         in the dictionary.
   1413      3) e.problem_keys['meta_hosts'] matches this pattern:
   1414         "Label "board:.*" not found"
   1415 
   1416     We check for conditions 1) and 2) on the
   1417     theory that they're relatively immutable.
   1418     We don't check condition 3) because it seems
   1419     likely to be a maintenance burden, and for the
   1420     times when we're wrong, being right shouldn't
   1421     matter enough (we _hope_).
   1422 
   1423     @param e: proxy.ValidationError instance
   1424     @returns: boolean
   1425     """
   1426     return (isinstance(e.problem_keys, dict)
   1427             and len(e.problem_keys) == 1
   1428             and 'meta_hosts' in e.problem_keys)
   1429