Home | History | Annotate | Download | only in server
      1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 
      6 import contextlib
      7 import grp
      8 import httplib
      9 import json
     10 import logging
     11 import os
     12 import random
     13 import re
     14 import time
     15 import traceback
     16 import urllib2
     17 
     18 import common
     19 from autotest_lib.client.bin.result_tools import utils as result_utils
     20 from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
     21 from autotest_lib.client.bin.result_tools import view as result_view
     22 from autotest_lib.client.common_lib import utils
     23 from autotest_lib.client.common_lib import error
     24 from autotest_lib.client.common_lib import file_utils
     25 from autotest_lib.client.common_lib import global_config
     26 from autotest_lib.client.common_lib import host_queue_entry_states
     27 from autotest_lib.client.common_lib import host_states
     28 from autotest_lib.server.cros import provision
     29 from autotest_lib.server.cros.dynamic_suite import constants
     30 from autotest_lib.server.cros.dynamic_suite import job_status
     31 
     32 try:
     33     from chromite.lib import metrics
     34 except ImportError:
     35     metrics = utils.metrics_mock
     36 
     37 
     38 CONFIG = global_config.global_config
     39 
     40 _SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
     41 _LAB_SHERIFF_JS = CONFIG.get_config_value(
     42         'NOTIFICATIONS', 'lab_sheriffs', default='')
     43 _CHROMIUM_BUILD_URL = CONFIG.get_config_value(
     44         'NOTIFICATIONS', 'chromium_build_url', default='')
     45 
     46 LAB_GOOD_STATES = ('open', 'throttled')
     47 
     48 ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
     49         'CROS', 'enable_drone_in_restricted_subnet', type=bool,
     50         default=False)
     51 
     52 # Wait at most 10 mins for duts to go idle.
     53 IDLE_DUT_WAIT_TIMEOUT = 600
     54 
     55 # Mapping between board name and build target. This is for special case handling
     56 # for certain Android board that the board name and build target name does not
     57 # match.
     58 ANDROID_TARGET_TO_BOARD_MAP = {
     59         'seed_l8150': 'gm4g_sprout',
     60         'bat_land': 'bat'
     61         }
     62 ANDROID_BOARD_TO_TARGET_MAP = {
     63         'gm4g_sprout': 'seed_l8150',
     64         'bat': 'bat_land'
     65         }
     66 # Prefix for the metrics name for result size information.
     67 RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'
     68 
     69 class TestLabException(Exception):
     70     """Exception raised when the Test Lab blocks a test or suite."""
     71     pass
     72 
     73 
     74 class ParseBuildNameException(Exception):
     75     """Raised when ParseBuildName() cannot parse a build name."""
     76     pass
     77 
     78 
     79 class Singleton(type):
     80     """Enforce that only one client class is instantiated per process."""
     81     _instances = {}
     82 
     83     def __call__(cls, *args, **kwargs):
     84         """Fetch the instance of a class to use for subsequent calls."""
     85         if cls not in cls._instances:
     86             cls._instances[cls] = super(Singleton, cls).__call__(
     87                     *args, **kwargs)
     88         return cls._instances[cls]
     89 
     90 class EmptyAFEHost(object):
     91     """Object to represent an AFE host object when there is no AFE."""
     92 
     93     def __init__(self):
     94         """
     95         We'll be setting the instance attributes as we use them.  Right now
     96         we only use attributes and labels but as time goes by and other
     97         attributes are used from an actual AFE Host object (check
     98         rpc_interfaces.get_hosts()), we'll add them in here so users won't be
     99         perplexed why their host's afe_host object complains that attribute
    100         doesn't exist.
    101         """
    102         self.attributes = {}
    103         self.labels = []
    104 
    105 
    106 def ParseBuildName(name):
    107     """Format a build name, given board, type, milestone, and manifest num.
    108 
    109     @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
    110                  relative build name, e.g. 'x86-alex-release/LATEST'
    111 
    112     @return board: board the manifest is for, e.g. x86-alex.
    113     @return type: one of 'release', 'factory', or 'firmware'
    114     @return milestone: (numeric) milestone the manifest was associated with.
    115                         Will be None for relative build names.
    116     @return manifest: manifest number, e.g. '2015.0.0'.
    117                       Will be None for relative build names.
    118 
    119     """
    120     match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
    121                      r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
    122                      r'(?P<manifest>[\d.ab-]+)|LATEST)',
    123                      name)
    124     if match and len(match.groups()) >= 5:
    125         return (match.group('board'), match.group('type'),
    126                 match.group('milestone'), match.group('manifest'))
    127     raise ParseBuildNameException('%s is a malformed build name.' % name)
    128 
    129 
    130 def get_labels_from_afe(hostname, label_prefix, afe):
    131     """Retrieve a host's specific labels from the AFE.
    132 
    133     Looks for the host labels that have the form <label_prefix>:<value>
    134     and returns the "<value>" part of the label. None is returned
    135     if there is not a label matching the pattern
    136 
    137     @param hostname: hostname of given DUT.
    138     @param label_prefix: prefix of label to be matched, e.g., |board:|
    139     @param afe: afe instance.
    140 
    141     @returns A list of labels that match the prefix or 'None'
    142 
    143     """
    144     labels = afe.get_labels(name__startswith=label_prefix,
    145                             host__hostname__in=[hostname])
    146     if labels:
    147         return [l.name.split(label_prefix, 1)[1] for l in labels]
    148 
    149 
    150 def get_label_from_afe(hostname, label_prefix, afe):
    151     """Retrieve a host's specific label from the AFE.
    152 
    153     Looks for a host label that has the form <label_prefix>:<value>
    154     and returns the "<value>" part of the label. None is returned
    155     if there is not a label matching the pattern
    156 
    157     @param hostname: hostname of given DUT.
    158     @param label_prefix: prefix of label to be matched, e.g., |board:|
    159     @param afe: afe instance.
    160     @returns the label that matches the prefix or 'None'
    161 
    162     """
    163     labels = get_labels_from_afe(hostname, label_prefix, afe)
    164     if labels and len(labels) == 1:
    165         return labels[0]
    166 
    167 
    168 def get_board_from_afe(hostname, afe):
    169     """Retrieve given host's board from its labels in the AFE.
    170 
    171     Looks for a host label of the form "board:<board>", and
    172     returns the "<board>" part of the label.  `None` is returned
    173     if there is not a single, unique label matching the pattern.
    174 
    175     @param hostname: hostname of given DUT.
    176     @param afe: afe instance.
    177     @returns board from label, or `None`.
    178 
    179     """
    180     return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)
    181 
    182 
    183 def get_build_from_afe(hostname, afe):
    184     """Retrieve the current build for given host from the AFE.
    185 
    186     Looks through the host's labels in the AFE to determine its build.
    187 
    188     @param hostname: hostname of given DUT.
    189     @param afe: afe instance.
    190     @returns The current build or None if it could not find it or if there
    191              were multiple build labels assigned to this host.
    192 
    193     """
    194     for prefix in [provision.CROS_VERSION_PREFIX,
    195                    provision.ANDROID_BUILD_VERSION_PREFIX]:
    196         build = get_label_from_afe(hostname, prefix + ':', afe)
    197         if build:
    198             return build
    199     return None
    200 
    201 
    202 # TODO(fdeng): fix get_sheriffs crbug.com/483254
    203 def get_sheriffs(lab_only=False):
    204     """
    205     Polls the javascript file that holds the identity of the sheriff and
    206     parses it's output to return a list of chromium sheriff email addresses.
    207     The javascript file can contain the ldap of more than one sheriff, eg:
    208     document.write('sheriff_one, sheriff_two').
    209 
    210     @param lab_only: if True, only pulls lab sheriff.
    211     @return: A list of chroium.org sheriff email addresses to cc on the bug.
    212              An empty list if failed to parse the javascript.
    213     """
    214     sheriff_ids = []
    215     sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    216     if not lab_only:
    217         sheriff_js_list.extend(_SHERIFF_JS.split(','))
    218 
    219     for sheriff_js in sheriff_js_list:
    220         try:
    221             url_content = utils.urlopen('%s%s'% (
    222                 _CHROMIUM_BUILD_URL, sheriff_js)).read()
    223         except (ValueError, IOError) as e:
    224             logging.warning('could not parse sheriff from url %s%s: %s',
    225                              _CHROMIUM_BUILD_URL, sheriff_js, str(e))
    226         except (urllib2.URLError, httplib.HTTPException) as e:
    227             logging.warning('unexpected error reading from url "%s%s": %s',
    228                              _CHROMIUM_BUILD_URL, sheriff_js, str(e))
    229         else:
    230             ldaps = re.search(r"document.write\('(.*)'\)", url_content)
    231             if not ldaps:
    232                 logging.warning('Could not retrieve sheriff ldaps for: %s',
    233                                  url_content)
    234                 continue
    235             sheriff_ids += ['%s (at] chromium.org' % alias.replace(' ', '')
    236                             for alias in ldaps.group(1).split(',')]
    237     return sheriff_ids
    238 
    239 
    240 def remote_wget(source_url, dest_path, ssh_cmd):
    241     """wget source_url from localhost to dest_path on remote host using ssh.
    242 
    243     @param source_url: The complete url of the source of the package to send.
    244     @param dest_path: The path on the remote host's file system where we would
    245         like to store the package.
    246     @param ssh_cmd: The ssh command to use in performing the remote wget.
    247     """
    248     wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
    249                 (source_url, ssh_cmd, dest_path))
    250     utils.run(wget_cmd)
    251 
    252 
    253 _MAX_LAB_STATUS_ATTEMPTS = 5
    254 def _get_lab_status(status_url):
    255     """Grabs the current lab status and message.
    256 
    257     @returns The JSON object obtained from the given URL.
    258 
    259     """
    260     retry_waittime = 1
    261     for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
    262         try:
    263             response = urllib2.urlopen(status_url)
    264         except IOError as e:
    265             logging.debug('Error occurred when grabbing the lab status: %s.',
    266                           e)
    267             time.sleep(retry_waittime)
    268             continue
    269         # Check for successful response code.
    270         if response.getcode() == 200:
    271             return json.load(response)
    272         time.sleep(retry_waittime)
    273     return None
    274 
    275 
    276 def _decode_lab_status(lab_status, build):
    277     """Decode lab status, and report exceptions as needed.
    278 
    279     Take a deserialized JSON object from the lab status page, and
    280     interpret it to determine the actual lab status.  Raise
    281     exceptions as required to report when the lab is down.
    282 
    283     @param build: build name that we want to check the status of.
    284 
    285     @raises TestLabException Raised if a request to test for the given
    286                              status and build should be blocked.
    287     """
    288     # First check if the lab is up.
    289     if not lab_status['general_state'] in LAB_GOOD_STATES:
    290         raise TestLabException('Chromium OS Test Lab is closed: '
    291                                '%s.' % lab_status['message'])
    292 
    293     # Check if the build we wish to use is disabled.
    294     # Lab messages should be in the format of:
    295     #    Lab is 'status' [regex ...] (comment)
    296     # If the build name matches any regex, it will be blocked.
    297     build_exceptions = re.search('\[(.*)\]', lab_status['message'])
    298     if not build_exceptions or not build:
    299         return
    300     for build_pattern in build_exceptions.group(1).split():
    301         if re.match(build_pattern, build):
    302             raise TestLabException('Chromium OS Test Lab is closed: '
    303                                    '%s matches %s.' % (
    304                                            build, build_pattern))
    305     return
    306 
    307 
    308 def is_in_lab():
    309     """Check if current Autotest instance is in lab
    310 
    311     @return: True if the Autotest instance is in lab.
    312     """
    313     test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    314     return test_server_name.startswith('cautotest')
    315 
    316 
    317 def check_lab_status(build):
    318     """Check if the lab status allows us to schedule for a build.
    319 
    320     Checks if the lab is down, or if testing for the requested build
    321     should be blocked.
    322 
    323     @param build: Name of the build to be scheduled for testing.
    324 
    325     @raises TestLabException Raised if a request to test for the given
    326                              status and build should be blocked.
    327 
    328     """
    329     # Ensure we are trying to schedule on the actual lab.
    330     if not is_in_lab():
    331         return
    332 
    333     # Download the lab status from its home on the web.
    334     status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    335     json_status = _get_lab_status(status_url)
    336     if json_status is None:
    337         # We go ahead and say the lab is open if we can't get the status.
    338         logging.warning('Could not get a status from %s', status_url)
    339         return
    340     _decode_lab_status(json_status, build)
    341 
    342 
    343 def lock_host_with_labels(afe, lock_manager, labels):
    344     """Lookup and lock one host that matches the list of input labels.
    345 
    346     @param afe: An instance of the afe class, as defined in server.frontend.
    347     @param lock_manager: A lock manager capable of locking hosts, eg the
    348         one defined in server.cros.host_lock_manager.
    349     @param labels: A list of labels to look for on hosts.
    350 
    351     @return: The hostname of a host matching all labels, and locked through the
    352         lock_manager. The hostname will be as specified in the database the afe
    353         object is associated with, i.e if it exists in afe_hosts with a .cros
    354         suffix, the hostname returned will contain a .cros suffix.
    355 
    356     @raises: error.NoEligibleHostException: If no hosts matching the list of
    357         input labels are available.
    358     @raises: error.TestError: If unable to lock a host matching the labels.
    359     """
    360     potential_hosts = afe.get_hosts(multiple_labels=labels)
    361     if not potential_hosts:
    362         raise error.NoEligibleHostException(
    363                 'No devices found with labels %s.' % labels)
    364 
    365     # This prevents errors where a fault might seem repeatable
    366     # because we lock, say, the same packet capturer for each test run.
    367     random.shuffle(potential_hosts)
    368     for host in potential_hosts:
    369         if lock_manager.lock([host.hostname]):
    370             logging.info('Locked device %s with labels %s.',
    371                          host.hostname, labels)
    372             return host.hostname
    373         else:
    374             logging.info('Unable to lock device %s with labels %s.',
    375                          host.hostname, labels)
    376 
    377     raise error.TestError('Could not lock a device with labels %s' % labels)
    378 
    379 
    380 def get_test_views_from_tko(suite_job_id, tko):
    381     """Get test name and result for given suite job ID.
    382 
    383     @param suite_job_id: ID of suite job.
    384     @param tko: an instance of TKO as defined in server/frontend.py.
    385     @return: A dictionary of test status keyed by test name, e.g.,
    386              {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    387     @raise: Exception when there is no test view found.
    388 
    389     """
    390     views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    391     relevant_views = filter(job_status.view_is_relevant, views)
    392     if not relevant_views:
    393         raise Exception('Failed to retrieve job results.')
    394 
    395     test_views = {}
    396     for view in relevant_views:
    397         test_views[view['test_name']] = view['status']
    398 
    399     return test_views
    400 
    401 
    402 def get_data_key(prefix, suite, build, board):
    403     """
    404     Constructs a key string from parameters.
    405 
    406     @param prefix: Prefix for the generating key.
    407     @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy
    408     @param build: The build string. This string should have a consistent
    409         format eg: x86-mario-release/R26-3570.0.0. If the format of this
    410         string changes such that we can't determine build_type or branch
    411         we give up and use the parametes we're sure of instead (suite,
    412         board). eg:
    413             1. build = x86-alex-pgo-release/R26-3570.0.0
    414                branch = 26
    415                build_type = pgo-release
    416             2. build = lumpy-paladin/R28-3993.0.0-rc5
    417                branch = 28
    418                build_type = paladin
    419     @param board: The board that this suite ran on.
    420     @return: The key string used for a dictionary.
    421     """
    422     try:
    423         _board, build_type, branch = ParseBuildName(build)[:3]
    424     except ParseBuildNameException as e:
    425         logging.error(str(e))
    426         branch = 'Unknown'
    427         build_type = 'Unknown'
    428     else:
    429         embedded_str = re.search(r'x86-\w+-(.*)', _board)
    430         if embedded_str:
    431             build_type = embedded_str.group(1) + '-' + build_type
    432 
    433     data_key_dict = {
    434         'prefix': prefix,
    435         'board': board,
    436         'branch': branch,
    437         'build_type': build_type,
    438         'suite': suite,
    439     }
    440     return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
    441             % data_key_dict)
    442 
    443 
    444 def setup_logging(logfile=None, prefix=False):
    445     """Setup basic logging with all logging info stripped.
    446 
    447     Calls to logging will only show the message. No severity is logged.
    448 
    449     @param logfile: If specified dump output to a file as well.
    450     @param prefix: Flag for log prefix. Set to True to add prefix to log
    451         entries to include timestamp and log level. Default is False.
    452     """
    453     # Remove all existing handlers. client/common_lib/logging_config adds
    454     # a StreamHandler to logger when modules are imported, e.g.,
    455     # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    456     # log only messages, not severity.
    457     logging.getLogger().handlers = []
    458 
    459     if prefix:
    460         log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    461     else:
    462         log_format = '%(message)s'
    463 
    464     screen_handler = logging.StreamHandler()
    465     screen_handler.setFormatter(logging.Formatter(log_format))
    466     logging.getLogger().addHandler(screen_handler)
    467     logging.getLogger().setLevel(logging.INFO)
    468     if logfile:
    469         file_handler = logging.FileHandler(logfile)
    470         file_handler.setFormatter(logging.Formatter(log_format))
    471         file_handler.setLevel(logging.DEBUG)
    472         logging.getLogger().addHandler(file_handler)
    473 
    474 
    475 def is_shard():
    476     """Determines if this instance is running as a shard.
    477 
    478     Reads the global_config value shard_hostname in the section SHARD.
    479 
    480     @return True, if shard_hostname is set, False otherwise.
    481     """
    482     hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    483     return bool(hostname)
    484 
    485 
    486 def get_global_afe_hostname():
    487     """Read the hostname of the global AFE from the global configuration."""
    488     return CONFIG.get_config_value('SERVER', 'global_afe_hostname')
    489 
    490 
    491 def is_restricted_user(username):
    492     """Determines if a user is in a restricted group.
    493 
    494     User in restricted group only have access to master.
    495 
    496     @param username: A string, representing a username.
    497 
    498     @returns: True if the user is in a restricted group.
    499     """
    500     if not username:
    501         return False
    502 
    503     restricted_groups = CONFIG.get_config_value(
    504             'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    505     for group in restricted_groups:
    506         try:
    507             if group and username in grp.getgrnam(group).gr_mem:
    508                 return True
    509         except KeyError as e:
    510             logging.debug("%s is not a valid group.", group)
    511     return False
    512 
    513 
    514 def get_special_task_status(is_complete, success, is_active):
    515     """Get the status of a special task.
    516 
    517     Emulate a host queue entry status for a special task
    518     Although SpecialTasks are not HostQueueEntries, it is helpful to
    519     the user to present similar statuses.
    520 
    521     @param is_complete    Boolean if the task is completed.
    522     @param success        Boolean if the task succeeded.
    523     @param is_active      Boolean if the task is active.
    524 
    525     @return The status of a special task.
    526     """
    527     if is_complete:
    528         if success:
    529             return host_queue_entry_states.Status.COMPLETED
    530         return host_queue_entry_states.Status.FAILED
    531     if is_active:
    532         return host_queue_entry_states.Status.RUNNING
    533     return host_queue_entry_states.Status.QUEUED
    534 
    535 
    536 def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    537     """Get the execution path of the SpecialTask.
    538 
    539     This method returns different paths depending on where a
    540     the task ran:
    541         * Master: hosts/hostname/task_id-task_type
    542         * Shard: Master_path/time_created
    543     This is to work around the fact that a shard can fail independent
    544     of the master, and be replaced by another shard that has the same
    545     hosts. Without the time_created stamp the logs of the tasks running
    546     on the second shard will clobber the logs from the first in google
    547     storage, because task ids are not globally unique.
    548 
    549     @param hostname        Hostname
    550     @param task_id         Special task id
    551     @param task_name       Special task name (e.g., Verify, Repair, etc)
    552     @param time_requested  Special task requested time.
    553 
    554     @return An execution path for the task.
    555     """
    556     results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())
    557 
    558     # If we do this on the master it will break backward compatibility,
    559     # as there are tasks that currently don't have timestamps. If a host
    560     # or job has been sent to a shard, the rpc for that host/job will
    561     # be redirected to the shard, so this global_config check will happen
    562     # on the shard the logs are on.
    563     if not is_shard():
    564         return results_path
    565 
    566     # Generate a uid to disambiguate special task result directories
    567     # in case this shard fails. The simplest uid is the job_id, however
    568     # in rare cases tasks do not have jobs associated with them (eg:
    569     # frontend verify), so just use the creation timestamp. The clocks
    570     # between a shard and master should always be in sync. Any discrepancies
    571     # will be brought to our attention in the form of job timeouts.
    572     uid = time_requested.strftime('%Y%d%m%H%M%S')
    573 
    574     # TODO: This is a hack, however it is the easiest way to achieve
    575     # correctness. There is currently some debate over the future of
    576     # tasks in our infrastructure and refactoring everything right
    577     # now isn't worth the time.
    578     return '%s/%s' % (results_path, uid)
    579 
    580 
    581 def get_job_tag(id, owner):
    582     """Returns a string tag for a job.
    583 
    584     @param id    Job id
    585     @param owner Job owner
    586 
    587     """
    588     return '%s-%s' % (id, owner)
    589 
    590 
    591 def get_hqe_exec_path(tag, execution_subdir):
    592     """Returns a execution path to a HQE's results.
    593 
    594     @param tag               Tag string for a job associated with a HQE.
    595     @param execution_subdir  Execution sub-directory string of a HQE.
    596 
    597     """
    598     return os.path.join(tag, execution_subdir)
    599 
    600 
    601 def is_inside_chroot():
    602     """Check if the process is running inside chroot.
    603 
    604     This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    605     method checks if cros_build_lib can be imported first.
    606 
    607     @return: True if the process is running inside chroot or cros_build_lib
    608              cannot be imported.
    609 
    610     """
    611     try:
    612         # TODO(crbug.com/739466) This module import is delayed because it adds
    613         # 1-2 seconds to the module import time and most users of site_utils
    614         # don't need it. The correct fix is to break apart site_utils into more
    615         # meaningful chunks.
    616         from chromite.lib import cros_build_lib
    617     except ImportError:
    618         logging.warn('Unable to import chromite. Can not detect chroot. '
    619                      'Defaulting to False')
    620         return False
    621     return cros_build_lib.IsInsideChroot()
    622 
    623 
    624 def parse_job_name(name):
    625     """Parse job name to get information including build, board and suite etc.
    626 
    627     Suite job created by run_suite follows the naming convention of:
    628     [build]-test_suites/control.[suite]
    629     For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    630     The naming convention is defined in rpc_interface.create_suite_job.
    631 
    632     Test job created by suite job follows the naming convention of:
    633     [build]/[suite]/[test name]
    634     For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    635     The naming convention is defined in
    636     server/cros/dynamic_suite/tools.create_job_name
    637 
    638     Note that pgo and chrome-perf builds will fail the method. Since lab does
    639     not run test for these builds, they can be ignored.
    640     Also, tests for Launch Control builds have different naming convention.
    641     The build ID will be used as build_version.
    642 
    643     @param name: Name of the job.
    644 
    645     @return: A dictionary containing the test information. The keyvals include:
    646              build: Name of the build, e.g., lumpy-release/R46-7272.0.0
    647              build_version: The version of the build, e.g., R46-7272.0.0
    648              board: Name of the board, e.g., lumpy
    649              suite: Name of the test suite, e.g., bvt
    650 
    651     """
    652     info = {}
    653     suite_job_regex = '([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    654     test_job_regex = '([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    655     match = re.match(suite_job_regex, name)
    656     if not match:
    657         match = re.match(test_job_regex, name)
    658     if match:
    659         info['build'] = match.groups()[0]
    660         info['suite'] = match.groups()[1]
    661         info['build_version'] = info['build'].split('/')[1]
    662         try:
    663             info['board'], _, _, _ = ParseBuildName(info['build'])
    664         except ParseBuildNameException:
    665             # Try to parse it as Launch Control build
    666             # Launch Control builds have name format:
    667             # branch/build_target-build_type/build_id.
    668             try:
    669                 _, target, build_id = utils.parse_launch_control_build(
    670                         info['build'])
    671                 build_target, _ = utils.parse_launch_control_target(target)
    672                 if build_target:
    673                     info['board'] = build_target
    674                     info['build_version'] = build_id
    675             except ValueError:
    676                 pass
    677     return info
    678 
    679 
    680 def add_label_detector(label_function_list, label_list=None, label=None):
    681     """Decorator used to group functions together into the provided list.
    682 
    683     This is a helper function to automatically add label functions that have
    684     the label decorator.  This is to help populate the class list of label
    685     functions to be retrieved by the get_labels class method.
    686 
    687     @param label_function_list: List of label detecting functions to add
    688                                 decorated function to.
    689     @param label_list: List of detectable labels to add detectable labels to.
    690                        (Default: None)
    691     @param label: Label string that is detectable by this detection function
    692                   (Default: None)
    693     """
    694     def add_func(func):
    695         """
    696         @param func: The function to be added as a detector.
    697         """
    698         label_function_list.append(func)
    699         if label and label_list is not None:
    700             label_list.append(label)
    701         return func
    702     return add_func
    703 
    704 
    705 def verify_not_root_user():
    706     """Simple function to error out if running with uid == 0"""
    707     if os.getuid() == 0:
    708         raise error.IllegalUser('This script can not be ran as root.')
    709 
    710 
    711 def get_hostname_from_machine(machine):
    712     """Lookup hostname from a machine string or dict.
    713 
    714     @returns: Machine hostname in string format.
    715     """
    716     hostname, _ = get_host_info_from_machine(machine)
    717     return hostname
    718 
    719 
    720 def get_host_info_from_machine(machine):
    721     """Lookup host information from a machine string or dict.
    722 
    723     @returns: Tuple of (hostname, afe_host)
    724     """
    725     if isinstance(machine, dict):
    726         return (machine['hostname'], machine['afe_host'])
    727     else:
    728         return (machine, EmptyAFEHost())
    729 
    730 
    731 def get_afe_host_from_machine(machine):
    732     """Return the afe_host from the machine dict if possible.
    733 
    734     @returns: AFE host object.
    735     """
    736     _, afe_host = get_host_info_from_machine(machine)
    737     return afe_host
    738 
    739 
    740 def get_connection_pool_from_machine(machine):
    741     """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    742     if not isinstance(machine, dict):
    743         return None
    744     return machine.get('connection_pool')
    745 
    746 
    747 def get_creds_abspath(creds_file):
    748     """Returns the abspath of the credentials file.
    749 
    750     If creds_file is already an absolute path, just return it.
    751     Otherwise, assume it is located in the creds directory
    752     specified in global_config and return the absolute path.
    753 
    754     @param: creds_path, a path to the credentials.
    755     @return: An absolute path to the credentials file.
    756     """
    757     if not creds_file:
    758         return None
    759     if os.path.isabs(creds_file):
    760         return creds_file
    761     creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    762     if not creds_dir or not os.path.exists(creds_dir):
    763         creds_dir = common.autotest_dir
    764     return os.path.join(creds_dir, creds_file)
    765 
    766 
    767 def machine_is_testbed(machine):
    768     """Checks if the machine is a testbed.
    769 
    770     The signal we use to determine if the machine is a testbed
    771     is if the host attributes contain more than 1 serial.
    772 
    773     @param machine: is a list of dicts
    774 
    775     @return: True if the machine is a testbed, False otherwise.
    776     """
    777     _, afe_host = get_host_info_from_machine(machine)
    778     return len(afe_host.attributes.get('serials', '').split(',')) > 1
    779 
    780 
    781 def SetupTsMonGlobalState(*args, **kwargs):
    782     """Import-safe wrap around chromite.lib.ts_mon_config's setup function.
    783 
    784     @param *args: Args to pass through.
    785     @param **kwargs: Kwargs to pass through.
    786     """
    787     try:
    788         # TODO(crbug.com/739466) This module import is delayed because it adds
    789         # 1-2 seconds to the module import time and most users of site_utils
    790         # don't need it. The correct fix is to break apart site_utils into more
    791         # meaningful chunks.
    792         from chromite.lib import ts_mon_config
    793     except ImportError:
    794         logging.warn('Unable to import chromite. Monarch is disabled.')
    795         return TrivialContextManager()
    796 
    797     try:
    798         context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
    799         if hasattr(context, '__exit__'):
    800             return context
    801     except Exception as e:
    802         logging.warning('Caught an exception trying to setup ts_mon, '
    803                         'monitoring is disabled: %s', e, exc_info=True)
    804     return TrivialContextManager()
    805 
    806 
    807 @contextlib.contextmanager
    808 def TrivialContextManager(*args, **kwargs):
    809     """Context manager that does nothing.
    810 
    811     @param *args: Ignored args
    812     @param **kwargs: Ignored kwargs.
    813     """
    814     yield
    815 
    816 
    817 def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    818     """Wait for the hosts to all go idle.
    819 
    820     @param duts: List of duts to check for idle state.
    821     @param afe: afe instance.
    822     @param max_wait: Max wait time in seconds to wait for duts to be idle.
    823 
    824     @returns Boolean True if all hosts are idle or False if any hosts did not
    825             go idle within max_wait.
    826     """
    827     start_time = time.time()
    828     # We make a shallow copy since we're going to be modifying active_dut_list.
    829     active_dut_list = duts[:]
    830     while active_dut_list:
    831         # Let's rate-limit how often we hit the AFE.
    832         time.sleep(1)
    833 
    834         # Check if we've waited too long.
    835         if (time.time() - start_time) > max_wait:
    836             return False
    837 
    838         idle_duts = []
    839         # Get the status for the duts and see if they're in the idle state.
    840         afe_hosts = afe.get_hosts(active_dut_list)
    841         idle_duts = [afe_host.hostname for afe_host in afe_hosts
    842                      if afe_host.status in host_states.IDLE_STATES]
    843 
    844         # Take out idle duts so we don't needlessly check them
    845         # next time around.
    846         for idle_dut in idle_duts:
    847             active_dut_list.remove(idle_dut)
    848 
    849         logging.info('still waiting for following duts to go idle: %s',
    850                      active_dut_list)
    851     return True
    852 
    853 
    854 @contextlib.contextmanager
    855 def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
    856                        max_wait=IDLE_DUT_WAIT_TIMEOUT):
    857     """Context manager to lock the duts and wait for them to go idle.
    858 
    859     @param duts: List of duts to lock.
    860     @param afe: afe instance.
    861     @param lock_msg: message for afe on locking this host.
    862     @param max_wait: Max wait time in seconds to wait for duts to be idle.
    863 
    864     @returns Boolean lock_success where True if all duts locked successfully or
    865              False if we timed out waiting too long for hosts to go idle.
    866     """
    867     try:
    868         locked_duts = []
    869         duts.sort()
    870         for dut in duts:
    871             if afe.lock_host(dut, lock_msg, fail_if_locked=True):
    872                 locked_duts.append(dut)
    873             else:
    874                 logging.info('%s already locked', dut)
    875         yield wait_for_idle_duts(locked_duts, afe, max_wait)
    876     finally:
    877         afe.unlock_hosts(locked_duts)
    878 
    879 
    880 def board_labels_allowed(boards):
    881     """Check if the list of board labels can be set to a single host.
    882 
    883     The only case multiple board labels can be set to a single host is for
    884     testbed, which may have a list of board labels like
    885     board:angler-1, board:angler-2, board:angler-3, board:marlin-1'
    886 
    887     @param boards: A list of board labels (may include platform label).
    888 
    889     @returns True if the the list of boards can be set to a single host.
    890     """
    891     # Filter out any non-board labels
    892     boards = [b for b in boards if re.match('board:.*', b)]
    893     if len(boards) <= 1:
    894         return True
    895     for board in boards:
    896         if not re.match('board:[^-]+-\d+', board):
    897             return False
    898     return True
    899 
    900 
    901 def _get_default_size_info(path):
    902     """Get the default result size information.
    903 
    904     In case directory summary is failed to build, assume the test result is not
    905     throttled and all result sizes are the size of existing test results.
    906 
    907     @return: A namedtuple of result size informations, including:
    908             client_result_collected_KB: The total size (in KB) of test results
    909                     collected from test device. Set to be the total size of the
    910                     given path.
    911             original_result_total_KB: The original size (in KB) of test results
    912                     before being trimmed. Set to be the total size of the given
    913                     path.
    914             result_uploaded_KB: The total size (in KB) of test results to be
    915                     uploaded. Set to be the total size of the given path.
    916             result_throttled: True if test results collection is throttled.
    917                     It's set to False in this default behavior.
    918     """
    919     total_size = file_utils.get_directory_size_kibibytes(path);
    920     return result_utils_lib.ResultSizeInfo(
    921             client_result_collected_KB=total_size,
    922             original_result_total_KB=total_size,
    923             result_uploaded_KB=total_size,
    924             result_throttled=False)
    925 
    926 
    927 def _report_result_size_metrics(result_size_info):
    928     """Report result sizes information to metrics.
    929 
    930     @param result_size_info: A ResultSizeInfo namedtuple containing information
    931             of test result sizes.
    932     """
    933     fields = {'result_throttled' : result_size_info.result_throttled}
    934     metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
    935                     description='The total size (in KB) of test results '
    936                     'collected from test device. Set to be the total size of '
    937                     'the given path.'
    938                     ).increment_by(result_size_info.client_result_collected_KB,
    939                                    fields=fields)
    940     metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
    941                     description='The original size (in KB) of test results '
    942                     'before being trimmed.'
    943                     ).increment_by(result_size_info.original_result_total_KB,
    944                                    fields=fields)
    945     metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
    946                     description='The total size (in KB) of test results to be '
    947                     'uploaded.'
    948                     ).increment_by(result_size_info.result_uploaded_KB,
    949                                    fields=fields)
    950 
    951 
    952 @metrics.SecondsTimerDecorator(
    953         'chromeos/autotest/result_collection/collect_result_sizes_duration')
    954 def collect_result_sizes(path, log=logging.debug):
    955     """Collect the result sizes information and build result summary.
    956 
    957     It first tries to merge directory summaries and calculate the result sizes
    958     including:
    959     client_result_collected_KB: The volume in KB that's transfered from the test
    960             device.
    961     original_result_total_KB: The volume in KB that's the original size of the
    962             result files before being trimmed.
    963     result_uploaded_KB: The volume in KB that will be uploaded.
    964     result_throttled: Indicating if the result files were throttled.
    965 
    966     If directory summary merging failed for any reason, fall back to use the
    967     total size of the given result directory.
    968 
    969     @param path: Path of the result directory to get size information.
    970     @param log: The logging method, default to logging.debug
    971     @return: A ResultSizeInfo namedtuple containing information of test result
    972              sizes.
    973     """
    974     try:
    975         client_collected_bytes, summary, files = result_utils.merge_summaries(
    976                 path)
    977         result_size_info = result_utils_lib.get_result_size_info(
    978                 client_collected_bytes, summary)
    979         html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
    980         result_view.build(client_collected_bytes, summary, html_file)
    981 
    982         # Delete all summary files after final view is built.
    983         for summary_file in files:
    984             os.remove(summary_file)
    985     except:
    986         log('Failed to calculate result sizes based on directory summaries for '
    987             'directory %s. Fall back to record the total size.\nException: %s' %
    988             (path, traceback.format_exc()))
    989         result_size_info = _get_default_size_info(path)
    990 
    991     _report_result_size_metrics(result_size_info)
    992 
    993     return result_size_info