# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import contextlib
import grp
import httplib
import json
import logging
import os
import random
import re
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This handles the special cases
# where an Android board's name does not match its build target name.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'

class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Metaclass enforcing that only one instance of a class exists per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]

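# Illustrative use of the Singleton metaclass (a sketch; MyClient below is a
# hypothetical class, not defined in this module):
#
#     class MyClient(object):
#         __metaclass__ = Singleton
#
#     MyClient() is MyClient()  # True: every call returns the same instance.
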
class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them.  Right now
        we only use attributes and labels, but as other attributes of an
        actual AFE Host object come into use (check rpc_interfaces.get_hosts()),
        we'll add them here so users aren't perplexed when their host's
        afe_host object complains that an attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                        Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)

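# Illustrative call (a sketch; the build name below is a made-up example of the
# documented format, not a real build):
#
#     ParseBuildName('x86-alex-release/R20-2015.0.0')
#     # -> ('x86-alex', 'release', '20', '2015.0.0')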

def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for the host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of each label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or `None`.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or `None`.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None

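# Illustrative calls (a sketch; 'host1' and the labels shown are hypothetical,
# and `afe` is assumed to be a server.frontend.AFE instance):
#
#     # With a host labeled 'board:lumpy' and 'pool:bvt':
#     get_label_from_afe('host1', 'pool:', afe)   # -> 'bvt'
#     get_board_from_afe('host1', afe)            # -> 'lumpy'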

# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, eg:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript could not be parsed.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s' % (
                _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
        like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: URL from which to fetch the lab status JSON.

    @returns The JSON object obtained from the given URL, or None if it could
             not be fetched within _MAX_LAB_STATUS_ATTEMPTS attempts.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if not lab_status['general_state'] in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return

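# Worked example (a sketch with made-up values): given a deserialized status
# such as
#
#     status = {'general_state': 'throttled',
#               'message': "Lab is 'throttled' [lumpy-.*] (lumpy is broken)"}
#
# the pattern 'lumpy-.*' is pulled out of the brackets, so
#
#     _decode_lab_status(status, 'lumpy-release/R46-7272.0.0')
#
# raises TestLabException, while e.g. a peach_pit build is allowed through.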

def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if a host is a lab device (not moblab) within the lab zone.

    @param hostname: hostname of the host to check.

    @return: True if we are not running on moblab and the hostname is in the
             lab zone.
    """
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Lookup and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, eg the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e. if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = filter(job_status.view_is_relevant, views)
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = {}
    for view in relevant_views:
        test_views[view['test_name']] = view['status']

    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format, e.g.: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        we give up and use the parameters we're sure of instead (suite,
        board). e.g.:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)

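# Illustrative call (a sketch; the prefix and values are made up to match the
# docstring examples):
#
#     get_data_key('stats', 'bvt-cq', 'lumpy-release/R46-7272.0.0', 'lumpy')
#     # -> 'stats.lumpy.release.46.bvt-cq'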

def setup_logging(logfile=None, prefix=False):
    """Set up basic logging with all logging info stripped.

    Calls to logging will only show the message. No severity is logged.

    @param logfile: If specified, dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add a prefix to log
        entries that includes timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    logging.getLogger().handlers = []

    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    else:
        log_format = '%(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(screen_handler)
    logging.getLogger().setLevel(logging.INFO)
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(logging.DEBUG)
        logging.getLogger().addHandler(file_handler)

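# Illustrative usage (a sketch; '/tmp/run_suite.log' is a hypothetical path):
#
#     setup_logging(logfile='/tmp/run_suite.log', prefix=True)
#     logging.info('hello')   # emits "<timestamp> INFO | hello"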

def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    Users in a restricted group only have access to the master.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        try:
            if group and username in grp.getgrnam(group).gr_mem:
                return True
        except KeyError:
            logging.debug("%s is not a valid group.", group)
    return False


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independent
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the master it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (eg:
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and master should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)

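# Illustrative output (a sketch; the hostname, task id and timestamp are made
# up, and datetime refers to the standard library module):
#
#     get_special_task_exec_path('chromeos1-row1-rack1-host1', 123, 'Verify',
#                                datetime.datetime(2013, 5, 1, 12, 0, 0))
#     # On the master: 'hosts/chromeos1-row1-rack1-host1/123-verify'
#     # On a shard:    'hosts/chromeos1-row1-rack1-host1/123-verify/20130105120000'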

def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id    Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside the chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside the chroot or cros_build_lib
             cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        logging.warning('Unable to import chromite. Cannot detect chroot. '
                        'Defaulting to False.')
        return False
    return cros_build_lib.IsInsideChroot()


def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test job created by suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that parsing fails for pgo and chrome-perf builds. Since the lab does
    not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds have a different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            # branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info

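# Illustrative call (a sketch; the job name comes from the docstring example):
#
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
#     # -> {'build': 'lumpy-release/R46-7272.0.0',
#     #     'suite': 'bvt',
#     #     'build_version': 'R46-7272.0.0',
#     #     'board': 'lumpy'}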

def verify_not_root_user():
    """Simple function to error out if running with uid == 0"""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Lookup hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Lookup host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())

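# Illustrative behavior (a sketch; the hostname, afe_host and pool objects are
# placeholders, not real lab data):
#
#     get_host_info_from_machine('host1.cros')
#     # -> ('host1.cros', <EmptyAFEHost instance>)
#
#     get_host_info_from_machine({'hostname': 'host1.cros',
#                                 'afe_host': afe_host,
#                                 'connection_pool': pool})
#     # -> ('host1.cros', afe_host)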

def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def get_creds_abspath(creds_file):
    """Returns the abspath of the credentials file.

    If creds_file is already an absolute path, just return it.
    Otherwise, assume it is located in the creds directory
    specified in global_config and return the absolute path.

    @param creds_file: A path to the credentials file.
    @return: An absolute path to the credentials file.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not creds_dir or not os.path.exists(creds_dir):
        creds_dir = common.autotest_dir
    return os.path.join(creds_dir, creds_file)


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield

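# Illustrative usage (a sketch; 'my_service' is a hypothetical service name,
# passed through to ts_mon_config.SetupTsMonGlobalState when available):
#
#     with SetupTsMonGlobalState('my_service'):
#         ...  # metrics emitted here are flushed by ts_mon; if chromite is
#              # missing, the TrivialContextManager fallback does nothing.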

def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
            go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('still waiting for following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for the afe on locking these hosts.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @yields Boolean True if all the duts we locked went idle within max_wait,
            or False if we timed out waiting for them to go idle.
    """
    locked_duts = []
    try:
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)

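# Illustrative usage (a sketch; the dut names and afe object are placeholders):
#
#     with lock_duts_and_wait(['host1', 'host2'], afe,
#                             lock_msg='locked for suite xyz') as all_idle:
#         if not all_idle:
#             logging.warning('some duts never went idle')
#         # ... run work that needs the duts locked ...
#     # The duts locked here are unlocked on exit, even if the block raises.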
    848 
    849 def _get_default_size_info(path):
    850     """Get the default result size information.
    851 
    852     In case directory summary is failed to build, assume the test result is not
    853     throttled and all result sizes are the size of existing test results.
    854 
    855     @return: A namedtuple of result size informations, including:
    856             client_result_collected_KB: The total size (in KB) of test results
    857                     collected from test device. Set to be the total size of the
    858                     given path.
    859             original_result_total_KB: The original size (in KB) of test results
    860                     before being trimmed. Set to be the total size of the given
    861                     path.
    862             result_uploaded_KB: The total size (in KB) of test results to be
    863                     uploaded. Set to be the total size of the given path.
    864             result_throttled: True if test results collection is throttled.
    865                     It's set to False in this default behavior.
    866     """
    867     total_size = file_utils.get_directory_size_kibibytes(path);
    868     return result_utils_lib.ResultSizeInfo(
    869             client_result_collected_KB=total_size,
    870             original_result_total_KB=total_size,
    871             result_uploaded_KB=total_size,
    872             result_throttled=False)
    873 
    874 
    875 def _report_result_size_metrics(result_size_info):
    876     """Report result sizes information to metrics.
    877 
    878     @param result_size_info: A ResultSizeInfo namedtuple containing information
    879             of test result sizes.
    880     """
    881     fields = {'result_throttled' : result_size_info.result_throttled}
    882     metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
    883                     description='The total size (in KB) of test results '
    884                     'collected from test device. Set to be the total size of '
    885                     'the given path.'
    886                     ).increment_by(result_size_info.client_result_collected_KB,
    887                                    fields=fields)
    888     metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
    889                     description='The original size (in KB) of test results '
    890                     'before being trimmed.'
    891                     ).increment_by(result_size_info.original_result_total_KB,
    892                                    fields=fields)
    893     metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
    894                     description='The total size (in KB) of test results to be '
    895                     'uploaded.'
    896                     ).increment_by(result_size_info.result_uploaded_KB,
    897                                    fields=fields)
    898 
    899 
@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes,
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging fails for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after the final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info