Home | History | Annotate | Download | only in lxc
      1 # Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import os
      7 import socket
      8 import time
      9 
     10 import common
     11 
     12 from autotest_lib.client.bin import utils
     13 from autotest_lib.client.common_lib import error
     14 from autotest_lib.client.common_lib.global_config import global_config
     15 from autotest_lib.site_utils.lxc import config as lxc_config
     16 from autotest_lib.site_utils.lxc import constants
     17 from autotest_lib.site_utils.lxc import container_pool
     18 from autotest_lib.site_utils.lxc import lxc
     19 from autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail
     20 from autotest_lib.site_utils.lxc.base_image import BaseImage
     21 from autotest_lib.site_utils.lxc.constants import \
     22     CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX
     23 from autotest_lib.site_utils.lxc.container import Container
     24 from autotest_lib.site_utils.lxc.container_factory import ContainerFactory
     25 
     26 try:
     27     from chromite.lib import metrics
     28     from infra_libs import ts_mon
     29 except ImportError:
     30     import mock
     31     metrics = utils.metrics_mock
     32     ts_mon = mock.Mock()
     33 
     34 
# Timeout (in seconds) for container pool operations.  It is passed to the
# pool client when a container is requested in
# _PoolBasedFactory.create_container.
_CONTAINER_POOL_TIMEOUT = 3

# Boolean read from the LXC_POOL/use_lxc_pool entry of the global config.
# When true, ContainerBucket uses _PoolBasedFactory (instead of the plain
# ContainerFactory) unless a factory is passed in explicitly.
_USE_LXC_POOL = global_config.get_config_value('LXC_POOL', 'use_lxc_pool',
                                               type=bool)
     40 
     41 class ContainerBucket(object):
     42     """A wrapper class to interact with containers in a specific container path.
     43     """
     44 
     45     def __init__(self, container_path=constants.DEFAULT_CONTAINER_PATH,
     46                  container_factory=None):
     47         """Initialize a ContainerBucket.
     48 
     49         @param container_path: Path to the directory used to store containers.
     50                                Default is set to AUTOSERV/container_path in
     51                                global config.
     52         @param container_factory: A factory for creating Containers.
     53         """
     54         self.container_path = os.path.realpath(container_path)
     55         if container_factory is not None:
     56             self._factory = container_factory
     57         else:
     58             # Pick the correct factory class to use (pool-based, or regular)
     59             # based on the config variable.
     60             factory_class = ContainerFactory
     61             if _USE_LXC_POOL:
     62                 logging.debug('Using container pool')
     63                 factory_class = _PoolBasedFactory
     64 
     65             # Pass in the container path so that the bucket is hermetic (i.e. so
     66             # that if the container path is customized, the base image doesn't
     67             # fall back to using the default container path).
     68             try:
     69                 base_image_ok = True
     70                 container = BaseImage(self.container_path).get()
     71             except error.ContainerError as e:
     72                 base_image_ok = False
     73                 raise e
     74             finally:
     75                 metrics.Counter(METRICS_PREFIX + '/base_image',
     76                                 field_spec=[ts_mon.BooleanField('corrupted')]
     77                                 ).increment(
     78                                     fields={'corrupted': not base_image_ok})
     79             self._factory = factory_class(
     80                 base_container=container,
     81                 lxc_path=self.container_path)
     82         self.container_cache = {}
     83 
     84 
     85     def get_all(self, force_update=False):
     86         """Get details of all containers.
     87 
     88         Retrieves all containers owned by the bucket.  Note that this doesn't
     89         include the base container, or any containers owned by the container
     90         pool.
     91 
     92         @param force_update: Boolean, ignore cached values if set.
     93 
     94         @return: A dictionary of all containers with detailed attributes,
     95                  indexed by container name.
     96         """
     97         info_collection = lxc.get_container_info(self.container_path)
     98         containers = {} if force_update else self.container_cache
     99         for info in info_collection:
    100             if info["name"] in containers:
    101                 continue
    102             container = Container.create_from_existing_dir(self.container_path,
    103                                                            **info)
    104             # Active containers have an ID.  Zygotes and base containers, don't.
    105             if container.id is not None:
    106                 containers[container.id] = container
    107         self.container_cache = containers
    108         return containers
    109 
    110 
    111     def get_container(self, container_id):
    112         """Get a container with matching name.
    113 
    114         @param container_id: ID of the container.
    115 
    116         @return: A container object with matching name. Returns None if no
    117                  container matches the given name.
    118         """
    119         if container_id in self.container_cache:
    120             return self.container_cache[container_id]
    121 
    122         return self.get_all().get(container_id, None)
    123 
    124 
    125     def exist(self, container_id):
    126         """Check if a container exists with the given name.
    127 
    128         @param container_id: ID of the container.
    129 
    130         @return: True if the container with the given ID exists, otherwise
    131                  returns False.
    132         """
    133         return self.get_container(container_id) != None
    134 
    135 
    136     def destroy_all(self):
    137         """Destroy all containers, base must be destroyed at the last.
    138         """
    139         containers = self.get_all().values()
    140         for container in sorted(
    141                 containers, key=lambda n: 1 if n.name == constants.BASE else 0):
    142             key = container.id
    143             logging.info('Destroy container %s.', container.name)
    144             container.destroy()
    145             del self.container_cache[key]
    146 
    147 
    148 
    149     @metrics.SecondsTimerDecorator(
    150         '%s/setup_test_duration' % constants.STATS_KEY)
    151     @cleanup_if_fail()
    152     def setup_test(self, container_id, job_id, server_package_url, result_path,
    153                    control=None, skip_cleanup=False, job_folder=None,
    154                    dut_name=None):
    155         """Setup test container for the test job to run.
    156 
    157         The setup includes:
    158         1. Install autotest_server package from given url.
    159         2. Copy over local shadow_config.ini.
    160         3. Mount local site-packages.
    161         4. Mount test result directory.
    162 
    163         TODO(dshi): Setup also needs to include test control file for autoserv
    164                     to run in container.
    165 
    166         @param container_id: ID to assign to the test container.
    167         @param job_id: Job id for the test job to run in the test container.
    168         @param server_package_url: Url to download autotest_server package.
    169         @param result_path: Directory to be mounted to container to store test
    170                             results.
    171         @param control: Path to the control file to run the test job. Default is
    172                         set to None.
    173         @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
    174                              container failures.
    175         @param job_folder: Folder name of the job, e.g., 123-debug_user.
    176         @param dut_name: Name of the dut to run test, used as the hostname of
    177                          the container. Default is None.
    178         @return: A Container object for the test container.
    179 
    180         @raise ContainerError: If container does not exist, or not running.
    181         """
    182         start_time = time.time()
    183 
    184         if not os.path.exists(result_path):
    185             raise error.ContainerError('Result directory does not exist: %s',
    186                                        result_path)
    187         result_path = os.path.abspath(result_path)
    188 
    189         # Save control file to result_path temporarily. The reason is that the
    190         # control file in drone_tmp folder can be deleted during scheduler
    191         # restart. For test not using SSP, the window between test starts and
    192         # control file being picked up by the test is very small (< 2 seconds).
    193         # However, for tests using SSP, it takes around 1 minute before the
    194         # container is setup. If scheduler is restarted during that period, the
    195         # control file will be deleted, and the test will fail.
    196         if control:
    197             control_file_name = os.path.basename(control)
    198             safe_control = os.path.join(result_path, control_file_name)
    199             utils.run('cp %s %s' % (control, safe_control))
    200 
    201         # Create test container from the base container.
    202         container = self._factory.create_container(container_id)
    203 
    204         # Deploy server side package
    205         container.install_ssp(server_package_url)
    206 
    207         deploy_config_manager = lxc_config.DeployConfigManager(container)
    208         deploy_config_manager.deploy_pre_start()
    209 
    210         # Copy over control file to run the test job.
    211         if control:
    212             container.install_control_file(safe_control)
    213 
    214         mount_entries = [(constants.SITE_PACKAGES_PATH,
    215                           constants.CONTAINER_SITE_PACKAGES_PATH,
    216                           True),
    217                          (result_path,
    218                           os.path.join(constants.RESULT_DIR_FMT % job_folder),
    219                           False),
    220         ]
    221 
    222         # Update container config to mount directories.
    223         for source, destination, readonly in mount_entries:
    224             container.mount_dir(source, destination, readonly)
    225 
    226         # Update file permissions.
    227         # TODO(dshi): crbug.com/459344 Skip following action when test container
    228         # can be unprivileged container.
    229         autotest_path = os.path.join(
    230                 container.rootfs,
    231                 constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep))
    232         utils.run('sudo chown -R root "%s"' % autotest_path)
    233         utils.run('sudo chgrp -R root "%s"' % autotest_path)
    234 
    235         container.start(wait_for_network=True)
    236         deploy_config_manager.deploy_post_start()
    237 
    238         # Update the hostname of the test container to be `dut-name`.
    239         # Some TradeFed tests use hostname in test results, which is used to
    240         # group test results in dashboard. The default container name is set to
    241         # be the name of the folder, which is unique (as it is composed of job
    242         # id and timestamp. For better result view, the container's hostname is
    243         # set to be a string containing the dut hostname.
    244         if dut_name:
    245             container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT %
    246                                    dut_name.replace('.', '-'))
    247 
    248         container.modify_import_order()
    249 
    250         container.verify_autotest_setup(job_folder)
    251 
    252         logging.debug('Test container %s is set up.', container.name)
    253         return container
    254 
    255 
    256 class _PoolBasedFactory(ContainerFactory):
    257     """A ContainerFactory that queries the running container pool.
    258 
    259     Implementation falls back to the regular container factory behaviour
    260     (i.e. locally cloning a container) if the pool is unavailable or if it does
    261     not return a bucket before the specified timeout.
    262     """
    263 
    264     def __init__(self, *args, **kwargs):
    265         super(_PoolBasedFactory, self).__init__(*args, **kwargs)
    266         try:
    267             self._client = container_pool.Client()
    268         except (socket.error, socket.timeout) as e:
    269             # If an error occurs connecting to the container pool, fall back to
    270             # the default container factory.
    271             logging.exception('Container pool connection failed.')
    272             self._client = None
    273 
    274 
    275     def create_container(self, new_id):
    276         """Creates a new container.
    277 
    278         Attempts to retrieve a container from the container pool.  If that
    279         operation fails, this falls back to the parent class behaviour.
    280 
    281         @param new_id: ContainerId to assign to the new container.  Containers
    282                        must be assigned an ID before they can be released from
    283                        the container pool.
    284 
    285         @return: The new container.
    286         """
    287         container = None
    288         if self._client:
    289             try:
    290                 container = self._client.get_container(new_id,
    291                                                        _CONTAINER_POOL_TIMEOUT)
    292             except Exception:
    293                 logging.exception('Error communicating with container pool.')
    294             else:
    295                 if container is not None:
    296                     logging.debug('Retrieved container from pool: %s',
    297                                   container.name)
    298                     return container
    299         metrics.Counter(METRICS_PREFIX + '/containers_served',
    300                         field_spec = [ts_mon.BooleanField('from_pool')]
    301                         ).increment(fields={
    302                             'from_pool': (container is not None)})
    303         if container is not None:
    304             return container
    305 
    306         # If the container pool did not yield a container, make one locally.
    307         logging.warning('Unable to obtain container from pre-populated pool.  '
    308                         'Creating container locally.  This slows server tests '
    309                         'down and should be debugged even if local creation '
    310                         'works out.')
    311         return super(_PoolBasedFactory, self).create_container(new_id)
    312