Home | History | Annotate | Download | only in site_utils
      1 # Copyright 2015 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """This module provides some tools to interact with LXC containers, for example:
      6   1. Download base container from given GS location, setup the base container.
      7   2. Create a snapshot as test container from base container.
      8   3. Mount a directory in drone to the test container.
      9   4. Run a command in the container and return the output.
     10   5. Cleanup, e.g., destroy the container.
     11 
     12 This tool can also be used to set up a base container for test. For example,
     13   python lxc.py -s -p /tmp/container
     14 This command will download and setup base container in directory /tmp/container.
     15 After that command finishes, you can run lxc command to work with the base
     16 container, e.g.,
     17   lxc-start -P /tmp/container -n base -d
     18   lxc-attach -P /tmp/container -n base
     19 """
     20 
     21 
     22 import argparse
     23 import logging
     24 import os
     25 import re
     26 import socket
     27 import sys
     28 import time
     29 
     30 import common
     31 from autotest_lib.client.bin import utils
     32 from autotest_lib.client.common_lib import error
     33 from autotest_lib.client.common_lib import global_config
     34 from autotest_lib.client.common_lib.cros import dev_server
     35 from autotest_lib.client.common_lib.cros import retry
     36 from autotest_lib.client.common_lib.cros.graphite import autotest_es
     37 from autotest_lib.server import utils as server_utils
     38 from autotest_lib.site_utils import lxc_config
     39 from autotest_lib.site_utils import lxc_utils
     40 
     41 try:
     42     from chromite.lib import metrics
     43 except ImportError:
     44     metrics = utils.metrics_mock
     45 
     46 
# Shared handle on the autotest global configuration; every AUTOSERV setting
# below is read through it at import time.
config = global_config.global_config

# Name of the base container.
BASE = config.get_config_value('AUTOSERV', 'container_base_name')
# Naming convention of test container, e.g., test_300_1422862512_2424, where:
# 300:        The test job ID.
# 1422862512: The tick when container is created.
# 2424:       The PID of autoserv that starts the container.
TEST_CONTAINER_NAME_FMT = 'test_%s_%d_%d'
# Naming convention of the result directory in test container. The single %s
# placeholder is filled with the job result folder name.
RESULT_DIR_FMT = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'results',
                              '%s')
# Attributes to retrieve about containers. The order matters: it is used both
# as the column list passed to lxc-ls and as the keys zipped with each output
# row.
ATTRIBUTES = ['name', 'state']

# Format for mount entry to share a directory in host with container.
# source is the directory in host, destination is the directory in container.
# readonly is a binding flag for readonly mount, its value should be `,ro`
# (or an empty string for a read-write mount).
MOUNT_FMT = ('lxc.mount.entry = %(source)s %(destination)s none '
             'bind%(readonly)s 0 0')
# Whether server-side packaging (SSP) containers are enabled; defaults to True
# when the setting is absent.
SSP_ENABLED = config.get_config_value('AUTOSERV', 'enable_ssp_container',
                                      type=bool, default=True)
# URL of the folder that stores the base container.
CONTAINER_BASE_FOLDER_URL = config.get_config_value('AUTOSERV',
                                                    'container_base_folder_url')
# URL template for a base container tarball; the remaining %s is the container
# name.
CONTAINER_BASE_URL_FMT = '%s/%%s.tar.xz' % CONTAINER_BASE_FOLDER_URL
# URL of the tarball for the configured base container.
CONTAINER_BASE_URL = CONTAINER_BASE_URL_FMT % BASE
# Default directory used to store LXC containers.
DEFAULT_CONTAINER_PATH = config.get_config_value('AUTOSERV', 'container_path')

# Path to drone_tmp folder in the container, which stores the control file for
# test job to run.
CONTROL_TEMP_PATH = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'drone_tmp')

# Bash command to return the file count in a directory. Test the existence first
# so the command can return an error code if the directory doesn't exist.
COUNT_FILE_CMD = '[ -d %(dir)s ] && ls %(dir)s | wc -l'

# Command line to append content to a file.
# NOTE: the two string literals are concatenated without a separating space,
# so the rendered command reads `... %(file)s> /dev/null`.
APPEND_CMD_FMT = ('echo \'%(content)s\' | sudo tee --append %(file)s'
                  '> /dev/null')

# Path to site-packages in Moblab (host side), and the path used for it inside
# the container.
MOBLAB_SITE_PACKAGES = '/usr/lib64/python2.7/site-packages'
MOBLAB_SITE_PACKAGES_CONTAINER = '/usr/local/lib/python2.7/dist-packages/'

# Flag to indicate it's running in a Moblab. Due to crbug.com/457496, lxc-ls has
# different behavior in Moblab.
IS_MOBLAB = utils.is_moblab()

# TODO(dshi): If we are adding more logic in how lxc should interact with
# different systems, we should consider code refactoring to use a setting-style
# object to store following flags mapping to different systems.
# TODO(crbug.com/464834): Snapshot clone is disabled until Moblab can
# support overlayfs or aufs, which requires a newer kernel.
SUPPORT_SNAPSHOT_CLONE = not IS_MOBLAB

# Number of seconds to wait for network to be up in a container.
NETWORK_INIT_TIMEOUT = 300
# Interval between network checks (passed as sleep_interval when polling).
# Network bring up is slower in Moblab, so it is polled less frequently there.
NETWORK_INIT_CHECK_INTERVAL = 2 if IS_MOBLAB else 0.1

# Type string for container related metadata.
CONTAINER_CREATE_METADB_TYPE = 'container_create'
CONTAINER_CREATE_RETRY_METADB_TYPE = 'container_create_retry'
CONTAINER_RUN_TEST_METADB_TYPE = 'container_run_test'

# The container's hostname MUST start with `test_`. DHCP server in MobLab uses
# that prefix to determine the lease time.
CONTAINER_UTSNAME_FORMAT = 'test_%s'

# Prefix for the metric names emitted by this module.
STATS_KEY = 'chromeos/autotest/lxc'
    119 
    120 
    121 def _get_container_info_moblab(container_path, **filters):
    122     """Get a collection of container information in the given container path
    123     in a Moblab.
    124 
    125     TODO(crbug.com/457496): remove this method once python 3 can be installed
    126     in Moblab and lxc-ls command can use python 3 code.
    127 
    128     When running in Moblab, lxc-ls behaves differently from a server with python
    129     3 installed:
    130     1. lxc-ls returns a list of containers installed under /etc/lxc, the default
    131        lxc container directory.
    132     2. lxc-ls --active lists all active containers, regardless where the
    133        container is located.
    134     For such differences, we have to special case Moblab to make the behavior
    135     close to a server with python 3 installed. That is,
    136     1. List only containers in a given folder.
    137     2. Assume all active containers have state of RUNNING.
    138 
    139     @param container_path: Path to look for containers.
    140     @param filters: Key value to filter the containers, e.g., name='base'
    141 
    142     @return: A list of dictionaries that each dictionary has the information of
    143              a container. The keys are defined in ATTRIBUTES.
    144     """
    145     info_collection = []
    146     active_containers = utils.run('sudo lxc-ls --active').stdout.split()
    147     name_filter = filters.get('name', None)
    148     state_filter = filters.get('state', None)
    149     if filters and set(filters.keys()) - set(['name', 'state']):
    150         raise error.ContainerError('When running in Moblab, container list '
    151                                    'filter only supports name and state.')
    152 
    153     for name in os.listdir(container_path):
    154         # Skip all files and folders without rootfs subfolder.
    155         if (os.path.isfile(os.path.join(container_path, name)) or
    156             not lxc_utils.path_exists(os.path.join(container_path, name,
    157                                                    'rootfs'))):
    158             continue
    159         info = {'name': name,
    160                 'state': 'RUNNING' if name in active_containers else 'STOPPED'
    161                }
    162         if ((name_filter and name_filter != info['name']) or
    163             (state_filter and state_filter != info['state'])):
    164             continue
    165 
    166         info_collection.append(info)
    167     return info_collection
    168 
    169 
    170 def get_container_info(container_path, **filters):
    171     """Get a collection of container information in the given container path.
    172 
    173     This method parse the output of lxc-ls to get a list of container
    174     information. The lxc-ls command output looks like:
    175     NAME      STATE    IPV4       IPV6  AUTOSTART  PID   MEMORY  RAM     SWAP
    176     --------------------------------------------------------------------------
    177     base      STOPPED  -          -     NO         -     -       -       -
    178     test_123  RUNNING  10.0.3.27  -     NO         8359  6.28MB  6.28MB  0.0MB
    179 
    180     @param container_path: Path to look for containers.
    181     @param filters: Key value to filter the containers, e.g., name='base'
    182 
    183     @return: A list of dictionaries that each dictionary has the information of
    184              a container. The keys are defined in ATTRIBUTES.
    185     """
    186     if IS_MOBLAB:
    187         return _get_container_info_moblab(container_path, **filters)
    188 
    189     cmd = 'sudo lxc-ls -P %s -f -F %s' % (os.path.realpath(container_path),
    190                                           ','.join(ATTRIBUTES))
    191     output = utils.run(cmd).stdout
    192     info_collection = []
    193 
    194     for line in output.splitlines()[1:]:
    195         # Only LXC 1.x has the second line of '-' as a separator.
    196         if line.startswith('------'):
    197             continue
    198         info_collection.append(dict(zip(ATTRIBUTES, line.split())))
    199     if filters:
    200         filtered_collection = []
    201         for key, value in filters.iteritems():
    202             for info in info_collection:
    203                 if key in info and info[key] == value:
    204                     filtered_collection.append(info)
    205         info_collection = filtered_collection
    206     return info_collection
    207 
    208 
    209 def cleanup_if_fail():
    210     """Decorator to do cleanup if container fails to be set up.
    211     """
    212     def deco_cleanup_if_fail(func):
    213         """Wrapper for the decorator.
    214 
    215         @param func: Function to be called.
    216         """
    217         def func_cleanup_if_fail(*args, **kwargs):
    218             """Decorator to do cleanup if container fails to be set up.
    219 
    220             The first argument must be a ContainerBucket object, which can be
    221             used to retrieve the container object by name.
    222 
    223             @param func: function to be called.
    224             @param args: arguments for function to be called.
    225             @param kwargs: keyword arguments for function to be called.
    226             """
    227             bucket = args[0]
    228             name = utils.get_function_arg_value(func, 'name', args, kwargs)
    229             try:
    230                 skip_cleanup = utils.get_function_arg_value(
    231                         func, 'skip_cleanup', args, kwargs)
    232             except (KeyError, ValueError):
    233                 skip_cleanup = False
    234             try:
    235                 return func(*args, **kwargs)
    236             except:
    237                 exc_info = sys.exc_info()
    238                 try:
    239                     container = bucket.get(name)
    240                     if container and not skip_cleanup:
    241                         container.destroy()
    242                 except error.CmdError as e:
    243                     logging.error(e)
    244 
    245                 try:
    246                     job_id = utils.get_function_arg_value(
    247                             func, 'job_id', args, kwargs)
    248                 except (KeyError, ValueError):
    249                     job_id = ''
    250                 metadata={'drone': socket.gethostname(),
    251                           'job_id': job_id,
    252                           'success': False}
    253                 # Record all args if job_id is not available.
    254                 if not job_id:
    255                     metadata['args'] = str(args)
    256                     if kwargs:
    257                         metadata.update(kwargs)
    258                 autotest_es.post(use_http=True,
    259                                  type_str=CONTAINER_CREATE_METADB_TYPE,
    260                                  metadata=metadata)
    261 
    262                 # Raise the cached exception with original backtrace.
    263                 raise exc_info[0], exc_info[1], exc_info[2]
    264         return func_cleanup_if_fail
    265     return deco_cleanup_if_fail
    266 
    267 
    268 @retry.retry(error.CmdError, timeout_min=5)
    269 def download_extract(url, target, extract_dir):
    270     """Download the file from given url and save it to the target, then extract.
    271 
    272     @param url: Url to download the file.
    273     @param target: Path of the file to save to.
    274     @param extract_dir: Directory to extract the content of the file to.
    275     """
    276     remote_url = dev_server.DevServer.get_server_url(url)
    277     # TODO(xixuan): Better to only ssh to devservers in lab, and continue using
    278     # wget for ganeti devservers.
    279     if remote_url in dev_server.ImageServerBase.servers():
    280         tmp_file = '/tmp/%s' % os.path.basename(target)
    281         dev_server.ImageServerBase.download_file(url, tmp_file, timeout=300)
    282         utils.run('sudo mv %s %s' % (tmp_file, target))
    283     else:
    284         utils.run('sudo wget --timeout=300 -nv %s -O %s' % (url, target),
    285                   stderr_tee=utils.TEE_TO_LOGS)
    286 
    287     utils.run('sudo tar -xvf %s -C %s' % (target, extract_dir))
    288 
    289 
    290 def install_package_precheck(packages):
    291     """If SSP is not enabled or the test is running in chroot (using test_that),
    292     packages installation should be skipped.
    293 
    294     The check does not raise exception so tests started by test_that or running
    295     in an Autotest setup with SSP disabled can continue. That assume the running
    296     environment, chroot or a machine, has the desired packages installed
    297     already.
    298 
    299     @param packages: A list of names of the packages to install.
    300 
    301     @return: True if package installation can continue. False if it should be
    302              skipped.
    303 
    304     """
    305     if not SSP_ENABLED and not utils.is_in_container():
    306         logging.info('Server-side packaging is not enabled. Install package %s '
    307                      'is skipped.', packages)
    308         return False
    309 
    310     if server_utils.is_inside_chroot():
    311         logging.info('Test is running inside chroot. Install package %s is '
    312                      'skipped.', packages)
    313         return False
    314 
    315     if not utils.is_in_container():
    316         raise error.ContainerError('Package installation is only supported '
    317                                    'when test is running inside container.')
    318 
    319     return True
    320 
    321 
    322 @metrics.SecondsTimerDecorator('%s/install_packages_duration' % STATS_KEY)
    323 @retry.retry(error.CmdError, timeout_min=30)
    324 def install_packages(packages=[], python_packages=[], force_latest=False):
    325     """Install the given package inside container.
    326 
    327     !!! WARNING !!!
    328     This call may introduce several minutes of delay in test run. The best way
    329     to avoid such delay is to update the base container used for the test run.
    330     File a bug for infra deputy to update the base container with the new
    331     package a test requires.
    332 
    333     @param packages: A list of names of the packages to install.
    334     @param python_packages: A list of names of the python packages to install
    335                             using pip.
    336     @param force_latest: True to force to install the latest version of the
    337                          package. Default to False, which means skip installing
    338                          the package if it's installed already, even with an old
    339                          version.
    340 
    341     @raise error.ContainerError: If package is attempted to be installed outside
    342                                  a container.
    343     @raise error.CmdError: If the package doesn't exist or failed to install.
    344 
    345     """
    346     if not install_package_precheck(packages or python_packages):
    347         return
    348 
    349     # If force_latest is False, only install packages that are not already
    350     # installed.
    351     if not force_latest:
    352         packages = [p for p in packages if not utils.is_package_installed(p)]
    353         python_packages = [p for p in python_packages
    354                            if not utils.is_python_package_installed(p)]
    355         if not packages and not python_packages:
    356             logging.debug('All packages are installed already, skip reinstall.')
    357             return
    358 
    359     # Always run apt-get update before installing any container. The base
    360     # container may have outdated cache.
    361     utils.run('sudo apt-get update')
    362     # Make sure the lists are not None for iteration.
    363     packages = [] if not packages else packages
    364     if python_packages:
    365         packages.extend(['python-pip', 'python-dev'])
    366     if packages:
    367         utils.run('sudo apt-get install %s -y --force-yes' % ' '.join(packages))
    368         logging.debug('Packages are installed: %s.', packages)
    369 
    370     target_setting = ''
    371     # For containers running in Moblab, /usr/local/lib/python2.7/dist-packages/
    372     # is a readonly mount from the host. Therefore, new python modules have to
    373     # be installed in /usr/lib/python2.7/dist-packages/
    374     # Containers created in Moblab does not have autotest/site-packages folder.
    375     if not os.path.exists('/usr/local/autotest/site-packages'):
    376         target_setting = '--target="/usr/lib/python2.7/dist-packages/"'
    377     if python_packages:
    378         utils.run('sudo pip install %s %s' % (target_setting,
    379                                               ' '.join(python_packages)))
    380         logging.debug('Python packages are installed: %s.', python_packages)
    381 
    382 
    383 @retry.retry(error.CmdError, timeout_min=20)
    384 def install_package(package):
    385     """Install the given package inside container.
    386 
    387     This function is kept for backwards compatibility reason. New code should
    388     use function install_packages for better performance.
    389 
    390     @param package: Name of the package to install.
    391 
    392     @raise error.ContainerError: If package is attempted to be installed outside
    393                                  a container.
    394     @raise error.CmdError: If the package doesn't exist or failed to install.
    395 
    396     """
    397     logging.warn('This function is obsoleted, please use install_packages '
    398                  'instead.')
    399     install_packages(packages=[package])
    400 
    401 
    402 @retry.retry(error.CmdError, timeout_min=20)
    403 def install_python_package(package):
    404     """Install the given python package inside container using pip.
    405 
    406     This function is kept for backwards compatibility reason. New code should
    407     use function install_packages for better performance.
    408 
    409     @param package: Name of the python package to install.
    410 
    411     @raise error.CmdError: If the package doesn't exist or failed to install.
    412     """
    413     logging.warn('This function is obsoleted, please use install_packages '
    414                  'instead.')
    415     install_packages(python_packages=[package])
    416 
    417 
    418 class Container(object):
    419     """A wrapper class of an LXC container.
    420 
    421     The wrapper class provides methods to interact with a container, e.g.,
    422     start, stop, destroy, run a command. It also has attributes of the
    423     container, including:
    424     name: Name of the container.
    425     state: State of the container, e.g., ABORTING, RUNNING, STARTING, STOPPED,
    426            or STOPPING.
    427 
    428     lxc-ls can also collect other attributes of a container including:
    429     ipv4: IP address for IPv4.
    430     ipv6: IP address for IPv6.
    431     autostart: If the container will autostart at system boot.
    432     pid: Process ID of the container.
    433     memory: Memory used by the container, as a string, e.g., "6.2MB"
    434     ram: Physical ram used by the container, as a string, e.g., "6.2MB"
    435     swap: swap used by the container, as a string, e.g., "1.0MB"
    436 
    437     For performance reason, such info is not collected for now.
    438 
    439     The attributes available are defined in ATTRIBUTES constant.
    440     """
    441 
    442     def __init__(self, container_path, attribute_values):
    443         """Initialize an object of LXC container with given attribute values.
    444 
    445         @param container_path: Directory that stores the container.
    446         @param attribute_values: A dictionary of attribute values for the
    447                                  container.
    448         """
    449         self.container_path = os.path.realpath(container_path)
    450         # Path to the rootfs of the container. This will be initialized when
    451         # property rootfs is retrieved.
    452         self._rootfs = None
    453         for attribute, value in attribute_values.iteritems():
    454             setattr(self, attribute, value)
    455 
    456 
    def refresh_status(self):
        """Refresh the status information of the container.

        Looks the container up by name in its container path and copies every
        attribute reported for it onto this object.

        @raise error.ContainerError: If no container with this name is found in
                                     the container path.
        """
        containers = get_container_info(self.container_path, name=self.name)
        if not containers:
            # Both values must be passed as one tuple; formatting with only
            # the first one fails with TypeError before the error is raised.
            raise error.ContainerError(
                    'No container found in directory %s with name of %s.' %
                    (self.container_path, self.name))
        attribute_values = containers[0]
        for attribute, value in attribute_values.items():
            setattr(self, attribute, value)
    468 
    469 
    470     @property
    471     def rootfs(self):
    472         """Path to the rootfs of the container.
    473 
    474         This property returns the path to the rootfs of the container, that is,
    475         the folder where the container stores its local files. It reads the
    476         attribute lxc.rootfs from the config file of the container, e.g.,
    477             lxc.rootfs = /usr/local/autotest/containers/t4/rootfs
    478         If the container is created with snapshot, the rootfs is a chain of
    479         folders, separated by `:` and ordered by how the snapshot is created,
    480         e.g.,
    481             lxc.rootfs = overlayfs:/usr/local/autotest/containers/base/rootfs:
    482             /usr/local/autotest/containers/t4_s/delta0
    483         This function returns the last folder in the chain, in above example,
    484         that is `/usr/local/autotest/containers/t4_s/delta0`
    485 
    486         Files in the rootfs will be accessible directly within container. For
    487         example, a folder in host "[rootfs]/usr/local/file1", can be accessed
    488         inside container by path "/usr/local/file1". Note that symlink in the
    489         host can not across host/container boundary, instead, directory mount
    490         should be used, refer to function mount_dir.
    491 
    492         @return: Path to the rootfs of the container.
    493         """
    494         if not self._rootfs:
    495             cmd = ('sudo lxc-info -P %s -n %s -c lxc.rootfs' %
    496                    (self.container_path, self.name))
    497             lxc_rootfs_config = utils.run(cmd).stdout.strip()
    498             match = re.match('lxc.rootfs = (.*)', lxc_rootfs_config)
    499             if not match:
    500                 raise error.ContainerError(
    501                         'Failed to locate rootfs for container %s. lxc.rootfs '
    502                         'in the container config file is %s' %
    503                         (self.name, lxc_rootfs_config))
    504             lxc_rootfs = match.group(1)
    505             self.clone_from_snapshot = ':' in lxc_rootfs
    506             if self.clone_from_snapshot:
    507                 self._rootfs = lxc_rootfs.split(':')[-1]
    508             else:
    509                 self._rootfs = lxc_rootfs
    510         return self._rootfs
    511 
    512 
    def attach_run(self, command, bash=True):
        """Run a command inside the container through lxc-attach.

        @param command: Command to run in the container.
        @param bash: Run the command through bash -c "command". This allows
                     pipes to be used in command. Default is set to True. A
                     command that already starts with `bash -c` is not wrapped
                     again.

        @return: The result returned by utils.run for the lxc-attach command.

        @raise error.CmdError: If container does not exist, or not running.
        """
        if bash and not command.startswith('bash -c'):
            command = 'bash -c "%s"' % utils.sh_escape(command)
        attach_cmd = 'sudo lxc-attach -P %s -n %s -- %s' % (
                self.container_path, self.name, command)
        # TODO(dshi): crbug.com/459344 Set sudo to default to False when test
        # container can be unprivileged container.
        return utils.run(attach_cmd)
    531 
    532 
    def is_network_up(self):
        """Check network connectivity from inside the container.

        The check runs `curl --head` against the base container url inside the
        container; a failing command is logged at debug level.

        @return: True if the network is up, otherwise False.
        """
        probe_cmd = 'curl --head %s' % CONTAINER_BASE_URL
        try:
            self.attach_run(probe_cmd)
        except error.CmdError as e:
            logging.debug(e)
            return False
        return True
    544 
    545 
    @metrics.SecondsTimerDecorator('%s/container_start_duration' % STATS_KEY)
    def start(self, wait_for_network=True):
        """Start the container in the background.

        After lxc-start returns, the container status is refreshed and must be
        RUNNING. Optionally the call then blocks until is_network_up reports
        success or NETWORK_INIT_TIMEOUT is reached.

        @param wait_for_network: True to wait for network to be up. Default is
                                 set to True.

        @raise ContainerError: If container does not exist, or fails to start.
        """
        start_cmd = 'sudo lxc-start -P %s -n %s -d' % (self.container_path,
                                                       self.name)
        lxc_output = utils.run(start_cmd).stdout
        self.refresh_status()
        if self.state != 'RUNNING':
            container_dir = os.path.join(self.container_path, self.name)
            raise error.ContainerError(
                    'Container %s failed to start. lxc command output:\n%s' %
                    (container_dir, lxc_output))

        if not wait_for_network:
            return

        logging.debug('Wait for network to be up.')
        wait_began = time.time()
        utils.poll_for_condition(condition=self.is_network_up,
                                 timeout=NETWORK_INIT_TIMEOUT,
                                 sleep_interval=NETWORK_INIT_CHECK_INTERVAL)
        logging.debug('Network is up after %.2f seconds.',
                      time.time() - wait_began)
    572 
    573 
    @metrics.SecondsTimerDecorator('%s/container_stop_duration' % STATS_KEY)
    def stop(self):
        """Stop the container.

        After lxc-stop returns, the container status is refreshed and must be
        STOPPED.

        @raise ContainerError: If container does not exist, or fails to stop.
        """
        stop_cmd = 'sudo lxc-stop -P %s -n %s' % (self.container_path,
                                                  self.name)
        lxc_output = utils.run(stop_cmd).stdout
        self.refresh_status()
        if self.state == 'STOPPED':
            return

        container_dir = os.path.join(self.container_path, self.name)
        raise error.ContainerError(
                'Container %s failed to be stopped. lxc command output:\n'
                '%s' % (container_dir, lxc_output))
    588 
    589 
    @metrics.SecondsTimerDecorator('%s/container_destroy_duration' % STATS_KEY)
    def destroy(self, force=True):
        """Destroy the container with lxc-destroy.

        @param force: Set to True to force to destroy the container even if it's
                      running (adds `-f` to the command). This is faster than
                      stopping a container first and then destroying it.
                      Default is set to True.

        @raise ContainerError: If container does not exist or failed to destroy
                               the container.
        """
        force_flag = ' -f' if force else ''
        destroy_cmd = 'sudo lxc-destroy -P %s -n %s%s' % (
                self.container_path, self.name, force_flag)
        utils.run(destroy_cmd)
    606 
    607 
    def mount_dir(self, source, destination, readonly=False):
        """Share a host directory with the container through a bind mount.

        The mount point is created under the container's rootfs and a mount
        entry is appended to the container's config file.

        @param source: Directory in host to be mounted.
        @param destination: Directory in container to mount the source directory
        @param readonly: Set to True to make a readonly mount, default is False.
        """
        # Destination path in container must be relative.
        relative_dest = destination.lstrip('/')
        # Create directory in container for mount.
        mount_point = os.path.join(self.rootfs, relative_dest)
        utils.run('sudo mkdir -p %s' % mount_point)

        container_config = os.path.join(self.container_path, self.name,
                                        'config')
        readonly_flag = ',ro' if readonly else ''
        mount_entry = MOUNT_FMT % {'source': source,
                                   'destination': relative_dest,
                                   'readonly': readonly_flag}
        append_cmd = APPEND_CMD_FMT % {'content': mount_entry,
                                       'file': container_config}
        utils.run(append_cmd)
    624 
    625 
    def verify_autotest_setup(self, job_folder):
        """Verify autotest code is set up properly in the container.

        Each directory that autotest relies on must exist inside the container
        and hold at least a minimum number of entries. Afterwards a symlink to
        gsutil is created in /usr/bin/ when gsutil is found under /root/gsutil.

        @param job_folder: Name of the job result folder.

        @raise ContainerError: If autotest code is not set up properly.
        """
        site_packages_path = (
                MOBLAB_SITE_PACKAGES_CONTAINER if IS_MOBLAB else
                os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
                             'site-packages'))
        # Pairs of (directory, minimum file count) that must hold.
        minimum_entries = [
                (lxc_config.CONTAINER_AUTOTEST_DIR, 3),
                (RESULT_DIR_FMT % job_folder, 0),
                (site_packages_path, 3)]
        for directory, minimum in minimum_entries:
            count_cmd = COUNT_FILE_CMD % {'dir': directory}
            entries = int(self.attach_run(command=count_cmd).stdout)
            logging.debug('%s entries in %s.', entries, directory)
            if entries < minimum:
                raise error.ContainerError('%s is not properly set up.' %
                                           directory)
        # lxc-attach and run command does not run in shell, thus .bashrc is not
        # loaded. Following command creates a symlink in /usr/bin/ for gsutil
        # if it's installed.
        # TODO(dshi): Remove this code after lab container is updated with
        # gsutil installed in /usr/bin/
        self.attach_run('test -f /root/gsutil/gsutil && '
                        'ln -s /root/gsutil/gsutil /usr/bin/gsutil || true')
    658 
    659 
    def modify_import_order(self):
        """Swap the python import order of lib and local/lib.

        In Moblab, the host's python modules located in
        /usr/lib64/python2.7/site-packages is mounted to following folder inside
        container: /usr/local/lib/python2.7/dist-packages/. The modules include
        an old version of requests module, which is used in autotest
        site-packages. For test, the module is only used in
        dev_server/symbolicate_dump for requests.call and requests.codes.OK.
        When pip is installed inside the container, it installs requests module
        with version of 2.2.1 in /usr/lib/python2.7/dist-packages/. The version
        is newer than the one used in autotest site-packages, but not the latest
        either.
        According to /usr/lib/python2.7/site.py, modules in /usr/local/lib are
        imported before the ones in /usr/lib. That leads to pip to use the older
        version of requests (0.11.2), and it will fail. On the other hand,
        requests module 2.2.1 can't be installed in CrOS (refer to CL:265759),
        and higher version of requests module can't work with pip.
        The only fix to resolve this is to switch the import order, so modules
        in /usr/lib can be imported before /usr/local/lib.

        The swap is done with three in-place sed edits of site.py, which must
        run in this order:
          1. "local/lib" entries become a temporary "lib_placeholder".
          2. "lib" entries become "local/lib".
          3. The placeholder becomes "lib".
        """
        site_module = '/usr/lib/python2.7/site.py'
        sed_templates = (
                "sed -i ':a;N;$!ba;s/\"local\/lib\",\\n/"
                "\"lib_placeholder\",\\n/g' %s",
                "sed -i ':a;N;$!ba;s/\"lib\",\\n/"
                "\"local\/lib\",\\n/g' %s",
                'sed -i "s/lib_placeholder/lib/g" %s',
        )
        for template in sed_templates:
            self.attach_run(template % site_module)
    688 
    689 
    690 
    691 class ContainerBucket(object):
    692     """A wrapper class to interact with containers in a specific container path.
    693     """
    694 
    695     def __init__(self, container_path=DEFAULT_CONTAINER_PATH):
    696         """Initialize a ContainerBucket.
    697 
    698         @param container_path: Path to the directory used to store containers.
    699                                Default is set to AUTOSERV/container_path in
    700                                global config.
    701         """
    702         self.container_path = os.path.realpath(container_path)
    703 
    704 
    705     def get_all(self):
    706         """Get details of all containers.
    707 
    708         @return: A dictionary of all containers with detailed attributes,
    709                  indexed by container name.
    710         """
    711         info_collection = get_container_info(self.container_path)
    712         containers = {}
    713         for info in info_collection:
    714             container = Container(self.container_path, info)
    715             containers[container.name] = container
    716         return containers
    717 
    718 
    719     def get(self, name):
    720         """Get a container with matching name.
    721 
    722         @param name: Name of the container.
    723 
    724         @return: A container object with matching name. Returns None if no
    725                  container matches the given name.
    726         """
    727         return self.get_all().get(name, None)
    728 
    729 
    730     def exist(self, name):
    731         """Check if a container exists with the given name.
    732 
    733         @param name: Name of the container.
    734 
    735         @return: True if the container with the given name exists, otherwise
    736                  returns False.
    737         """
    738         return self.get(name) != None
    739 
    740 
    741     def destroy_all(self):
    742         """Destroy all containers, base must be destroyed at the last.
    743         """
    744         containers = self.get_all().values()
    745         for container in sorted(containers,
    746                                 key=lambda n: 1 if n.name == BASE else 0):
    747             logging.info('Destroy container %s.', container.name)
    748             container.destroy()
    749 
    750 
    751     @metrics.SecondsTimerDecorator('%s/create_from_base_duration' % STATS_KEY)
    752     def create_from_base(self, name, disable_snapshot_clone=False,
    753                          force_cleanup=False):
    754         """Create a container from the base container.
    755 
    756         @param name: Name of the container.
    757         @param disable_snapshot_clone: Set to True to force to clone without
    758                 using snapshot clone even if the host supports that.
    759         @param force_cleanup: Force to cleanup existing container.
    760 
    761         @return: A Container object for the created container.
    762 
    763         @raise ContainerError: If the container already exist.
    764         @raise error.CmdError: If lxc-clone call failed for any reason.
    765         """
    766         if self.exist(name) and not force_cleanup:
    767             raise error.ContainerError('Container %s already exists.' % name)
    768 
    769         # Cleanup existing container with the given name.
    770         container_folder = os.path.join(self.container_path, name)
    771         if lxc_utils.path_exists(container_folder) and force_cleanup:
    772             container = Container(self.container_path, {'name': name})
    773             try:
    774                 container.destroy()
    775             except error.CmdError as e:
    776                 # The container could be created in a incompleted state. Delete
    777                 # the container folder instead.
    778                 logging.warn('Failed to destroy container %s, error: %s',
    779                              name, e)
    780                 utils.run('sudo rm -rf "%s"' % container_folder)
    781 
    782         use_snapshot = SUPPORT_SNAPSHOT_CLONE and not disable_snapshot_clone
    783         snapshot = '-s' if  use_snapshot else ''
    784         # overlayfs is the default clone backend storage. However it is not
    785         # supported in Ganeti yet. Use aufs as the alternative.
    786         aufs = '-B aufs' if utils.is_vm() and use_snapshot else ''
    787         cmd = ('sudo lxc-clone -p %s -P %s %s' %
    788                (self.container_path, self.container_path,
    789                 ' '.join([BASE, name, snapshot, aufs])))
    790         try:
    791             utils.run(cmd)
    792             return self.get(name)
    793         except error.CmdError:
    794             if not use_snapshot:
    795                 raise
    796             else:
    797                 # Snapshot clone failed, retry clone without snapshot. The retry
    798                 # won't hit the code here and cause an infinite loop as
    799                 # disable_snapshot_clone is set to True.
    800                 container = self.create_from_base(
    801                         name, disable_snapshot_clone=True, force_cleanup=True)
    802                 # Report metadata about retry success.
    803                 autotest_es.post(use_http=True,
    804                                  type_str=CONTAINER_CREATE_RETRY_METADB_TYPE,
    805                                  metadata={'drone': socket.gethostname(),
    806                                            'name': name,
    807                                            'success': True})
    808                 return container
    809 
    810 
    811     @cleanup_if_fail()
    812     def setup_base(self, name=BASE, force_delete=False):
    813         """Setup base container.
    814 
    815         @param name: Name of the base container, default to base.
    816         @param force_delete: True to force to delete existing base container.
    817                              This action will destroy all running test
    818                              containers. Default is set to False.
    819         """
    820         if not self.container_path:
    821             raise error.ContainerError(
    822                     'You must set a valid directory to store containers in '
    823                     'global config "AUTOSERV/ container_path".')
    824 
    825         if not os.path.exists(self.container_path):
    826             os.makedirs(self.container_path)
    827 
    828         base_path = os.path.join(self.container_path, name)
    829         if self.exist(name) and not force_delete:
    830             logging.error(
    831                     'Base container already exists. Set force_delete to True '
    832                     'to force to re-stage base container. Note that this '
    833                     'action will destroy all running test containers')
    834             # Set proper file permission. base container in moblab may have
    835             # owner of not being root. Force to update the folder's owner.
    836             # TODO(dshi): Change root to current user when test container can be
    837             # unprivileged container.
    838             utils.run('sudo chown -R root "%s"' % base_path)
    839             utils.run('sudo chgrp -R root "%s"' % base_path)
    840             return
    841 
    842         # Destroy existing base container if exists.
    843         if self.exist(name):
    844             # TODO: We may need to destroy all snapshots created from this base
    845             # container, not all container.
    846             self.destroy_all()
    847 
    848         # Download and untar the base container.
    849         tar_path = os.path.join(self.container_path, '%s.tar.xz' % name)
    850         path_to_cleanup = [tar_path, base_path]
    851         for path in path_to_cleanup:
    852             if os.path.exists(path):
    853                 utils.run('sudo rm -rf "%s"' % path)
    854         container_url = CONTAINER_BASE_URL_FMT % name
    855         download_extract(container_url, tar_path, self.container_path)
    856         # Remove the downloaded container tar file.
    857         utils.run('sudo rm "%s"' % tar_path)
    858         # Set proper file permission.
    859         # TODO(dshi): Change root to current user when test container can be
    860         # unprivileged container.
    861         utils.run('sudo chown -R root "%s"' % base_path)
    862         utils.run('sudo chgrp -R root "%s"' % base_path)
    863 
    864         # Update container config with container_path from global config.
    865         config_path = os.path.join(base_path, 'config')
    866         utils.run('sudo sed -i "s|container_dir|%s|g" "%s"' %
    867                   (self.container_path, config_path))
    868 
    869 
    870     @metrics.SecondsTimerDecorator('%s/setup_test_duration' % STATS_KEY)
    871     @cleanup_if_fail()
    872     def setup_test(self, name, job_id, server_package_url, result_path,
    873                    control=None, skip_cleanup=False, job_folder=None,
    874                    dut_name=None):
    875         """Setup test container for the test job to run.
    876 
    877         The setup includes:
    878         1. Install autotest_server package from given url.
    879         2. Copy over local shadow_config.ini.
    880         3. Mount local site-packages.
    881         4. Mount test result directory.
    882 
    883         TODO(dshi): Setup also needs to include test control file for autoserv
    884                     to run in container.
    885 
    886         @param name: Name of the container.
    887         @param job_id: Job id for the test job to run in the test container.
    888         @param server_package_url: Url to download autotest_server package.
    889         @param result_path: Directory to be mounted to container to store test
    890                             results.
    891         @param control: Path to the control file to run the test job. Default is
    892                         set to None.
    893         @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
    894                              container failures.
    895         @param job_folder: Folder name of the job, e.g., 123-debug_user.
    896         @param dut_name: Name of the dut to run test, used as the hostname of
    897                          the container. Default is None.
    898         @return: A Container object for the test container.
    899 
    900         @raise ContainerError: If container does not exist, or not running.
    901         """
    902         start_time = time.time()
    903 
    904         if not os.path.exists(result_path):
    905             raise error.ContainerError('Result directory does not exist: %s',
    906                                        result_path)
    907         result_path = os.path.abspath(result_path)
    908 
    909         # Save control file to result_path temporarily. The reason is that the
    910         # control file in drone_tmp folder can be deleted during scheduler
    911         # restart. For test not using SSP, the window between test starts and
    912         # control file being picked up by the test is very small (< 2 seconds).
    913         # However, for tests using SSP, it takes around 1 minute before the
    914         # container is setup. If scheduler is restarted during that period, the
    915         # control file will be deleted, and the test will fail.
    916         if control:
    917             control_file_name = os.path.basename(control)
    918             safe_control = os.path.join(result_path, control_file_name)
    919             utils.run('cp %s %s' % (control, safe_control))
    920 
    921         # Create test container from the base container.
    922         container = self.create_from_base(name)
    923 
    924         # Update the hostname of the test container to be `dut_name`.
    925         # Some TradeFed tests use hostname in test results, which is used to
    926         # group test results in dashboard. The default container name is set to
    927         # be the name of the folder, which is unique (as it is composed of job
    928         # id and timestamp. For better result view, the container's hostname is
    929         # set to be a string containing the dut hostname.
    930         if dut_name:
    931             config_file = os.path.join(container.container_path, name, 'config')
    932             lxc_utsname_setting = (
    933                     'lxc.utsname = ' +
    934                     CONTAINER_UTSNAME_FORMAT % dut_name.replace('.', '_'))
    935             utils.run(APPEND_CMD_FMT % {'content': lxc_utsname_setting,
    936                                         'file': config_file})
    937 
    938         # Deploy server side package
    939         usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
    940         autotest_pkg_path = os.path.join(usr_local_path,
    941                                          'autotest_server_package.tar.bz2')
    942         autotest_path = os.path.join(usr_local_path, 'autotest')
    943         # sudo is required so os.makedirs may not work.
    944         utils.run('sudo mkdir -p %s'% usr_local_path)
    945 
    946         download_extract(server_package_url, autotest_pkg_path, usr_local_path)
    947         deploy_config_manager = lxc_config.DeployConfigManager(container)
    948         deploy_config_manager.deploy_pre_start()
    949 
    950         # Copy over control file to run the test job.
    951         if control:
    952             container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
    953             utils.run('sudo mkdir -p %s'% container_drone_temp)
    954             container_control_file = os.path.join(
    955                     container_drone_temp, control_file_name)
    956             # Move the control file stored in the result folder to container.
    957             utils.run('sudo mv %s %s' % (safe_control, container_control_file))
    958 
    959         if IS_MOBLAB:
    960             site_packages_path = MOBLAB_SITE_PACKAGES
    961             site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
    962         else:
    963             site_packages_path = os.path.join(common.autotest_dir,
    964                                               'site-packages')
    965             site_packages_container_path = os.path.join(
    966                     lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
    967         mount_entries = [(site_packages_path, site_packages_container_path,
    968                           True),
    969                          (os.path.join(common.autotest_dir, 'puppylab'),
    970                           os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
    971                                        'puppylab'),
    972                           True),
    973                          (result_path,
    974                           os.path.join(RESULT_DIR_FMT % job_folder),
    975                           False),
    976                         ]
    977         for mount_config in deploy_config_manager.mount_configs:
    978             mount_entries.append((mount_config.source, mount_config.target,
    979                                   mount_config.readonly))
    980         # Update container config to mount directories.
    981         for source, destination, readonly in mount_entries:
    982             container.mount_dir(source, destination, readonly)
    983 
    984         # Update file permissions.
    985         # TODO(dshi): crbug.com/459344 Skip following action when test container
    986         # can be unprivileged container.
    987         utils.run('sudo chown -R root "%s"' % autotest_path)
    988         utils.run('sudo chgrp -R root "%s"' % autotest_path)
    989 
    990         container.start(name)
    991         deploy_config_manager.deploy_post_start()
    992 
    993         container.modify_import_order()
    994 
    995         container.verify_autotest_setup(job_folder)
    996 
    997         autotest_es.post(use_http=True,
    998                          type_str=CONTAINER_CREATE_METADB_TYPE,
    999                          metadata={'drone': socket.gethostname(),
   1000                                    'job_id': job_id,
   1001                                    'time_used': time.time() - start_time,
   1002                                    'success': True})
   1003 
   1004         logging.debug('Test container %s is set up.', name)
   1005         return container
   1006 
   1007 
   1008 def parse_options():
   1009     """Parse command line inputs.
   1010 
   1011     @raise argparse.ArgumentError: If command line arguments are invalid.
   1012     """
   1013     parser = argparse.ArgumentParser()
   1014     parser.add_argument('-s', '--setup', action='store_true',
   1015                         default=False,
   1016                         help='Set up base container.')
   1017     parser.add_argument('-p', '--path', type=str,
   1018                         help='Directory to store the container.',
   1019                         default=DEFAULT_CONTAINER_PATH)
   1020     parser.add_argument('-f', '--force_delete', action='store_true',
   1021                         default=False,
   1022                         help=('Force to delete existing containers and rebuild '
   1023                               'base containers.'))
   1024     parser.add_argument('-n', '--name', type=str,
   1025                         help='Name of the base container.',
   1026                         default=BASE)
   1027     options = parser.parse_args()
   1028     if not options.setup and not options.force_delete:
   1029         raise argparse.ArgumentError(
   1030                 'Use --setup to setup a base container, or --force_delete to '
   1031                 'delete all containers in given path.')
   1032     return options
   1033 
   1034 
   1035 def main():
   1036     """main script."""
   1037     # Force to run the setup as superuser.
   1038     # TODO(dshi): crbug.com/459344 Set remove this enforcement when test
   1039     # container can be unprivileged container.
   1040     if utils.sudo_require_password():
   1041         logging.warn('SSP requires root privilege to run commands, please '
   1042                      'grant root access to this process.')
   1043         utils.run('sudo true')
   1044 
   1045     options = parse_options()
   1046     bucket = ContainerBucket(container_path=options.path)
   1047     if options.setup:
   1048         bucket.setup_base(name=options.name, force_delete=options.force_delete)
   1049     elif options.force_delete:
   1050         bucket.destroy_all()
   1051 
   1052 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
   1055