# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os
import socket
import time

import common

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.site_utils.lxc import config as lxc_config
from autotest_lib.site_utils.lxc import constants
from autotest_lib.site_utils.lxc import container_pool
from autotest_lib.site_utils.lxc import lxc
from autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail
from autotest_lib.site_utils.lxc.base_image import BaseImage
from autotest_lib.site_utils.lxc.constants import \
    CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX
from autotest_lib.site_utils.lxc.container import Container
from autotest_lib.site_utils.lxc.container_factory import ContainerFactory

try:
    from chromite.lib import metrics
    from infra_libs import ts_mon
except ImportError:
    import mock
    metrics = utils.metrics_mock
    ts_mon = mock.Mock()


# Timeout (in seconds) for container pool operations.
_CONTAINER_POOL_TIMEOUT = 3

_USE_LXC_POOL = global_config.get_config_value('LXC_POOL', 'use_lxc_pool',
                                               type=bool)

class ContainerBucket(object):
    """A wrapper class to interact with containers in a specific container path.
    """

    def __init__(self, container_path=constants.DEFAULT_CONTAINER_PATH,
                 container_factory=None):
        """Initialize a ContainerBucket.

        When no factory is supplied, the base image is loaded from
        `container_path` and a factory is built around it. A
        `<METRICS_PREFIX>/base_image` counter is incremented on every such
        load, with the `corrupted` field set when loading raised a
        ContainerError.

        @param container_path: Path to the directory used to store containers.
                               Default is set to AUTOSERV/container_path in
                               global config.
        @param container_factory: A factory for creating Containers.

        @raise ContainerError: If the base image cannot be obtained.
        """
        self.container_path = os.path.realpath(container_path)
        if container_factory is not None:
            self._factory = container_factory
        else:
            # Pick the correct factory class to use (pool-based, or regular)
            # based on the config variable.
            factory_class = ContainerFactory
            if _USE_LXC_POOL:
                logging.debug('Using container pool')
                factory_class = _PoolBasedFactory

            # Pass in the container path so that the bucket is hermetic (i.e. so
            # that if the container path is customized, the base image doesn't
            # fall back to using the default container path).
            base_image_ok = True
            try:
                container = BaseImage(self.container_path).get()
            except error.ContainerError:
                base_image_ok = False
                # Bare raise keeps the original traceback intact.
                raise
            finally:
                # Always report the outcome, whether or not the load succeeded.
                metrics.Counter(METRICS_PREFIX + '/base_image',
                                field_spec=[ts_mon.BooleanField('corrupted')]
                                ).increment(
                                    fields={'corrupted': not base_image_ok})
            self._factory = factory_class(
                    base_container=container,
                    lxc_path=self.container_path)
        # Cache of known containers, keyed by container ID.
        self.container_cache = {}


    def get_all(self, force_update=False):
        """Get details of all containers.

        Retrieves all containers owned by the bucket.  Note that this doesn't
        include the base container, or any containers owned by the container
        pool.

        The returned dictionary also becomes the bucket's container cache.

        @param force_update: Boolean, ignore cached values if set.

        @return: A dictionary of all containers with detailed attributes,
                 indexed by container ID.
        """
        info_collection = lxc.get_container_info(self.container_path)
        containers = {} if force_update else self.container_cache
        for info in info_collection:
            # NOTE(review): the cache is keyed by container.id while this
            # check uses the container name; confirm that IDs and names
            # compare equal, otherwise cached entries are never skipped.
            if info["name"] in containers:
                continue
            container = Container.create_from_existing_dir(self.container_path,
                                                           **info)
            # Active containers have an ID.  Zygotes and base containers, don't.
            if container.id is not None:
                containers[container.id] = container
        self.container_cache = containers
        return containers


    def get_container(self, container_id):
        """Get a container with matching ID.

        The cache is consulted first; on a miss the container list is
        refreshed through get_all().

        @param container_id: ID of the container.

        @return: A container object with matching ID. Returns None if no
                 container matches the given ID.
        """
        if container_id in self.container_cache:
            return self.container_cache[container_id]

        return self.get_all().get(container_id, None)


    def exist(self, container_id):
        """Check if a container exists with the given ID.

        @param container_id: ID of the container.

        @return: True if the container with the given ID exists, otherwise
                 returns False.
        """
        return self.get_container(container_id) is not None


    def destroy_all(self):
        """Destroy all containers, base must be destroyed at the last.

        Each destroyed container is also removed from the container cache.
        """
        containers = self.get_all().values()
        # sorted() builds a new list, so deleting from the cache while
        # iterating is safe.  The key pushes the base container to the end.
        for container in sorted(
                containers, key=lambda n: 1 if n.name == constants.BASE else 0):
            key = container.id
            logging.info('Destroy container %s.', container.name)
            container.destroy()
            del self.container_cache[key]


    @metrics.SecondsTimerDecorator(
        '%s/setup_test_duration' % constants.STATS_KEY)
    @cleanup_if_fail()
    def setup_test(self, container_id, job_id, server_package_url, result_path,
                   control=None, skip_cleanup=False, job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Install autotest_server package from given url.
        2. Copy over local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount test result directory.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param container_id: ID to assign to the test container.
        @param job_id: Job id for the test job to run in the test container.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures.
        @param job_folder: Folder name of the job, e.g., 123-debug_user.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If the result directory does not exist, or if
                               container does not exist, or not running.
        """
        if not os.path.exists(result_path):
            # Format the path into the message; passing it as a second
            # argument would leave the '%s' placeholder unexpanded.
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            utils.run('cp %s %s' % (control, safe_control))

        # Create test container from the base container.
        container = self._factory.create_container(container_id)

        # Deploy server side package
        container.install_ssp(server_package_url)

        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container.install_control_file(safe_control)

        # Each entry is (source, destination, readonly).
        mount_entries = [(constants.SITE_PACKAGES_PATH,
                          constants.CONTAINER_SITE_PACKAGES_PATH,
                          True),
                         (result_path,
                          os.path.join(constants.RESULT_DIR_FMT % job_folder),
                          False),
                        ]

        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        autotest_path = os.path.join(
                container.rootfs,
                constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep))
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(wait_for_network=True)
        deploy_config_manager.deploy_post_start()

        # Update the hostname of the test container to be `dut-name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT %
                                   dut_name.replace('.', '-'))

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        logging.debug('Test container %s is set up.', container.name)
        return container


class _PoolBasedFactory(ContainerFactory):
    """A ContainerFactory that queries the running container pool.

    Implementation falls back to the regular container factory behaviour
    (i.e. locally cloning a container) if the pool is unavailable or if it does
    not return a bucket before the specified timeout.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the factory and try to connect to the container pool.

        All arguments are forwarded to ContainerFactory. If the pool
        connection fails with a socket error or timeout, the client is left
        unset and create_container() always creates containers locally.
        """
        super(_PoolBasedFactory, self).__init__(*args, **kwargs)
        try:
            self._client = container_pool.Client()
        except (socket.error, socket.timeout):
            # If an error occurs connecting to the container pool, fall back to
            # the default container factory.
            logging.exception('Container pool connection failed.')
            self._client = None


    def create_container(self, new_id):
        """Creates a new container.

        Attempts to retrieve a container from the container pool.  If that
        operation fails, this falls back to the parent class behaviour.

        A `<METRICS_PREFIX>/containers_served` counter is incremented on every
        call; its `from_pool` field records whether the pool supplied the
        container.

        @param new_id: ContainerId to assign to the new container.  Containers
                       must be assigned an ID before they can be released from
                       the container pool.

        @return: The new container.
        """
        container = None
        if self._client:
            try:
                container = self._client.get_container(new_id,
                                                       _CONTAINER_POOL_TIMEOUT)
            except Exception:
                # Deliberately broad: any pool failure must fall through to
                # local container creation rather than fail the test.
                logging.exception('Error communicating with container pool.')
            else:
                if container is not None:
                    logging.debug('Retrieved container from pool: %s',
                                  container.name)

        # Record the outcome before returning, so that pool hits are counted
        # as well as misses.
        metrics.Counter(METRICS_PREFIX + '/containers_served',
                        field_spec=[ts_mon.BooleanField('from_pool')]
                        ).increment(fields={
                            'from_pool': (container is not None)})
        if container is not None:
            return container

        # If the container pool did not yield a container, make one locally.
        logging.warning('Unable to obtain container from pre-populated pool.  '
                        'Creating container locally.  This slows server tests '
                        'down and should be debugged even if local creation '
                        'works out.')
        return super(_PoolBasedFactory, self).create_container(new_id)