# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import datetime
import logging
import pprint
import time

import common
from autotest_lib.client.common_lib import error, site_utils
from autotest_lib.client.common_lib import utils as base_utils
from autotest_lib.client.common_lib.cros.network import ap_constants
from autotest_lib.client.common_lib.cros.network import iw_runner
from autotest_lib.server import hosts
from autotest_lib.server import site_linux_system
from autotest_lib.server.cros import host_lock_manager
from autotest_lib.server.cros.ap_configurators import ap_batch_locker
from autotest_lib.server.cros.ap_configurators \
        import ap_configurator_factory
from autotest_lib.server.cros.network import chaos_clique_utils as utils
from autotest_lib.server.cros.network import wifi_client
from autotest_lib.server.hosts import adb_host

# Webdriver master hostname
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
WEBDRIVER_PORT = 9515


class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test."""


    def __init__(self, test, host, spec, broken_pdus=None):
        """Stores the test parameters and logs the server and DUT clocks.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None for an empty list.

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # Build a fresh list per instance; a list() default would be created
        # once and shared by every ChaosRunner built without this argument.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, then locks APs in
        batches and runs the test named at construction against each AP
        that survives configuration and the health checks.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance, the packet
                           capturer never answering a scan, too many SSIDs
                           being visible, or the webdriver tunnel not coming
                           up.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run('cat /etc/lsb-release',
                                             ignore_status=True,
                                             timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            networks = None
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # Scan failed; keep retrying until the timeout expires.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
                break
            # The timeout is detected after the loop: comparing elapsed time
            # for exact equality with the limit inside the loop never fires,
            # which let an unresponsive capturer slip through unnoticed.
            if networks is None:
                raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(5)

            # Only set when a tunnel is created below; must exist either way
            # because the cleanup at the end of this method tests it.
            webdriver_tunnel = None
            if not site_utils.host_is_in_lab_zone(webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                        WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                results = None
                for _ in range(3):
                    time.sleep(10)
                    results = base_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                             ignore_status=True)
                    # NOTE(review): this relies on the truthiness of whatever
                    # base_utils.run() returns; confirm that a failed lsof
                    # actually yields a falsy value here.
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' % results)

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                # Do not reboot Android devices:b/27977927
                if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                    utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                        hosts.create_host(
                                {'hostname' : self._host.hostname,
                                 'afe_host' : self._host._afe_host},
                                host_class=self._host.__class__),
                        './debug', False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group the power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            # Give up on this batch; the outer loop sanitizes
                            # the DUT again before the next batch.
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        if conn_worker is None:
                            test_tag = ap.ssid
                        else:
                            test_tag = '%s.%s' % (conn_worker.name, ap.ssid)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=test_tag)

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)