# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re

from autotest_lib.client.common_lib import error
from autotest_lib.server.cros import moblab_test
from autotest_lib.server.hosts import moblab_host
from autotest_lib.utils import labellib


# Minutes held back from the overall timeout hint for post-run cleanup.
_CLEANUP_TIME_M = 5
_MOBLAB_IMAGE_STORAGE = '/mnt/moblab/static'
# Timeout (seconds) for the run_suite call when no timeout hint is given.
_DEFAULT_RUN_SUITE_TIMEOUT_S = 10800


class moblab_StorageQual(moblab_test.MoblabTest):
    """
    Moblab storage qual suite test. Ensures that moblab can run the storage
    qual tests on the correct DUTs in the correct order. This test does not
    perform any destructive disk operations.

    The test requires 2 duts, labeled 'storage_qual_cq_1', 'storage_qual_cq_2'.
    Each DUT will run a sequence of tests, and the test will then verify
    that the correct tests ran on the correctly labeled DUT, in the correct
    order.
    """
    version = 1

    # Moblab expects to have 1 dut with each of these labels
    REQUIRED_LABELS = {'storage_qual_cq_1', 'storage_qual_cq_2'}

    # Expected test execution order, keyed by DUT label.
    EXPECTED_RESULTS = {
        'storage_qual_cq_1': [
            'hardware_StorageQualBase_before',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_suspend',
            'hardware_StorageQualBase_after'
        ],
        'storage_qual_cq_2': [
            'hardware_StorageQualBase_before',
            'hardware_StorageStress_soak',
            'hardware_StorageStress_soak',
            'hardware_StorageQualTrimStress',
            'hardware_StorageQualTrimStress',
            'hardware_StorageQualBase_after'
        ]
    }

    def run_once(self, host, moblab_suite_max_retries,
                 target_build='', clear_devserver_cache=True,
                 test_timeout_hint_m=None):
        """Runs a suite on a Moblab Host against its test DUTS.

        @param host: Moblab Host that will run the suite.
        @param moblab_suite_max_retries: The maximum number of test retries
                allowed within the suite launched on moblab.
        @param target_build: Optional build to be used in the run_suite
                call on moblab. This argument is passed as is to run_suite. It
                must be a sensible build target for the board of the sub-DUTs
                attached to the moblab.
        @param clear_devserver_cache: If True, image cache of the devserver
                running on moblab is cleared before running the test to validate
                devserver imaging staging flow.
        @param test_timeout_hint_m: (int) Optional overall timeout for the test.
                For this test, it is very important to collect post failure data
                from the moblab device. If the overall timeout is provided, the
                test will try to fail early to save some time for log collection
                from the DUT.

        @raises AutoservRunError if the suite does not complete successfully.
        @raises TestFail if no DUTs are available, a required label is
                missing, the board cannot be determined, or the observed
                test execution order does not match EXPECTED_RESULTS.
        """
        self._host = host
        self._maybe_clear_devserver_cache(clear_devserver_cache)

        duts = host.afe.get_hosts()
        if not duts:
            raise error.TestFail('All hosts for this MobLab are down. Please '
                                 'request the lab admins to take a look.')

        board = None
        dut_to_label = {}
        for dut in duts:
            # Fetch the board of the DUTs assigned to this Moblab. There should
            # only be one type.
            board = labellib.LabelsMapping(dut.labels)['board']
            for label in dut.labels:
                if label in self.REQUIRED_LABELS:
                    dut_to_label[dut.hostname] = label

        # Compute the difference first: '%' binds tighter than '-', so
        # formatting inline would try to subtract a set from a str.
        missing_labels = self.REQUIRED_LABELS - set(dut_to_label.values())
        if missing_labels:
            raise error.TestFail(
                    'Missing required labels on hosts %s, are some hosts down?'
                    % (sorted(missing_labels),))

        if not board:
            raise error.TestFail('Could not determine board from hosts.')

        if not target_build:
            stable_version_map = host.afe.get_stable_version_map(
                    host.afe.CROS_IMAGE_TYPE)
            target_build = stable_version_map.get_image_name(board)

        logging.info('Running suite: hardware_storagequal_cq')
        cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
               "--suite_name=hardware_storagequal_cq --retry=True "
               "--max_retries=%d" %
               (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
                moblab_suite_max_retries))
        cmd, run_suite_timeout_s = self._append_run_suite_timeout(
                cmd,
                test_timeout_hint_m,
        )

        logging.debug('Run suite command: %s', cmd)
        try:
            result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            # Non-critical exit status: the suite still ran, so keep the
            # command result attached to the exception for parsing below.
            result = e.result_obj

        logging.debug('Suite Run Output:\n%s', result.stderr)

        job_ids = self._get_job_ids_from_suite_output(result.stderr)

        logging.debug('Suite job ids %s', job_ids)

        keyvals_per_host = self._get_keyval_files_per_host(host, job_ids)

        logging.debug('Keyvals grouped by host %s', keyvals_per_host)

        failed_test = False
        labels_with_results = set()
        for hostname in keyvals_per_host:
            label = dut_to_label.get(hostname)
            if label is None:
                # Storage qual results exist for a DUT that carries none of
                # the required labels: tests ran on the wrong DUT.
                logging.error('Host %s ran storage qual tests but has none '
                              'of the required labels', hostname)
                failed_test = True
                continue
            labels_with_results.add(label)
            actual = self._get_test_execution_order(
                    host, keyvals_per_host[hostname])
            if self._compare_execution_order(
                    label, hostname, self.EXPECTED_RESULTS[label], actual):
                failed_test = True

        # A required label that produced no results at all must not pass
        # vacuously just because there was nothing to compare.
        for label in sorted(self.REQUIRED_LABELS - labels_with_results):
            logging.error('No storage qual results found for label %s',
                          label)
            failed_test = True

        # Cache directory can contain large binaries like CTS/CTS zip files
        # no need to offload those in the results.
        # The cache is owned by root user
        host.run('rm -fR /mnt/moblab/results/shared/cache',
                 timeout=600)

        if failed_test:
            raise error.TestFail(
                    'Actual test execution order did not match expected')

    def _compare_execution_order(self, label, hostname, expected, actual):
        """Log a side-by-side comparison of expected vs actual test order.

        @param label: (str) required label carried by the DUT.
        @param hostname: (str) hostname of the DUT the results came from.
        @param expected: (list<str>) test names in the expected order.
        @param actual: (list<str>) test names in the order they executed.
        @return (bool) True if any position differs, including when the two
                lists have different lengths.
        """
        logging.info('Comparing test order for %s from host %s',
                     label, hostname)
        logging.info('%-37s %s', 'Expected', 'Actual')
        mismatch = False
        for i in range(max(len(expected), len(actual))):
            # Pad the shorter list with None so extra or missing tests
            # show up as mismatches.
            expected_i = expected[i] if i < len(expected) else None
            actual_i = actual[i] if i < len(actual) else None
            check_fail = expected_i != actual_i
            check_text = 'X' if check_fail else ' '
            logging.info('%s %-35s %s', check_text, expected_i, actual_i)
            mismatch = mismatch or check_fail
        return mismatch

    def _append_run_suite_timeout(self, cmd, test_timeout_hint_m):
        """Modify given run_suite command with timeout.

        @param cmd: run_suite command str.
        @param test_timeout_hint_m: (int) timeout for the test, or None.
        @return cmd, run_suite_timeout_s: cmd is the updated command str,
                run_suite_timeout_s is the timeout to use for the run_suite
                call, in seconds.
        """
        if test_timeout_hint_m is None:
            return cmd, _DEFAULT_RUN_SUITE_TIMEOUT_S

        # Arguments passed in via test_args may be all str, depending on how
        # they're passed in.
        test_timeout_hint_m = int(test_timeout_hint_m)
        elapsed_m = self.elapsed.total_seconds() / 60
        run_suite_timeout_m = (
                test_timeout_hint_m - elapsed_m - _CLEANUP_TIME_M)
        logging.info('Overall test timeout hint provided (%d minutes)',
                     test_timeout_hint_m)
        logging.info('%d minutes have already elasped', elapsed_m)
        logging.info(
                'Keeping %d minutes for cleanup, will allow %d minutes for '
                'the suite to run.', _CLEANUP_TIME_M, run_suite_timeout_m)
        cmd += ' --timeout_mins %d' % run_suite_timeout_m
        return cmd, run_suite_timeout_m * 60

    def _maybe_clear_devserver_cache(self, clear_devserver_cache):
        """Remove everything under the moblab image storage dir if requested.

        @param clear_devserver_cache: (bool or str) whether to clear the
                cache. A non-bool value is treated as True only when its
                lowercased form equals 'true'.
        """
        # When passed in via test_args, all arguments are str
        if not isinstance(clear_devserver_cache, bool):
            clear_devserver_cache = (clear_devserver_cache.lower() == 'true')
        if clear_devserver_cache:
            self._host.run('rm -rf %s/*' % _MOBLAB_IMAGE_STORAGE)

    def _get_job_ids_from_suite_output(self, suite_output):
        """Parse the set of job ids from run_suite output

        @param suite_output (str) output from run_suite command
        @return (set<int>) job ids contained in the suite
        """
        job_ids = set()
        # Only the first '<digits>-moblab' occurrence on a line is used.
        job_id_pattern = re.compile(r'(\d+)-moblab')
        for line in suite_output.splitlines():
            match = job_id_pattern.search(line)
            logging.debug('suite line %s match %s', line, match)
            if match is None:
                continue
            job_ids.add(int(match.group(1)))
        return job_ids

    def _get_keyval_files_per_host(self, host, job_ids):
        """Find the result keyval files for the given job ids and
        group them by host

        @param host (moblab_host)
        @param job_ids (set<int>) set of job ids to find keyvals for
        @return (dict<str, list<str>>) map of hosts and the keyval
                file locations
        @throws AutoservRunError if the command fails to run on moblab
        """
        keyvals_per_host = {}
        keyvals = host.run_as_moblab(
                'find /mnt/moblab/results '
                '-wholename *-moblab/192.168*/hardware_Storage*/keyval')
        # Dots are escaped so they match only a literal '.' in the address.
        pattern = re.compile(r'(\d+)-moblab/(192\.168\.\d+\.\d+)')
        for line in keyvals.stdout.splitlines():
            match = pattern.search(line)
            if match is None:
                continue
            job_id, dut = match.groups()
            # Skip results left behind by jobs outside this suite run.
            if int(job_id) not in job_ids:
                continue
            keyvals_per_host.setdefault(dut, []).append(line)

        return keyvals_per_host

    def _get_test_execution_order(self, host, keyvals):
        """Determines the test execution order for the given list
        of storage qual test result keyvals

        @param host (moblab_host)
        @param keyvals (list<str>) location of keyval files to order
        @return (list<str>) test names in the order they executed
        @throws AutoservRunError if the command fails to run on moblab
        """
        # The shell pipeline greps the 'storage_qual_cq' lines out of every
        # keyval file, sorts them, and keeps the second space-separated
        # field. NOTE(review): presumably each matching line starts with a
        # sortable sequence key followed by the test name - confirm against
        # the keyval format written by the storage qual tests.
        tests = host.run_as_moblab(
                'FILES=(%s); for FILE in ${FILES[@]}; do cat $FILE '
                '| grep storage_qual_cq; done '
                '| sort | cut -d " " -f 2'
                % ' '.join(keyvals)
        )
        test_execution_order = []
        pattern = re.compile(r'hardware_\w+')
        logging.debug(tests.stdout)
        for line in tests.stdout.splitlines():
            match = pattern.search(line)
            if match:
                test_execution_order.append(match.group(0))
        return test_execution_order


def _is_run_suite_error_critical(return_code):
    """Tell whether a run_suite exit status should abort the test.

    @param return_code: (int) exit status of the run_suite command.
    @return (bool) True unless the status is OK (0) or WARNING (2).
    """
    # We can't actually import run_suite here because importing run_suite pulls
    # in certain MySQLdb dependencies that fail to load in the context of a
    # test.
    # OTOH, these return codes are unlikely to change because external users /
    # builders depend on them.
    return return_code not in (
            0,  # run_suite.RETURN_CODES.OK
            2,  # run_suite.RETURN_CODES.WARNING
    )