Home | History | Annotate | Download | only in moblab_StorageQual
      1 # Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import re
      7 
      8 from autotest_lib.client.common_lib import error
      9 from autotest_lib.server.cros import moblab_test
     10 from autotest_lib.server.hosts import moblab_host
     11 from autotest_lib.utils import labellib
     12 
     13 
# Minutes subtracted from the overall test timeout hint and kept back for
# cleanup once the suite has finished (see _append_run_suite_timeout).
_CLEANUP_TIME_M = 5
# Directory on the moblab whose contents are removed when the devserver image
# cache is cleared (see _maybe_clear_devserver_cache).
_MOBLAB_IMAGE_STORAGE = '/mnt/moblab/static'
     16 
     17 class moblab_StorageQual(moblab_test.MoblabTest):
     18     """
     19     Moblab storage qual suite test. Ensures that moblab can run the storage
     20     qual tests on the correct DUTs in the correct order. This test does not
     21     perform any destructive disk operations.
     22 
     23     The test requires 2 duts, labeled 'storage_qual_cq_1', 'storage_qual_cq_2'.
     24     Each DUT will run a sequence of tests, and the test will then verify
     25     that the correct tests ran on the correctly labeled DUT, in the correct
     26     order.
     27     """
     28     version = 1
     29 
     30     # Moblab expects to have 1 dut with each of these labels
     31     REQUIRED_LABELS = {'storage_qual_cq_1', 'storage_qual_cq_2'}
     32 
     33     EXPECTED_RESULTS = {
     34         'storage_qual_cq_1': [
     35             'hardware_StorageQualBase_before',
     36             'hardware_StorageStress_soak',
     37             'hardware_StorageStress_soak',
     38             'hardware_StorageStress_suspend',
     39             'hardware_StorageQualBase_after'
     40         ],
     41         'storage_qual_cq_2': [
     42             'hardware_StorageQualBase_before',
     43             'hardware_StorageStress_soak',
     44             'hardware_StorageStress_soak',
     45             'hardware_StorageQualTrimStress',
     46             'hardware_StorageQualTrimStress',
     47             'hardware_StorageQualBase_after'
     48         ]
     49     }
     50 
     51     def run_once(self, host, moblab_suite_max_retries,
     52                  target_build='', clear_devserver_cache=True,
     53                  test_timeout_hint_m=None):
     54         """Runs a suite on a Moblab Host against its test DUTS.
     55 
     56         @param host: Moblab Host that will run the suite.
     57         @param moblab_suite_max_retries: The maximum number of test retries
     58                 allowed within the suite launched on moblab.
     59         @param target_build: Optional build to be use in the run_suite
     60                 call on moblab. This argument is passed as is to run_suite. It
     61                 must be a sensible build target for the board of the sub-DUTs
     62                 attached to the moblab.
     63         @param clear_devserver_cache: If True, image cache of the devserver
     64                 running on moblab is cleared before running the test to validate
     65                 devserver imaging staging flow.
     66         @param test_timeout_hint_m: (int) Optional overall timeout for the test.
     67                 For this test, it is very important to collect post failure data
     68                 from the moblab device. If the overall timeout is provided, the
     69                 test will try to fail early to save some time for log collection
     70                 from the DUT.
     71 
     72         @raises AutoservRunError if the suite does not complete successfully.
     73         """
     74         self._host = host
     75         self._maybe_clear_devserver_cache(clear_devserver_cache)
     76 
     77         duts = host.afe.get_hosts()
     78         if len(duts) == 0:
     79             raise error.TestFail('All hosts for this MobLab are down. Please '
     80                                  'request the lab admins to take a look.')
     81 
     82         board = None
     83         dut_to_label = {}
     84         for dut in duts:
     85             # Fetch the board of the DUT's assigned to this Moblab. There should
     86             # only be one type.
     87             board = labellib.LabelsMapping(dut.labels)['board']
     88             for label in dut.labels:
     89                 if label in self.REQUIRED_LABELS:
     90                     dut_to_label[dut.hostname] = label
     91 
     92         if not set(dut_to_label.values()) == self.REQUIRED_LABELS:
     93             raise error.TestFail(
     94                 'Missing required labels on hosts %s, are some hosts down?'
     95                     % self.REQUIRED_LABELS - set(dut_to_label.values()))
     96 
     97         if not board:
     98             raise error.TestFail('Could not determine board from hosts.')
     99 
    100         if not target_build:
    101             stable_version_map = host.afe.get_stable_version_map(
    102                     host.afe.CROS_IMAGE_TYPE)
    103             target_build = stable_version_map.get_image_name(board)
    104 
    105         logging.info('Running suite: hardware_storagequal_cq')
    106         cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
    107                "--suite_name=hardware_storagequal_cq --retry=True "
    108                "--max_retries=%d" %
    109                (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
    110                moblab_suite_max_retries))
    111         cmd, run_suite_timeout_s = self._append_run_suite_timeout(
    112                 cmd,
    113                 test_timeout_hint_m,
    114         )
    115 
    116         logging.debug('Run suite command: %s', cmd)
    117         try:
    118             result = host.run_as_moblab(cmd, timeout=run_suite_timeout_s)
    119         except error.AutoservRunError as e:
    120             if _is_run_suite_error_critical(e.result_obj.exit_status):
    121                 raise
    122 
    123         logging.debug('Suite Run Output:\n%s', result.stderr)
    124 
    125         job_ids = self._get_job_ids_from_suite_output(result.stderr)
    126 
    127         logging.debug('Suite job ids %s', job_ids)
    128 
    129         keyvals_per_host = self._get_keyval_files_per_host(host, job_ids)
    130 
    131         logging.debug('Keyvals grouped by host %s', keyvals_per_host)
    132 
    133         failed_test = False
    134         for hostname in keyvals_per_host:
    135             label = dut_to_label[hostname]
    136             expected = self.EXPECTED_RESULTS[label]
    137             actual = self._get_test_execution_order(
    138                 host, keyvals_per_host[hostname])
    139 
    140             logging.info('Comparing test order for %s from host %s',
    141                 label, hostname)
    142             logging.info('%-37s %s', 'Expected', 'Actual')
    143             for i in range(max(len(expected), len(actual))):
    144                 expected_i = expected[i] if i < len(expected) else None
    145                 actual_i = actual[i] if i < len(actual) else None
    146                 check_fail = expected_i != actual_i
    147                 check_text = 'X' if check_fail else ' '
    148                 logging.info('%s %-35s %s', check_text, expected_i, actual_i)
    149                 failed_test = failed_test or check_fail
    150 
    151         # Cache directory can contain large binaries like CTS/CTS zip files
    152         # no need to offload those in the results.
    153         # The cache is owned by root user
    154         host.run('rm -fR /mnt/moblab/results/shared/cache',
    155                     timeout=600)
    156 
    157         if failed_test:
    158             raise error.TestFail(
    159                 'Actual test execution order did not match expected')
    160 
    161     def _append_run_suite_timeout(self, cmd, test_timeout_hint_m):
    162         """Modify given run_suite command with timeout.
    163 
    164         @param cmd: run_suite command str.
    165         @param test_timeout_hint_m: (int) timeout for the test, or None.
    166         @return cmd, run_suite_timeout_s: cmd is the updated command str,
    167                 run_suite_timeout_s is the timeout to use for the run_suite
    168                 call, in seconds.
    169         """
    170         if test_timeout_hint_m is None:
    171             return cmd, 10800
    172 
    173         # Arguments passed in via test_args may be all str, depending on how
    174         # they're passed in.
    175         test_timeout_hint_m = int(test_timeout_hint_m)
    176         elasped_m = self.elapsed.total_seconds() / 60
    177         run_suite_timeout_m = (
    178                 test_timeout_hint_m - elasped_m - _CLEANUP_TIME_M)
    179         logging.info('Overall test timeout hint provided (%d minutes)',
    180                      test_timeout_hint_m)
    181         logging.info('%d minutes have already elasped', elasped_m)
    182         logging.info(
    183                 'Keeping %d minutes for cleanup, will allow %d minutes for '
    184                 'the suite to run.', _CLEANUP_TIME_M, run_suite_timeout_m)
    185         cmd += ' --timeout_mins %d' % run_suite_timeout_m
    186         return cmd, run_suite_timeout_m * 60
    187 
    188     def _maybe_clear_devserver_cache(self, clear_devserver_cache):
    189         # When passed in via test_args, all arguments are str
    190         if not isinstance(clear_devserver_cache, bool):
    191             clear_devserver_cache = (clear_devserver_cache.lower() == 'true')
    192         if clear_devserver_cache:
    193             self._host.run('rm -rf %s/*' % _MOBLAB_IMAGE_STORAGE)
    194 
    195     def _get_job_ids_from_suite_output(self, suite_output):
    196         """Parse the set of job ids from run_suite output
    197 
    198         @param suite_output (str) output from run_suite command
    199         @return (set<int>) job ids contained in the suite
    200         """
    201         job_ids = set()
    202         job_id_pattern = re.compile('(\d+)-moblab')
    203         for line in suite_output.splitlines():
    204             match = job_id_pattern.search(line)
    205             logging.debug('suite line %s match %s', line, match)
    206             if match is None:
    207                 continue
    208             job_ids.add(int(match.groups()[0]))
    209         return job_ids
    210 
    211     def _get_keyval_files_per_host(self, host, job_ids):
    212         """Find the result keyval files for the given job ids and
    213         group them by host
    214 
    215         @param host (moblab_host)
    216         @param job_ids (set<int>) set of job ids to find keyvals for
    217         @return (dict<str, list<str>>) map of hosts and the keyval
    218             file locations
    219         @throws AutoservRunError if the command fails to run on moblab
    220         """
    221         keyvals_per_host = {}
    222         keyvals = host.run_as_moblab(
    223             'find /mnt/moblab/results '
    224             '-wholename *-moblab/192.168*/hardware_Storage*/keyval')
    225         pattern = re.compile('(\d+)-moblab/(192.168.\d+.\d+)')
    226         for line in keyvals.stdout.splitlines():
    227             match = pattern.search(line)
    228             if match is None:
    229                 continue
    230             job_id, dut = match.groups()
    231             if int(job_id) not in job_ids:
    232                 continue
    233             if dut not in keyvals_per_host:
    234                 keyvals_per_host[dut] = []
    235             keyvals_per_host[dut].append(line)
    236 
    237         return keyvals_per_host
    238 
    239     def _get_test_execution_order(self, host, keyvals):
    240         """Determines the test execution order for the given list
    241         of storage qual test result keyvals
    242 
    243         @param host (moblab_host)
    244         @param keyvals (list<str>) location of keyval files to order
    245         @return (list<str>) test names in the order they executed
    246         @throws AutoservRunError if the command fails to run on moblab
    247         """
    248         tests = host.run_as_moblab(
    249             'FILES=(%s); for FILE in ${FILES[@]}; do cat $FILE '
    250             '| grep storage_qual_cq; done '
    251             '| sort | cut -d " " -f 2'
    252             % ' '.join(keyvals)
    253         )
    254         test_execution_order = []
    255         pattern = re.compile('hardware_\w+')
    256         logging.debug(tests.stdout)
    257         for line in tests.stdout.splitlines():
    258             match = pattern.search(line)
    259             if match:
    260                 test_execution_order.append(match.group(0))
    261         return test_execution_order
    262 
    263 def _is_run_suite_error_critical(return_code):
    264     # We can't actually import run_suite here because importing run_suite pulls
    265     # in certain MySQLdb dependencies that fail to load in the context of a
    266     # test.
    267     # OTOH, these return codes are unlikely to change because external users /
    268     # builders depend on them.
    269     return return_code not in (
    270             0,  # run_suite.RETURN_CODES.OK
    271             2,  # run_suite.RETURN_CODES.WARNING
    272     )
    273