Home | History | Annotate | Download | only in hardware_StorageStress
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging, sys, time
      6 from autotest_lib.client.common_lib import error
      7 from autotest_lib.server import autotest
      8 from autotest_lib.server import hosts
      9 from autotest_lib.server import test
     10 
     11 class hardware_StorageStress(test.test):
     12     """
     13     Integrity stress test for storage device
     14     """
     15     version = 1
     16 
     17     _HOURS_IN_SEC = 3600
     18     # Define default value for the test case
     19     _TEST_GAP = 60 # 1 min
     20     _TEST_DURATION = 12 * _HOURS_IN_SEC
     21     _SUSPEND_DURATION = _HOURS_IN_SEC
     22     _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
     23     _FIO_WRITE_FLAGS = []
     24     _FIO_VERIFY_FLAGS = ['--verifyonly']
     25 
     26     def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
     27                  power_command='reboot', storage_test_command='integrity',
     28                  suspend_duration=_SUSPEND_DURATION, storage_test_argument='',
     29                  cq=False):
     30         """
     31         Run the Storage stress test
     32         Use hardwareStorageFio to run some test_command repeatedly for a long
     33         time. Between each iteration of test command, run power command such as
     34         reboot or suspend.
     35 
     36         @param client_ip:     string of client's ip address (required)
     37         @param gap:           gap between each test (second) default = 1 min
     38         @param duration:      duration to run test (second) default = 12 hours
     39         @param power_command: command to do between each test Command
     40                               possible command: reboot / suspend / nothing
     41         @param storage_test_command:  FIO command to run
     42                               - integrity:  Check data integrity
     43                               - full_write: Check performance consistency
     44                                             for full disk write. Use argument
     45                                             to determine which disk to write
     46         @param suspend_duration: if power_command is suspend, how long the DUT
     47                               is suspended.
     48         @param cq:            Indicates that this test is being run as part of
     49                               the cq. This is not used to test a component for
     50                               qualification, but to test the storage qual suite
     51         """
     52 
     53         # in a cq run, do not execute the test, just output
     54         # the order that the test would have run in
     55         if cq:
     56             label = 'suspend' if power_command is 'suspend' else 'soak'
     57             self.write_test_keyval(
     58                 {'storage_qual_cq': ('%f hardware_StorageStress_%s'
     59                     % (time.time(), label))})
     60             return
     61 
     62         # init test
     63         if not client_ip:
     64             error.TestError("Must provide client's IP address to test")
     65 
     66         self._client = hosts.create_host(client_ip)
     67         self._client_at = autotest.Autotest(self._client)
     68         self._results = {}
     69         self._suspend_duration = suspend_duration
     70 
     71         # parse power command
     72         if power_command == 'nothing':
     73             self._power_func = self._do_nothing
     74         elif power_command == 'reboot':
     75             self._power_func = self._do_reboot
     76         elif power_command == 'suspend':
     77             self._power_func = self._do_suspend
     78         elif power_command == 'wait':
     79             self._power_func = self._do_wait
     80         else:
     81             raise error.TestFail(
     82                 'Test failed with error: Invalid power command')
     83 
     84         # Test is doing a lot of disk activity, monitor disk data at each iteration.
     85         self.job.add_sysinfo_logfile('/var/log/storage_info.txt', on_every_test=True)
     86 
     87         # parse test command
     88         if storage_test_command == 'integrity':
     89             setup_func = self._write_data
     90             loop_func = self._verify_data
     91         elif storage_test_command == 'full_write':
     92             setup_func = self._do_nothing
     93             loop_func = self._full_disk_write
     94             # Do at least 2 soak runs. Given the absolute minimum of a loop is
     95             # around 1h, duration should be at least 1h.
     96             self._soak_time = min(self._TEST_DURATION, duration / 4)
     97         else:
     98             raise error.TestFail('Test failed with error: Invalid test command')
     99 
    100         # init statistic variable
    101         min_time_per_loop = sys.maxsize
    102         max_time_per_loop = 0
    103         all_loop_time = 0
    104         avr_time_per_loop = 0
    105         self._loop_count = 0
    106         setup_func()
    107 
    108         start_time = time.time()
    109 
    110         while time.time() - start_time < duration:
    111             # sleep
    112             time.sleep(gap)
    113 
    114             self._loop_count += 1
    115 
    116             # do power command & verify data & calculate time
    117             loop_start_time = time.time()
    118             loop_func()
    119             loop_time = time.time() - loop_start_time
    120 
    121             # update statistic
    122             all_loop_time += loop_time
    123             min_time_per_loop = min(loop_time, min_time_per_loop)
    124             max_time_per_loop = max(loop_time, max_time_per_loop)
    125 
    126         if self._loop_count > 0:
    127             avr_time_per_loop = all_loop_time / self._loop_count
    128 
    129         logging.info(str('check data count: %d' % self._loop_count))
    130 
    131         # report result
    132         self.write_perf_keyval({'loop_count':self._loop_count})
    133         self.write_perf_keyval({'min_time_per_loop':min_time_per_loop})
    134         self.write_perf_keyval({'max_time_per_loop':max_time_per_loop})
    135         self.write_perf_keyval({'avr_time_per_loop':avr_time_per_loop})
    136 
    137     def _do_nothing(self):
    138         pass
    139 
    140     def _do_wait(self):
    141         time.sleep(self._suspend_duration)
    142 
    143     def _do_reboot(self):
    144         """
    145         Reboot host machine
    146         """
    147         self._client.reboot()
    148 
    149     def _do_suspend(self):
    150         """
    151         Suspend host machine
    152         """
    153         self._client.suspend(suspend_time=self._suspend_duration)
    154 
    155     def _write_data(self):
    156         """
    157         Write test data to host using hardware_StorageFio
    158         """
    159         logging.info('_write_data')
    160         self._client_at.run_test('hardware_StorageFio',
    161             check_client_result=True, disable_sysinfo=True, wait=0,
    162             tag='%s_%d' % ('write_data', self._loop_count),
    163             requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])
    164 
    165     def _verify_data(self):
    166         """
    167         Verify test data using hardware_StorageFio
    168         """
    169         logging.info(str('_verify_data #%d' % self._loop_count))
    170         self._client_at.run_test('hardware_StorageFio',
    171             check_client_result=True, disable_sysinfo=True, wait=0,
    172             tag='%s_%d' % ('verify_data', self._loop_count),
    173             requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
    174         self._power_func()
    175 
    176     def _full_disk_write(self):
    177         """
    178         Do the root device full area write and report performance
    179         Write random pattern for few hours, then do a write and a verify,
    180         noting the latency.
    181         """
    182         logging.info(str('_full_disk_write #%d' % self._loop_count))
    183 
    184         # use the default requirement that write different pattern arround.
    185         self._client_at.run_test('hardware_StorageFio',
    186                                  check_client_result=True,
    187                                  disable_sysinfo=True,
    188                                  tag='%s_%d' % ('soak', self._loop_count),
    189                                  requirements=[('64k_stress', [])],
    190                                  time_length=self._soak_time)
    191 
    192         self._power_func()
    193 
    194         self._client_at.run_test('hardware_StorageFio',
    195                                  check_client_result=True,
    196                                  disable_sysinfo=True,
    197                                  tag='%s_%d' % ('surf', self._loop_count),
    198                                  requirements=[('surfing', [])],
    199                                  time_length=self._soak_time)
    200 
    201         self._power_func()
    202 
    203         self._client_at.run_test('hardware_StorageFio',
    204                                  check_client_result=True,
    205                                  disable_sysinfo=True,
    206                                  tag='%s_%d' % ('integrity', self._loop_count),
    207                                  wait=0, integrity=True)
    208 
    209         self._power_func()
    210 
    211         self._client_at.run_test('hardware_StorageWearoutDetect',
    212                                  tag='%s_%d' % ('wearout', self._loop_count),
    213                                  wait=0, use_cached_result=False)
    214         # No checkout for wearout, to test device pass their limits.
    215