Home | History | Annotate | Download | only in hardware_StorageStress
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging, sys, time
      6 from autotest_lib.client.common_lib import error
      7 from autotest_lib.server import autotest
      8 from autotest_lib.server import hosts
      9 from autotest_lib.server import test
     10 
     11 class hardware_StorageStress(test.test):
     12     """
     13     Integrity stress test for storage device
     14     """
     15     version = 1
     16 
     17     _HOURS_IN_SEC = 3600
     18     # Define default value for the test case
     19     _TEST_GAP = 60 # 1 min
     20     _TEST_DURATION = 12 * _HOURS_IN_SEC
     21     _SUSPEND_DURATION = _HOURS_IN_SEC
     22     _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
     23     _FIO_WRITE_FLAGS = []
     24     _FIO_VERIFY_FLAGS = ['--verifyonly']
     25 
     26     def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
     27                  power_command='reboot', storage_test_command='integrity',
     28                  suspend_duration=_SUSPEND_DURATION, storage_test_argument=''):
     29         """
     30         Run the Storage stress test
     31         Use hardwareStorageFio to run some test_command repeatedly for a long
     32         time. Between each iteration of test command, run power command such as
     33         reboot or suspend.
     34 
     35         @param client_ip:     string of client's ip address (required)
     36         @param gap:           gap between each test (second) default = 1 min
     37         @param duration:      duration to run test (second) default = 12 hours
     38         @param power_command: command to do between each test Command
     39                               possible command: reboot / suspend / nothing
     40         @param storage_test_command:  FIO command to run
     41                               - integrity:  Check data integrity
     42                               - full_write: Check performance consistency
     43                                             for full disk write. Use argument
     44                                             to determine which disk to write
     45         @param suspend_duration: if power_command is suspend, how long the DUT
     46                               is suspended.
     47         """
     48 
     49         # init test
     50         if not client_ip:
     51             error.TestError("Must provide client's IP address to test")
     52 
     53         self._client = hosts.create_host(client_ip)
     54         self._client_at = autotest.Autotest(self._client)
     55         self._results = {}
     56         self._suspend_duration = suspend_duration
     57 
     58         # parse power command
     59         if power_command == 'nothing':
     60             power_func = self._do_nothing
     61         elif power_command == 'reboot':
     62             power_func = self._do_reboot
     63         elif power_command == 'suspend':
     64             power_func = self._do_suspend
     65         else:
     66             raise error.TestFail(
     67                 'Test failed with error: Invalid power command')
     68 
     69         # Test is doing a lot of disk activity, monitor disk data at each iteration.
     70         self.job.add_sysinfo_logfile('/var/log/storage_info.txt', on_every_test=True)
     71 
     72         # parse test command
     73         if storage_test_command == 'integrity':
     74             setup_func = self._write_data
     75             loop_func = self._verify_data
     76         elif storage_test_command == 'full_write':
     77             setup_func = self._do_nothing
     78             loop_func = self._full_disk_write
     79             # Do at least 2 soak runs. Given the absolute minimum of a loop is
     80             # around 1h, duration should be at least 1h.
     81             self._soak_time = min(self._TEST_DURATION, duration / 4)
     82         else:
     83             raise error.TestFail('Test failed with error: Invalid test command')
     84 
     85         # init statistic variable
     86         min_time_per_loop = sys.maxsize
     87         max_time_per_loop = 0
     88         all_loop_time = 0
     89         avr_time_per_loop = 0
     90         self._loop_count = 0
     91         setup_func()
     92 
     93         start_time = time.time()
     94 
     95         while time.time() - start_time < duration:
     96             # sleep
     97             time.sleep(gap)
     98 
     99             self._loop_count += 1
    100 
    101             # do power command & verify data & calculate time
    102             loop_start_time = time.time()
    103             power_func()
    104             loop_func()
    105             loop_time = time.time() - loop_start_time
    106 
    107             # update statistic
    108             all_loop_time += loop_time
    109             min_time_per_loop = min(loop_time, min_time_per_loop)
    110             max_time_per_loop = max(loop_time, max_time_per_loop)
    111 
    112         if self._loop_count > 0:
    113             avr_time_per_loop = all_loop_time / self._loop_count
    114 
    115         logging.info(str('check data count: %d' % self._loop_count))
    116 
    117         # report result
    118         self.write_perf_keyval({'loop_count':self._loop_count})
    119         self.write_perf_keyval({'min_time_per_loop':min_time_per_loop})
    120         self.write_perf_keyval({'max_time_per_loop':max_time_per_loop})
    121         self.write_perf_keyval({'avr_time_per_loop':avr_time_per_loop})
    122 
    123     def _do_nothing(self):
    124         pass
    125 
    126     def _do_reboot(self):
    127         """
    128         Reboot host machine
    129         """
    130         self._client.reboot()
    131 
    132     def _do_suspend(self):
    133         """
    134         Suspend host machine
    135         """
    136         self._client.suspend(suspend_time=self._suspend_duration)
    137 
    138     @classmethod
    139     def _check_client_test_result(cls, client):
    140         """
    141         Check result of the client test.
    142         Auto test will store results in the file named status.
    143         We check that the second to last line in that file begin with 'END GOOD'
    144 
    145         @ raise an error if test fails.
    146         """
    147         client_result_dir = '%s/results/default' % client.autodir
    148         command = 'tail -2 %s/status | head -1' % client_result_dir
    149         status = client.run(command).stdout.strip()
    150         logging.info(status)
    151         if status[:8] != 'END GOOD':
    152             raise error.TestFail('client in StorageStress failed.')
    153 
    154 
    155     def _write_data(self):
    156         """
    157         Write test data to host using hardware_StorageFio
    158         """
    159         logging.info('_write_data')
    160         self._client_at.run_test('hardware_StorageFio', disable_sysinfo=True,
    161             wait=0, tag='%s_%d' % ('write_data', self._loop_count),
    162             requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])
    163         self._check_client_test_result(self._client)
    164 
    165     def _verify_data(self):
    166         """
    167         Verify test data using hardware_StorageFio
    168         """
    169         logging.info(str('_verify_data #%d' % self._loop_count))
    170         self._client_at.run_test('hardware_StorageFio', disable_sysinfo=True,
    171             wait=0, tag='%s_%d' % ('verify_data', self._loop_count),
    172             requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
    173         self._check_client_test_result(self._client)
    174 
    175     def _full_disk_write(self):
    176         """
    177         Do the root device full area write and report performance
    178         Write random pattern for few hours, then do a write and a verify,
    179         noting the latency.
    180         """
    181         logging.info(str('_full_disk_write #%d' % self._loop_count))
    182 
    183         # use the default requirement that write different pattern arround.
    184         self._client_at.run_test('hardware_StorageFio',
    185                                  disable_sysinfo=True,
    186                                  tag='%s_%d' % ('soak', self._loop_count),
    187                                  requirements=[('64k_stress', [])],
    188                                  time_length=self._soak_time)
    189         self._check_client_test_result(self._client)
    190 
    191         self._client_at.run_test('hardware_StorageFio',
    192                                  disable_sysinfo=True,
    193                                  tag='%s_%d' % ('surf', self._loop_count),
    194                                  requirements=[('surfing', [])],
    195                                  time_length=self._soak_time)
    196         self._check_client_test_result(self._client)
    197 
    198         self._client_at.run_test('hardware_StorageFio',
    199                                  disable_sysinfo=True,
    200                                  tag='%s_%d' % ('integrity', self._loop_count),
    201                                  wait=0, integrity=True)
    202         self._check_client_test_result(self._client)
    203 
    204         self._client_at.run_test('hardware_StorageWearoutDetect',
    205                                  tag='%s_%d' % ('wearout', self._loop_count),
    206                                  wait=0, use_cached_result=False)
    207         # No checkout for wearout, to test device pass their limits.
    208