# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging, sys, time
from autotest_lib.client.common_lib import error
from autotest_lib.server import autotest
from autotest_lib.server import hosts
from autotest_lib.server import test


class hardware_StorageStress(test.test):
    """
    Integrity stress test for storage device.

    Server-side test that repeatedly runs a storage workload on a client
    (through the hardware_StorageFio client test), optionally rebooting or
    suspending the client between iterations, and reports per-loop timing
    statistics as perf keyvals.
    """
    version = 1

    _HOURS_IN_SEC = 3600
    # Define default value for the test case
    _TEST_GAP = 60 # 1 min
    _TEST_DURATION = 12 * _HOURS_IN_SEC
    _SUSPEND_DURATION = _HOURS_IN_SEC
    _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
    _FIO_WRITE_FLAGS = []
    _FIO_VERIFY_FLAGS = ['--verifyonly']

    def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
                 power_command='reboot', storage_test_command='integrity',
                 suspend_duration=_SUSPEND_DURATION, storage_test_argument=''):
        """
        Run the Storage stress test.

        Use hardware_StorageFio to run some test command repeatedly for a
        long time. Between each iteration of the test command, run a power
        command such as reboot or suspend.

        @param client_ip: string of client's ip address (required)
        @param gap: gap between each test (second) default = 1 min
        @param duration: duration to run test (second) default = 12 hours
        @param power_command: command to do between each test command
                              possible command: reboot / suspend / nothing
        @param storage_test_command: FIO command to run
                              - integrity:  Check data integrity
                              - full_write: Check performance consistency
                                            for full disk write.
        @param suspend_duration: if power_command is suspend, how long the DUT
                              is suspended.
        @param storage_test_argument: accepted for interface compatibility;
                              it is not read anywhere in this method.

        @raise error.TestError: if client_ip is empty.
        @raise error.TestFail: if power_command or storage_test_command is
                              not one of the supported values, or if a client
                              test run is reported as failed.
        """

        # init test
        if not client_ip:
            # The exception must actually be raised; merely constructing it
            # lets the test continue with an unusable address.
            raise error.TestError("Must provide client's IP address to test")

        self._client = hosts.create_host(client_ip)
        self._client_at = autotest.Autotest(self._client)
        self._results = {}
        self._suspend_duration = suspend_duration

        # parse power command
        if power_command == 'nothing':
            power_func = self._do_nothing
        elif power_command == 'reboot':
            power_func = self._do_reboot
        elif power_command == 'suspend':
            power_func = self._do_suspend
        else:
            raise error.TestFail(
                'Test failed with error: Invalid power command')

        # Test is doing a lot of disk activity, monitor disk data at each
        # iteration.
        self.job.add_sysinfo_logfile('/var/log/storage_info.txt',
                                     on_every_test=True)

        # parse test command
        if storage_test_command == 'integrity':
            # Write the data once up front, then verify it on every loop.
            setup_func = self._write_data
            loop_func = self._verify_data
        elif storage_test_command == 'full_write':
            setup_func = self._do_nothing
            loop_func = self._full_disk_write
            # Do at least 2 soak runs. Given the absolute minimum of a loop is
            # around 1h, duration should be at least 1h.
            # NOTE(review): under Python 3 `duration / 4` is true division and
            # yields a float -- confirm hardware_StorageFio accepts a float
            # time_length.
            self._soak_time = min(self._TEST_DURATION, duration / 4)
        else:
            raise error.TestFail('Test failed with error: Invalid test command')

        # init statistic variable
        min_time_per_loop = sys.maxsize
        max_time_per_loop = 0
        all_loop_time = 0
        avr_time_per_loop = 0
        self._loop_count = 0
        setup_func()

        start_time = time.time()

        while time.time() - start_time < duration:
            # sleep
            time.sleep(gap)

            self._loop_count += 1

            # do power command & verify data & calculate time
            loop_start_time = time.time()
            power_func()
            loop_func()
            loop_time = time.time() - loop_start_time

            # update statistic
            all_loop_time += loop_time
            min_time_per_loop = min(loop_time, min_time_per_loop)
            max_time_per_loop = max(loop_time, max_time_per_loop)

        # Guard against a zero-iteration run (e.g. duration <= 0), in which
        # case the average stays at its initial value of 0.
        if self._loop_count > 0:
            avr_time_per_loop = all_loop_time / self._loop_count

        logging.info('check data count: %d', self._loop_count)

        # report result
        # Each statistic is written with its own write_perf_keyval call, as in
        # the original code, so the layout of the keyval output is unchanged.
        self.write_perf_keyval({'loop_count':self._loop_count})
        self.write_perf_keyval({'min_time_per_loop':min_time_per_loop})
        self.write_perf_keyval({'max_time_per_loop':max_time_per_loop})
        self.write_perf_keyval({'avr_time_per_loop':avr_time_per_loop})

    def _do_nothing(self):
        """
        No-op used when no power action or no setup step is requested.
        """
        pass

    def _do_reboot(self):
        """
        Reboot host machine
        """
        self._client.reboot()

    def _do_suspend(self):
        """
        Suspend host machine for self._suspend_duration
        """
        self._client.suspend(suspend_time=self._suspend_duration)

    @classmethod
    def _check_client_test_result(cls, client):
        """
        Check result of the client test.
        Auto test will store results in the file named status.
        We check that the second to last line in that file begin with 'END GOOD'

        @param client: host object providing `autodir` and `run()`.

        @raise error.TestFail: if the second to last status line does not
                               begin with 'END GOOD'.
        """
        client_result_dir = '%s/results/default' % client.autodir
        # `tail -2 | head -1` extracts the second to last line of the file.
        command = 'tail -2 %s/status | head -1' % client_result_dir
        status = client.run(command).stdout.strip()
        logging.info(status)
        if not status.startswith('END GOOD'):
            raise error.TestFail('client in StorageStress failed.')

    def _write_data(self):
        """
        Write test data to host using hardware_StorageFio
        """
        logging.info('_write_data')
        self._client_at.run_test(
            'hardware_StorageFio', disable_sysinfo=True, wait=0,
            tag='write_data_%d' % self._loop_count,
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])
        self._check_client_test_result(self._client)

    def _verify_data(self):
        """
        Verify test data using hardware_StorageFio
        """
        logging.info('_verify_data #%d', self._loop_count)
        self._client_at.run_test(
            'hardware_StorageFio', disable_sysinfo=True, wait=0,
            tag='verify_data_%d' % self._loop_count,
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
        self._check_client_test_result(self._client)

    def _full_disk_write(self):
        """
        Do the root device full area write and report performance
        Write random pattern for few hours, then do a write and a verify,
        noting the latency.
        """
        logging.info('_full_disk_write #%d', self._loop_count)

        # use the default requirement that write different pattern around.
        self._client_at.run_test('hardware_StorageFio',
                                 disable_sysinfo=True,
                                 tag='soak_%d' % self._loop_count,
                                 requirements=[('64k_stress', [])],
                                 time_length=self._soak_time)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageFio',
                                 disable_sysinfo=True,
                                 tag='surf_%d' % self._loop_count,
                                 requirements=[('surfing', [])],
                                 time_length=self._soak_time)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageFio',
                                 disable_sysinfo=True,
                                 tag='integrity_%d' % self._loop_count,
                                 wait=0, integrity=True)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageWearoutDetect',
                                 tag='wearout_%d' % self._loop_count,
                                 wait=0, use_cached_result=False)
        # No checkout for wearout, to test device pass their limits.