# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re
import time
import xmlrpclib

from autotest_lib.client.common_lib import error
from autotest_lib.server.cros.faft.firmware_test import FirmwareTest


class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    The test compares the CPU temperature reported by servo and by ectool,
    verifies that the fan can be turned off (and that the CPU then heats up
    under load), and verifies the fan speed chosen by automatic fan control
    against the thermal thresholds read from the EC.
    """
    version = 1

    # Delay for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay for stressing device with fan off to check temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # PID of DUT stressing processes. This class-level list is only a
    # default; _stress_dut() rebinds a fresh list on the instance.
    _stress_pid = list()

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')


    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')


    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')


    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Runs 'ectool thermalget <type_id> <threshold_id>' with increasing
        threshold IDs until the command fails, collecting every threshold
        that is reported.

        Args:
          type_id: The ID of sensor type.

        Returns:
          A list containing thresholds (in degree C) in the following order:
            Warning
            CPU off
            All power off
            Fan speed thresholds
          None is returned if no threshold could be read for this type.
        """
        # Compiled once; the pattern does not change between iterations.
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d*) K.')
        setting = list()
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # The command failing marks the end of the threshold list.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) - 273)
            current_id = current_id + 1

        if len(setting) == 0:
            return None
        return setting


    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        The configured RPM of each fan step is read from the EC console
        ("thermalfan 0"). Each non-zero step is then written to servo and
        read back, so that self._fan_steps holds the values servo actually
        reports for each step.

        Raises:
          error.TestError: Raised if no thermal setting has been retrieved.
        """
        if not self._thermal_setting:
            raise error.TestError(
                    "No thermal setting available to derive fan steps")
        # The first three thresholds are warning / CPU off / all power off;
        # the remaining ones are fan speed thresholds.
        num_steps = len(self._thermal_setting[0]) - 3
        self._fan_steps = list()
        expected_pat = ([r"Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        for m in match:
            self._fan_steps.append(int(m[1]))

        # Get the actual value of each fan step
        for i in range(num_steps + 1):
            if self._fan_steps[i] == 0:
                continue
            self.servo.set_nocheck('fan_target_rpm', "%d" % self._fan_steps[i])
            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))

        logging.info("Actual fan steps: %s", self._fan_steps)


    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Populates self._thermal_setting with one threshold list per sensor
        type (probing type IDs from 0 until a type yields no thresholds) and
        self._num_temp_sensor with the number of sensor IDs for which
        'ectool temps <id>' succeeds.
        """
        self._thermal_setting = list()
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id = type_id + 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command('ectool temps %d' %
                                                   self._num_temp_sensor)
                self._num_temp_sensor = self._num_temp_sensor + 1
            except xmlrpclib.Fault:
                # First failing sensor ID marks the end of the sensor list.
                break
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)


    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT for thermal testing.

        Stops the temp_metrics job on the DUT if it can be stopped
        (remembering whether it was, so cleanup() can restart it) and, when
        the EC has the 'thermal' capability, reads the thermal settings and
        fan steps and enables automatic fan control.

        Args:
          host: Passed through to FirmwareTest.initialize.
          cmdline_args: Passed through to FirmwareTest.initialize.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # NOTE(review): presumably silences EC console channel output so
        # that command output parsing is reliable - confirm.
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()


    def cleanup(self):
        """
        Restore fan control, temp_metrics and EC console channels.

        The parent class cleanup always runs, even if one of the restore
        steps here raises.
        """
        try:
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()
            # initialize() may have failed before this flag was recorded.
            if getattr(self, '_has_temp_metrics', False):
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command(
                        'start temp_metrics')
            self.ec.send_command("chan 0xffffffff")
        finally:
            super(firmware_ECThermal, self).cleanup()


    def _find_cpu_sensor_id(self):
        """
        This function finds CPU temperature sensor using ectool.

        Returns:
          Integer ID of CPU temperature sensor, i.e. the first sensor whose
          'ectool tempsinfo' output names it 'PECI'.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = re.match('Sensor name: (.*)', line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')


    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Reading temperature...(\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # ectool reports degree K; convert to degree C
                return int(matched.group(1)) - 273
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN,
        test fails.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')


    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
          threads: Number of threads (processes) when stressing forever.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        # Kill any dd processes already running before starting new ones.
        self.faft_client.system.run_shell_command('pkill dd')
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M &'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        self._stress_pid = list()
        for _ in range(threads):
            self.faft_client.system.run_shell_command(stress_cmd)
        lines = self.faft_client.system.run_shell_command_get_output(
                    pid_cmd)
        for line in lines:
            # A blank line carries no PID and would break int().
            if not line.strip():
                continue
            logging.info("PID is %s", line)
            self._stress_pid.append(int(line.strip()))
        return self._stress_pid


    def _stop_stressing(self):
        """Stop stressing DUT system"""
        stop_cmd = 'kill -9 %d'
        for pid in self._stress_pid:
            self.faft_client.system.run_shell_command(stop_cmd % pid)
        # Forget the killed PIDs so a repeated call does not kill them again.
        self._stress_pid = list()


    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when
            temperature doesn't increase by at least TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running on the DUT, even if
            # reading the temperature fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)


    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Sensor type: (\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected. Also returned for sensors
                that are ignored or whose fan stepping is disabled.
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return 0x00

        # -273 C is a raw threshold of 0 K after the K -> C conversion.
        if self._thermal_setting[sensor_type][-1] == -273:
            # The fan stepping for this type of sensor is disabled
            return 0x00

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Fan thresholds start at index 3 of the setting list, so fan step
        # idx is bounded by thresholds idx + 2 (below) and idx + 3 (above).
        # NOTE(review): the "- 3" on the lower bound looks like a hysteresis
        # allowance in degree C - confirm against the EC thermal engine.
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = self._thermal_setting[sensor_type][3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self._thermal_setting[sensor_type][idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        # NOTE(review): a reading above upper_bound yields 0x00 ("higher
        # than expected") and one below lower_bound yields 0x10 ("lower
        # than expected"); this mapping looks inverted relative to the
        # docstring - confirm the intended semantics before relying on it.
        if temp_reading > upper_bound:
            return 0x00
        elif temp_reading < lower_bound:
            return 0x10
        else:
            return 0x01


    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected. The per-sensor
        results are OR-ed together before being judged.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
        """
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            result = 0x00
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm, sensor_id)
        finally:
            # Never leave the stress processes running on the DUT.
            self._stop_stressing()
        if result == 0x00:
            raise error.TestFail("Fan speed higher than expected")
        if result == 0x10:
            raise error.TestFail("Fan speed lower than expected")


    def run_once(self):
        """
        Run the thermal test sequence.

        Checks temperature reporting, then fan-off behavior, then automatic
        fan control.

        Raises:
          error.TestNAError: Raised when the EC lacks the 'thermal'
            capability.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()