Home | History | Annotate | Download | only in firmware_ECThermal
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import re
      7 import time
      8 import xmlrpclib
      9 
     10 from autotest_lib.client.common_lib import error
     11 from autotest_lib.server.cros.faft.firmware_test import FirmwareTest
     12 
class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    The test compares the CPU temperature reported through servo and
    through ectool, verifies that the fan can be turned off, and verifies
    that the fan speed chosen by the EC under load is consistent with the
    EC thermal thresholds.
    """
    version = 1

    # Delay, in seconds, for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay, in seconds, for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay, in seconds, for stressing device with fan off to check
    # temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin, in degree C, for comparing servo based and ectool based CPU
    # temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase, in degree C, of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # PID of DUT stressing processes. This class-level list is only a
    # default: _stress_dut() rebinds the attribute on the instance before
    # appending to it, so the shared list itself is never mutated.
    _stress_pid = list()
     42 
     43     def enable_auto_fan_control(self):
     44         """Enable EC automatic fan speed control"""
     45         # We use set_nocheck because servo reports current target
     46         # RPM instead 'auto', and therefore servo.set always fails.
     47         self.servo.set_nocheck('fan_target_rpm', 'auto')
     48 
     49 
     50     def max_fan(self):
     51         """Maximize fan speed"""
     52         # We use set_nocheck because servo reports current target
     53         # RPM instead 'max', and therefore servo.set always fails.
     54         self.servo.set_nocheck('fan_target_rpm', 'max')
     55 
     56 
     57     def turn_off_fan(self):
     58         """Turn off fan"""
     59         self.servo.set('fan_target_rpm', 'off')
     60 
     61 
     62     def _get_setting_for_type(self, type_id):
     63         """
     64         Retrieve thermal setting for a given type of sensor
     65 
     66         Args:
     67           type_id: The ID of sensor type.
     68 
     69         Returns:
     70           A list containing thresholds in the following order:
     71             Warning
     72             CPU off
     73             All power off
     74             Fan speed thresholds
     75         """
     76         setting = list()
     77         current_id = 0
     78         while True:
     79             try:
     80                 lines = self.faft_client.system.run_shell_command_get_output(
     81                         'ectool thermalget %d %d' % (type_id, current_id))
     82             except xmlrpclib.Fault:
     83                 break
     84             pattern = re.compile('Threshold \d* [a-z ]* \d* is (\d*) K.')
     85             for line in lines:
     86                 matched = pattern.match(line)
     87                 if matched is not None:
     88                     # Convert degree K to degree C
     89                     setting.append(int(matched.group(1)) - 273)
     90             current_id = current_id + 1
     91 
     92         if len(setting) == 0:
     93             return None
     94         return setting
     95 
     96 
     97     def get_fan_steps(self):
     98         """Retrieve fan step config from EC"""
     99         num_steps = len(self._thermal_setting[0]) - 3
    100         self._fan_steps = list()
    101         expected_pat = (["Lowest speed: ([0-9-]+) RPM"] +
    102                         ["\d+ K:\s+([0-9-]+) RPM"] * num_steps)
    103         match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
    104         for m in match:
    105             self._fan_steps.append(int(m[1]))
    106 
    107         # Get the actual value of each fan step
    108         for i in xrange(num_steps + 1):
    109             if self._fan_steps[i] == 0:
    110                 continue
    111             self.servo.set_nocheck('fan_target_rpm', "%d" % self._fan_steps[i])
    112             self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))
    113 
    114         logging.info("Actual fan steps: %s", self._fan_steps)
    115 
    116 
    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC.

        Fills self._thermal_setting with one threshold list per sensor type
        and self._num_temp_sensor with the number of temperature sensors.
        """
        # Sensor types are probed in increasing order until one of them
        # yields no setting at all.
        self._thermal_setting = list()
        type_id = 0
        setting = self._get_setting_for_type(type_id)
        while setting is not None:
            self._thermal_setting.append(setting)
            type_id += 1
            setting = self._get_setting_for_type(type_id)
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Count temperature sensors: the first sensor ID for which
        # 'ectool temps' fails marks the end.
        self._num_temp_sensor = 0
        while True:
            probe_cmd = 'ectool temps %d' % self._num_temp_sensor
            try:
                self.faft_client.system.run_shell_command(probe_cmd)
            except xmlrpclib.Fault:
                break
            self._num_temp_sensor += 1
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)
    139 
    140 
    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT and EC for the thermal test.

        Stops the temp_metrics job on the DUT when it can be stopped and,
        if the EC has the 'thermal' capability, reads the thermal settings
        and fan steps before handing fan control back to the EC.

        Args:
          host: Passed through to the parent class initialize().
          cmdline_args: Passed through to the parent class initialize().
        """
        # Set a default first, so that cleanup() finds this attribute even
        # if one of the steps below raises before temp_metrics is probed.
        self._has_temp_metrics = False
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # NOTE(review): 'chan 0' presumably silences the EC console
        # channels; cleanup() sends 'chan 0xffffffff' to undo it - confirm.
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            # Stopping failed, so there is nothing to restart in cleanup().
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()
    155 
    156 
    def cleanup(self):
        """
        Restore the DUT and EC to their state before the test.

        Re-enables automatic fan control, restarts temp_metrics if this test
        stopped it and restores the EC console channel mask. The parent
        class cleanup() always runs, even if one of these steps raises.
        """
        try:
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()
            # The attribute is missing if initialize() failed early; treat
            # that as "temp_metrics was not stopped by this test".
            if getattr(self, '_has_temp_metrics', False):
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command('start temp_metrics')
            self.ec.send_command("chan 0xffffffff")
        finally:
            super(firmware_ECThermal, self).cleanup()
    165 
    166 
    def _find_cpu_sensor_id(self):
        """
        Find the CPU temperature sensor using ectool.

        Every sensor is queried with 'ectool tempsinfo'; the first one whose
        reported name is 'PECI' is taken as the CPU sensor.

        Returns:
          Integer ID of CPU temperature sensor.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        for temp_id in range(self._num_temp_sensor):
            info_lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            name_matches = (re.match('Sensor name: (.*)', text)
                            for text in info_lines)
            if any(found is not None and found.group(1) == 'PECI'
                   for found in name_matches):
                return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')
    186 
    187 
    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool.

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected,
            i.e. no line of its output carries a temperature value.
        """
        assert sensor_id < self._num_temp_sensor
        # At least one digit is required; with '\d*' a line without a value
        # would match and int('') would raise ValueError instead of the
        # documented TestError.
        pattern = re.compile(r'Reading temperature...(\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # Convert degree K to degree C
                return int(matched.group(1)) - 273
        # Only reached when ectool printed no parsable reading.
        raise error.TestError(
                "Cannot parse temperature reading of sensor %d" % sensor_id)
    212 
    213 
    def check_temp_report(self):
        """
        Checker of temperature reporting.

        Reads the CPU temperature once through ectool and once through
        servo. The test fails if the two readings differ by more than
        TEMP_MISMATCH_MARGIN.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        sensor = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", sensor)
        from_ectool = self._get_temp_reading(sensor)
        from_servo = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", from_servo)
        logging.info("CPU temperature from ectool: %d C", from_ectool)
        mismatch = abs(from_ectool - from_servo)
        if mismatch <= self.TEMP_MISMATCH_MARGIN:
            return
        raise error.TestFail(
                'CPU temperature readings from servo and ectool differ')
    235 
    236 
    def _stress_dut(self, threads=4):
        """
        Put CPU load on the DUT without blocking.

        Background 'dd' processes copying /dev/urandom to /dev/null are
        started on the DUT to drive the CPU temperature up. They keep
        running until _stop_stressing is called to kill them. Any 'dd'
        already running on the DUT is killed first.

        Args:
          threads: Number of stress processes to start.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        self.faft_client.system.run_shell_command('pkill dd')
        self._stress_pid = list()
        for _ in range(threads):
            self.faft_client.system.run_shell_command(
                    'dd if=/dev/urandom of=/dev/null bs=1M &')
        # Grep for [d]d instead of dd so that the grep process itself is
        # not listed among the matches.
        ps_output = self.faft_client.system.run_shell_command_get_output(
                "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'")
        for entry in ps_output:
            logging.info("PID is %s", entry)
            self._stress_pid.append(int(entry.strip()))
        return self._stress_pid
    267 
    268 
    def _stop_stressing(self):
        """Kill every stress process recorded in self._stress_pid."""
        for stress_pid in self._stress_pid:
            kill_cmd = 'kill -9 %d' % stress_pid
            self.faft_client.system.run_shell_command(kill_cmd)
    274 
    275 
    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails if fan speed is non-zero.
        Then it stresses the system for STRESS_DELAY_NO_FAN seconds and
        checks that the CPU temperature went up by at least
        TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when
            temperature increases by less than TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running on the DUT, even if
            # reading the temperature fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)
    307 
    308 
    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor.

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected,
            i.e. no line of its output carries a sensor type.
        """
        assert sensor_id < self._num_temp_sensor
        # At least one digit is required; with '\d*' a line without a value
        # would match and int('') would raise ValueError instead of the
        # documented TestError.
        pattern = re.compile(r'Sensor type: (\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Only reached when ectool printed no parsable sensor type.
        raise error.TestError(
                "Cannot parse sensor type of sensor %d" % sensor_id)
    332 
    333 
    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        Ignored sensors, and sensor types whose fan stepping is disabled,
        are reported as outcome 1 so that they never cause a failure on
        their own.

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected.
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed, i.e.
            one that is not among the known fan steps.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return 0x00

        # Thresholds are stored in degree C (K - 273), so -273 is a raw
        # threshold of 0 K.
        if self._thermal_setting[sensor_type][-1] == -273:
            # The fan stepping for this type of sensor is disabled
            return 0x00

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            # list.index() raises ValueError for a speed that is not one of
            # the fan steps; anything else must propagate untouched.
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Fan thresholds start at index 3 of a setting. For fan step idx the
        # lower bound comes from entry idx + 2 and the upper bound from
        # entry idx + 3.
        # NOTE(review): the 3 degree slack below the lower threshold
        # presumably allows for EC hysteresis while cooling down - confirm.
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = self._thermal_setting[sensor_type][3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self._thermal_setting[sensor_type][idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            return 0x00
        elif temp_reading < lower_bound:
            return 0x10
        else:
            return 0x01
    390 
    391 
    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected. The per-sensor
        results are OR-ed together: the check passes as soon as at least one
        sensor reports the fan speed as expected.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
          error.TestError: Raised when there is no temperature sensor to
            check the fan speed against.
        """
        if self._num_temp_sensor <= 0:
            raise error.TestError(
                    "No temperature sensor to check fan speed against")

        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            result = 0x00
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm, sensor_id)
        finally:
            # Stop the stress processes no matter which step above failed.
            self._stop_stressing()
        if result == 0x00:
            raise error.TestFail("Fan speed higher than expected")
        if result == 0x10:
            raise error.TestFail("Fan speed lower than expected")
    416 
    417 
    def run_once(self):
        """
        Run the thermal checks in order: temperature reporting, fan off,
        then automatic fan control.

        Raises:
          error.TestNAError: Raised when the EC lacks the 'thermal'
            capability.
        """
        has_thermal = self.check_ec_capability(['thermal'])
        if not has_thermal:
            raise error.TestNAError("Nothing needs to be tested on this device")

        # Step 1: servo and ectool must agree on the CPU temperature.
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        # Step 2: with the fan forced off, the CPU must heat up under load.
        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        # Step 3: back in automatic mode, fan speed must follow temperature.
        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()
    431