Home | History | Annotate | Download | only in power_Thermal
      1 # Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import glob, logging, os, tempfile, threading, time
      6 from autotest_lib.client.bin import test
      7 from autotest_lib.client.common_lib import error, utils
      8 
      9 class PlatformDescriptor(object):
     10     '''
     11     An object to keep platform specific information.
     12 
     13     @num_cores - number of CPU cores in this platform
     14     @max_cpu_freq - maximum frequency the CPU can be running at
     15     @min_cpu_freq - minimal frequency the CPU can be running at
     16     '''
     17 
     18     def __init__(self, num_cores, max_cpu_freq, min_cpu_freq):
     19         self.num_cores = num_cores
     20         self.max_cpu_freq = max_cpu_freq
     21         self.min_cpu_freq = min_cpu_freq
     22 
     23 
# Base name of the sysfs file where the CPU temperature is reported. The file
# is exported by the temperature monitor driver and lives in that device's
# subtree. The test locates the subtree by searching /sys for this file name,
# and exactly one file with this name is expected to exist. The ext_ prefix
# indicates that the reading comes from a sensor located next to the CPU. This
# facility may be unavailable on some platforms; the test would need to be
# updated to accommodate those.
#
# The 'standard' temperature reading available through
# /sys/class/hwmon/hwmon0/device/temperature does not represent the actual CPU
# temperature: when the CPU load changes, the 'standard' reading changes much
# more slowly, and to a much smaller extent, than the value in
# */ext_temperature.
EXT_TEMP_SENSOR_FILE = 'ext_temperature'

# Base name of the file where the throttling temperature is set (if the CPU
# temperature exceeds this value, clock throttling starts). It is read and
# written in the same directory as EXT_TEMP_SENSOR_FILE.
THROTTLE_EXT_LIMIT_FILE = 'throttle_ext_limit'

# Root directory for all sysfs information about the CPU(s).
CPU_INFO_ROOT = '/sys/devices/system/cpu'

# Template for the path of a per core cpufreq file; it takes the core number
# and the base name of the file, in that order.
PER_CORE_FREQ_TEMPLATE = CPU_INFO_ROOT + '/cpu%d/cpufreq/%s'

# Path prefix for the temporary files used by this test: the directory part is
# where the files are created and the base name part is their common name
# prefix.
TMP_FILE_TEMPLATE = '/tmp/thermal_'

# Temperature difference expected to be caused by increased CPU activity. The
# test sets the throttling limit DELTA/2 above the starting temperature and
# treats a rise of DELTA without throttling as a failure.
DELTA = 3.0

# Name of the file controlling a core's clocking discipline.
GOVERNOR = 'scaling_governor'

# Name of the file providing a space separated list of the available clocking
# disciplines.
AVAILABLE_GOVERNORS = 'scaling_available_governors'
     62 
     63 def clean_up(obj):
     64     '''
     65     A function to register with the autotest engine to ensure proper cleanup.
     66 
     67     It will be called after the test has run, either completing successfully
     68     or throwing an exception.
     69     '''
     70 
     71     obj.cleanup()
     72 
     73 
     74 class power_Thermal(test.test):
     75     version = 1
     76 
     77 
     78     def _cpu_heater(self):
     79         '''
     80         A function to execute some code to heat up the target.
     81 
     82         This function is run on a separate thread, all it does - opens a file
     83         for writing, writes it with 100K characters, closes and removes the
     84         file, it is running in a tight loop until the stop_all_workers flag
     85         turns True.
     86 
     87         Multiple threads are spawn to cause maximum CPU activity.
     88         '''
     89 
     90         (handle, fname) = tempfile.mkstemp(
     91             prefix=os.path.basename(TMP_FILE_TEMPLATE),
     92             dir=os.path.dirname(TMP_FILE_TEMPLATE))
     93         os.close(handle)
     94         os.remove(fname)
     95         while not self.stop_all_workers:
     96             f = open(fname, 'w')
     97             f.write('x' * 100000)
     98             f.close()
     99             os.remove(fname)
    100 
    101 
    102     def _add_heater_thread(self):
    103         '''Add a thread to run another instance of _cpu_heater().'''
    104 
    105         thread_count = len(self.worker_threads)
    106         logging.info('adding thread number %d' % thread_count)
    107         new_thread = threading.Thread(target=self._cpu_heater)
    108         self.worker_threads.append(new_thread)
    109         new_thread.daemon = True
    110         new_thread.start()
    111 
    112 
    113     def _throttle_count(self):
    114         '''
    115         Return current throttling status of all cores.
    116 
    117         The return integer value is the sum of all cores' throttling status.
    118         When the sum is equal the core number - all cores are throttling.
    119         '''
    120 
    121         count = 0
    122         for cpu in range(self.pl_desc.num_cores):
    123             count += int(utils.read_file(
    124                     PER_CORE_FREQ_TEMPLATE % (cpu, 'throttle')))
    125         return count
    126 
    127 
    128     def _cpu_freq(self, cpu):
    129         '''Return current clock frequency of a CPU, integer in Kilohertz.'''
    130 
    131         return int(utils.read_file(
    132                 PER_CORE_FREQ_TEMPLATE % (cpu, 'cpuinfo_cur_freq')))
    133 
    134 
    135     def _cpu_temp(self):
    136         '''Return current CPU temperature, a float value.'''
    137 
    138         return float(utils.read_file(
    139                 os.path.join(self.temperature_data_path, EXT_TEMP_SENSOR_FILE)))
    140 
    141 
    142     def _throttle_limit(self):
    143         '''
    144         Return current CPU throttling temperature threshold.
    145 
    146         If CPU temperature exceeds this value, clock throttling is activated,
    147         causing CPU slowdown.
    148 
    149         Returns the limit as a float value.
    150         '''
    151 
    152         return float(utils.read_file(
    153                 os.path.join(self.temperature_data_path,
    154                              THROTTLE_EXT_LIMIT_FILE)))
    155 
    156 
    157     def _set_throttle_limit(self, new_limit):
    158         '''
    159         Set current CPU throttling temperature threshold.
    160 
    161         The passed in float value is rounded to the nearest integer.
    162         '''
    163 
    164         utils.open_write_close(
    165             os.path.join(
    166                 self.temperature_data_path, THROTTLE_EXT_LIMIT_FILE),
    167             '%d' % int(round(new_limit)))
    168 
    169 
    170     def _check_freq(self):
    171         '''Verify that all CPU clocks are in range for this target.'''
    172 
    173         for cpu in range(self.pl_desc.num_cores):
    174             freq = self._cpu_freq(cpu)
    175             if self.pl_desc.min_cpu_freq <= freq <= self.pl_desc.max_cpu_freq:
    176                 return
    177             raise error.TestError('Wrong cpu %d frequency reading %d' % (
    178                     cpu, freq))
    179 
    180 
    181     def _get_cpu_freq_raised(self):
    182         '''
    183         Bring all cores clock to max frequency.
    184 
    185         This function uses the scaling_governor mechanism to force the cores
    186         to run at maximum frequency, writing the string 'performance' into
    187         each core's governor file.
    188 
    189         The current value (if not 'performance') is preserved to be restored
    190         in the end of the test.
    191 
    192         Returns a dictionary where keys are the core numbers and values are
    193         the preserved governor setting.
    194 
    195         raises TestError in case 'performance' setting is not allowed on any
    196                of the cores, or the clock frequency does not reach max on any
    197                of the cores in 1 second.
    198         '''
    199 
    200         rv = {}
    201         for cpu in range(self.pl_desc.num_cores):
    202             target = 'performance'
    203             gov_file = PER_CORE_FREQ_TEMPLATE % (cpu, GOVERNOR)
    204             current_gov = utils.read_file(gov_file).strip()
    205             available_govs = utils.read_file(PER_CORE_FREQ_TEMPLATE % (
    206                     cpu, AVAILABLE_GOVERNORS)).split()
    207 
    208             if current_gov != target:
    209                 if not target in available_govs:
    210                     raise error.TestError('core %d does not allow setting %s'
    211                                           % (cpu, target))
    212                 logging.info('changing core %d governor from %s to %s' % (
    213                         cpu, current_gov, target))
    214                 utils.open_write_close(gov_file, target)
    215                 rv[cpu] = current_gov
    216 
    217         for _ in range(2):  # Wait for no more than 1 second
    218             for cpu in range(self.pl_desc.num_cores):
    219                 if self._cpu_freq(cpu) != self.pl_desc.max_cpu_freq:
    220                     break
    221             else:
    222                 return rv
    223 
    224         freqs = []
    225         for cpu in range(self.pl_desc.num_cores):
    226             freqs.append('%d' % self._cpu_freq(cpu))
    227         raise error.TestError('failed to speed up some CPU clocks: %s' %
    228                               ', '.join(freqs))
    229 
    230 
    231     def _get_cpu_temp_raised(self):
    232         '''
    233         Start more threads to increase CPU temperature.
    234 
    235         This function starts 10 threads and waits till either of the two
    236         events happen:
    237 
    238         - the throttling is activated (the threshold is expected to be set at
    239           DELTA/2 above the temperature when the test started). This is
    240           considered a success, the function returns.
    241 
    242         - the temperature raises DELTA degrees above the original temperature
    243           but throttling does not start. This is considered an overheating
    244           failure, a test error is raised.
    245 
    246         If the temperature does not reach the DELTA and throttling does not
    247         start in 30 seconds - a test error is also raised in this case.
    248         '''
    249 
    250         base_temp = self._cpu_temp()
    251         # Start 10 more cpu heater threads
    252         for _ in range(10):
    253             self._add_heater_thread()
    254 
    255         # Wait 30 seconds for the temp to raise DELTA degrees or throttling to
    256         # start
    257         for count in range(30):
    258             new_temp = self._cpu_temp()
    259             if new_temp - base_temp >= DELTA:
    260                 raise error.TestError(
    261                     'Reached temperature of %2.1fC in %d'
    262                     ' seconds, no throttling.'
    263                     % count)
    264             if self._throttle_count() == self.pl_desc.num_cores:
    265                 logging.info('full throttle after %d seconds' % count)
    266                 return
    267             time.sleep(1)
    268         raise error.TestError(
    269             'failed to raise CPU temperature from %s (reached %s), '
    270             '%d cores throttled' % (
    271                 str(base_temp), str(new_temp), self._throttle_count()))
    272 
    273     def _get_platform_descriptor(self):
    274         '''Fill out the platform descriptor to be used by the test.'''
    275 
    276         present = utils.read_file(os.path.join(CPU_INFO_ROOT, 'present'))
    277         if present.count('-') != 1:
    278             raise error.TestError(
    279                 "can't determine number of cores from %s" % present)
    280         (min_core, max_core) = tuple(int(x) for x in present.split('-'))
    281         min_freq = int(utils.read_file(
    282             PER_CORE_FREQ_TEMPLATE % (0, 'cpuinfo_min_freq')))
    283         max_freq = int(utils.read_file(
    284             PER_CORE_FREQ_TEMPLATE % (0, 'cpuinfo_max_freq')))
    285 
    286         return PlatformDescriptor(max_core - min_core + 1, max_freq, min_freq)
    287 
    288 
    289     def _prepare_test(self):
    290         '''Prepare test: check initial conditions and set variables.'''
    291 
    292         ext_temp_path = utils.system_output(
    293             'find /sys -name %s' % EXT_TEMP_SENSOR_FILE).splitlines()
    294         if len(ext_temp_path) != 1:
    295             raise error.TestError('found %d sensor files' % len(ext_temp_path))
    296 
    297         self.temperature_data_path = os.path.dirname(ext_temp_path[0])
    298 
    299         self.stop_all_workers = False
    300 
    301         self.pl_desc = self._get_platform_descriptor()
    302 
    303         # Verify CPU frequency is in range.
    304         self._check_freq()
    305 
    306         # Make sure we are not yet throttling.
    307         if self._throttle_count():
    308             raise error.TestError('Throttling active before test started')
    309 
    310         # Remember throttling level setting before test started.
    311         self.preserved_throttle_limit = self._throttle_limit()
    312 
    313         if self.preserved_throttle_limit - self._cpu_temp() < 4 * DELTA:
    314             raise error.TestError('Target is too hot: %s C' % str(
    315                     self._cpu_temp()))
    316 
    317         # list to keep track of threads started to heat up CPU.
    318         self.worker_threads = []
    319 
    320         # Dictionary of saved cores' scaling governor settings.
    321         self.saved_governors = {}
    322 
    323         self.register_after_iteration_hook(clean_up)
    324 
    325 
    326     def run_once(self):
    327         self._prepare_test()
    328         logging.info('starting temperature is %s' % str(self._cpu_temp()))
    329         logging.info('starting frequency is %s' % str(self._cpu_freq(0)))
    330 
    331         self.saved_governors = self._get_cpu_freq_raised()
    332         self._set_throttle_limit(self._cpu_temp() + DELTA/2)
    333         self._get_cpu_temp_raised()
    334         self._set_throttle_limit(self.preserved_throttle_limit)
    335 
    336         # Half a second after restoring the throttling limit is plenty for
    337         # throttling to stop.
    338         time.sleep(.5)
    339         if self._throttle_count():
    340             raise error.TestError('Throttling did not stop')
    341 
    342         logging.info('ending temperature is %s' % str(self._cpu_temp()))
    343         logging.info('ending frequency is %s' % str(self._cpu_freq(0)))
    344 
    345 
    346     def cleanup(self):
    347         self.stop_all_workers = True
    348         self._set_throttle_limit(self.preserved_throttle_limit)
    349         logging.info('stopping %d thread(s)' % len(self.worker_threads))
    350         runaway_threads = 0
    351         while self.worker_threads:
    352             t = self.worker_threads.pop()
    353             t.join(.5)
    354             if t.isAlive():
    355                 runaway_threads += 1
    356         if runaway_threads:
    357             for f in glob.glob('%s*' % TMP_FILE_TEMPLATE):
    358                 logging.info('removing %s' % f)
    359                 os.remove(f)
    360             raise error.TestError(
    361                 'Failed to join %d worker thread(s)' % runaway_threads)
    362 
    363         if not self.saved_governors:
    364             return
    365 
    366         for (cpu, gov) in self.saved_governors.iteritems():
    367             gov_file = PER_CORE_FREQ_TEMPLATE % (cpu, GOVERNOR)
    368             logging.info('restoring core %d governor to %s' % (cpu, gov))
    369             utils.open_write_close(gov_file, gov)
    370         self.saved_governors = {}
    371