Home | History | Annotate | Download | only in cros
      1 # Copyright 2014 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging, threading, time
      6 
      7 from autotest_lib.client.bin import utils
      8 from autotest_lib.client.cros import service_stopper
      9 
     10 
# Names of the thermal throttling services that should be disabled while
# performance is being measured. PerfControl._stop_thermal_throttling() hands
# this list to service_stopper.ServiceStopper.
# - temp_metrics for link.
# - thermal for daisy, snow, pit etc.
# TODO(ihf): cpu_quiet on nyan isn't a service. We still need to disable it
#            on nyan. See crbug.com/357457.
_THERMAL_SERVICES = ['temp_metrics', 'thermal']
     17 
     18 
     19 class PerfControl(object):
     20     """
     21     Provides methods for setting the performance mode of a device.
     22 
     23     In particular it verifies the machine is idle and cold and tries to set
     24     it into a consistent, high performance state during initialization.
     25 
     26     Furthermore it monitors the state of the machine (in particular
     27     temperature) and verifies that nothing bad happened along the way.
     28 
     29     Example usage:
     30 
     31     with PerfControl() as pc:
     32         if not pc.verify_is_valid():
     33             raise error.TestError(pc.get_error_reason())
     34         # Do all performance testing.
     35         ...
     36         if not pc.verify_is_valid():
     37             raise error.TestError(pc.get_error_reason())
     38     """
     39     def __init__(self):
     40         self._service_stopper = None
     41         # Keep a copy of the current state for cleanup.
     42         self._temperature_init = utils.get_current_temperature_max()
     43         self._temperature_critical = utils.get_temperature_critical()
     44         self._original_governors = utils.set_high_performance_mode()
     45         self._error_reason = None
     46         if not utils.wait_for_idle_cpu(60.0, 0.1):
     47             self._error_reason = 'Could not get idle CPU.'
     48             return
     49         if not utils.wait_for_cool_machine():
     50             self._error_reason = 'Could not get cold machine.'
     51             return
     52         self._temperature_cold = utils.get_current_temperature_max()
     53         self._temperature_max = self._temperature_cold
     54         threading.Thread(target=self._monitor_performance_state).start()
     55         # Should be last just in case we had a runaway process.
     56         self._stop_thermal_throttling()
     57 
     58 
     59     def __enter__(self):
     60         return self
     61 
     62 
     63     def __exit__(self, _type, value, traceback):
     64         # First thing restart thermal management.
     65         self._restore_thermal_throttling()
     66         utils.restore_scaling_governor_states(self._original_governors)
     67 
     68 
     69     def get_error_reason(self):
     70         """
     71         Returns an error reason string if we encountered problems to pass
     72         on to harness/wmatrix.
     73         """
     74         return self._error_reason
     75 
     76 
     77     def verify_is_valid(self):
     78         """
     79         For now we declare performance results as valid if
     80         - we did not have an error before.
     81         - the monitoring thread never saw temperatures too close to critical.
     82 
     83         TODO(ihf): Search log files for thermal throttling messages like in
     84                    src/build/android/pylib/perf/thermal_throttle.py
     85         """
     86         if self._error_reason:
     87             return False
     88         temperature_bad = self._temperature_critical - 1.0
     89         logging.info("Max observed temperature = %.1f'C (bad limit = %.1f'C)",
     90                      self._temperature_max, temperature_bad)
     91         if (self._temperature_max > temperature_bad):
     92             self._error_reason = 'Machine got hot during testing.'
     93             return False
     94         return True
     95 
     96 
     97     def _monitor_performance_state(self):
     98         """
     99         Checks machine temperature once per second.
    100         TODO(ihf): make this more intelligent with regards to governor,
    101                    CPU, GPU and maybe zram as needed.
    102         """
    103         while True:
    104             time.sleep(1)
    105             current_temperature = utils.get_current_temperature_max()
    106             self._temperature_max = max(self._temperature_max,
    107                                         current_temperature)
    108             # TODO(ihf): Remove this spew once PerfControl is stable.
    109             logging.info('PerfControl CPU temperature = %.1f',
    110                           current_temperature)
    111 
    112 
    113     def _stop_thermal_throttling(self):
    114         """
    115         If exist on the platform/machine it stops the different thermal
    116         throttling scripts from running.
    117         Warning: this risks abnormal behavior if machine runs in high load.
    118         """
    119         self._service_stopper = service_stopper.ServiceStopper(
    120                                                     _THERMAL_SERVICES)
    121 
    122 
    123     def _restore_thermal_throttling(self):
    124         """
    125         Restores the original thermal throttling state.
    126         """
    127         if self._service_stopper:
    128             self._service_stopper.restore_services()
    129