Home | History | Annotate | Download | only in cros
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import random
      7 import signal
      8 import sys
      9 import threading
     10 import time
     11 
     12 from autotest_lib.client.common_lib import env
     13 from autotest_lib.client.common_lib import error
     14 
     15 
     16 def install_sigalarm_handler(new_handler):
     17     """
     18     Try installing a sigalarm handler.
     19 
     20     In order to protect apache, wsgi intercepts any attempt to install a
     21     sigalarm handler, so our function will feel the full force of a sigalarm
     22     even if we try to install a pacifying signal handler. To avoid this we
     23     need to confirm that the handler we tried to install really was installed.
     24 
     25     @param new_handler: The new handler to install. This must be a callable
     26                         object, or signal.SIG_IGN/SIG_DFL which correspond to
     27                         the numbers 1,0 respectively.
     28     @return: True if the installation of new_handler succeeded, False otherwise.
     29     """
     30     # Installing signal handlers does not and is never expected to work if we're
     31     # running in a mod_wsgi process.
     32     if env.IN_MOD_WSGI:
     33         return False
     34 
     35     if (new_handler is None or
     36         (not callable(new_handler) and
     37          new_handler != signal.SIG_IGN and
     38          new_handler != signal.SIG_DFL)):
     39         logging.warning('Trying to install an invalid sigalarm handler.')
     40         return False
     41 
     42     signal.signal(signal.SIGALRM, new_handler)
     43     installed_handler = signal.getsignal(signal.SIGALRM)
     44     return installed_handler == new_handler
     45 
     46 
     47 def set_sigalarm_timeout(timeout_secs, default_timeout=60):
     48     """
     49     Set the sigalarm timeout.
     50 
     51     This methods treats any timeout <= 0 as a possible error and falls back to
     52     using it's default timeout, since negative timeouts can have 'alarming'
     53     effects. Though 0 is a valid timeout, it is often used to cancel signals; in
     54     order to set a sigalarm of 0 please call signal.alarm directly as there are
     55     many situations where a 0 timeout is considered invalid.
     56 
     57     @param timeout_secs: The new timeout, in seconds.
     58     @param default_timeout: The default timeout to use, if timeout <= 0.
     59     @return: The old sigalarm timeout
     60     """
     61     timeout_sec_n = int(timeout_secs)
     62     if timeout_sec_n <= 0:
     63         timeout_sec_n = int(default_timeout)
     64     return signal.alarm(timeout_sec_n)
     65 
     66 
     67 def sigalarm_wrapper(message):
     68     """
     69     Raise a TimeoutException with the given message.  Needed because the body
     70     of a closure (lambda) can only be an expression, not a statement (such
     71     as "raise") :P :P :P
     72 
     73     @param message: the exception message.
     74     """
     75     raise error.TimeoutException(message)
     76 
     77 
     78 def custom_sigalarm_handler(func, timeout_sec):
     79     """
     80     Returns a sigalarm handler which produces an exception with a custom
     81     error message (function name and timeout length) instead of a generic
     82     one.
     83 
     84     @param func: the function that may time out
     85     @param timeout_sec: timeout length in seconds
     86     """
     87     try:
     88         name = str(func.__name__)
     89     except Exception as e:
     90         name = '(unavailable function name: exception: %s)' % e
     91     message = "sigalarm timeout (%d seconds) in %s" % (timeout_sec, name)
     92     return lambda signum, frame: sigalarm_wrapper(message)
     93 
     94 
     95 def timeout(func, args=(), kwargs={}, timeout_sec=60.0, default_result=None):
     96     """
     97     This function run the given function using the args, kwargs and
     98     return the given default value if the timeout_sec is exceeded.
     99 
    100     @param func: function to be called.
    101     @param args: arguments for function to be called.
    102     @param kwargs: keyword arguments for function to be called.
    103     @param timeout_sec: timeout setting for call to exit, in seconds.
    104     @param default_result: default return value for the function call.
    105 
    106     @return 1: is_timeout 2: result of the function call. If
    107             is_timeout is True, the call is timed out. If the
    108             value is False, the call is finished on time.
    109     """
    110     old_alarm_sec = 0
    111     old_handler = signal.getsignal(signal.SIGALRM)
    112     handler = custom_sigalarm_handler(func, timeout_sec)
    113     installed_handler = install_sigalarm_handler(handler)
    114     if installed_handler:
    115         old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60)
    116 
    117     # If old_timeout_time = 0 we either didn't install a handler, or sigalrm
    118     # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still
    119     # need to restore the handler/timeout.
    120     old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0
    121 
    122     try:
    123         default_result = func(*args, **kwargs)
    124         return False, default_result
    125     except error.TimeoutException:
    126         return True, default_result
    127     finally:
    128         # If we installed a sigalarm handler, cancel it since our function
    129         # returned on time. If we can successfully restore the old handler,
    130         # reset the old timeout, or, if the old timeout's deadline has passed,
    131         # set the sigalarm to fire in one second. If the old_timeout_time is 0
    132         # we don't need to set the sigalarm timeout since we have already set it
    133         # as a byproduct of cancelling the current signal.
    134         if installed_handler:
    135             signal.alarm(0)
    136             if install_sigalarm_handler(old_handler) and old_timeout_time:
    137                 set_sigalarm_timeout(int(old_timeout_time - time.time()),
    138                                      default_timeout=1)
    139 
    140 
    141 
def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None,
          exception_to_raise=None, label=None, callback=None, backoff=1):
    """Retry calling the decorated function using a delay with jitter.

    Exceptions listed in blacklist, and error.CrosDynamicSuiteException, are
    re-raised from the decorated function without retrying. (The original
    note here said RPC ValidationError exceptions are raised without
    retrying, since a malformed RPC isn't going to magically become good;
    presumably ValidationError derives from CrosDynamicSuiteException --
    verify against the error module.)

    If the retry is done in a child thread, timeout may not be enforced as
    signal only works in main thread. Therefore, the retry inside a child
    thread may run longer than timeout or even hang.

    When the time budget is exhausted, the last cached ExceptionToCheck is
    re-raised (or wrapped in exception_to_raise if given); if no exception
    was cached, exception_to_raise or error.TimeoutException is raised with
    a message describing the retry.

    original from:
      http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/

    @param ExceptionToCheck: the exception to check.  May be a tuple of
                             exceptions to check.
    @param timeout_min: timeout in minutes until giving up.
    @param delay_sec: pre-jittered base delay between retries in seconds. Actual
                      delays will be first calculated with exponential backoff,
                      then randomized around this new value, ranging up to 50%
                      off this midpoint.
    @param blacklist: a list of exceptions that will be raised without retrying.
    @param exception_to_raise: the exception to raise. Callers can specify the
                               exception they want to raise.
    @param label: a label added to the exception message to help debug.
    @param callback: a function to call before each retry.
    @param backoff: exponent to calculate exponential backoff for the actual
                    delay. Set to 1 to disable exponential backoff.
    """
    def deco_retry(func):
        """
        Decorator wrapper.

        @param func: the function to be retried and timed-out.
        @return: func_retry, the wrapper that performs the retries.
        """
        # Reseed the module-level random generator once per decorated function.
        random.seed()


        def delay(delay_with_backoff_sec):
            """
            'Jitter' the delay with backoff, up to 50% in either direction.

            Logs the chosen delay and then sleeps for it.

            @param delay_with_backoff_sec: midpoint of the delay, in seconds.
            """
            random_delay = random.uniform(0.5 * delay_with_backoff_sec,
                                          1.5 * delay_with_backoff_sec)
            logging.warning('Retrying in %f seconds...', random_delay)
            time.sleep(random_delay)


        def func_retry(*args, **kwargs):
            """
            Call func repeatedly until it succeeds or the time budget runs out.

            The most recent ExceptionToCheck is cached so that it can be
            raised once the retries are exhausted.
            """
            exc_info = None
            # False only for the first attempt, which runs without a delay.
            delayed_enabled = False
            exception_tuple = () if blacklist is None else tuple(blacklist)
            start_time = time.time()
            remaining_time = timeout_min * 60
            delay_with_backoff_sec = delay_sec
            # The sigalarm based timeout is only used in the main thread.
            is_main_thread = isinstance(threading.current_thread(),
                                        threading._MainThread)
            if label:
                details = 'label="%s"' % label
            elif hasattr(func, '__name__'):
                details = 'function="%s()"' % func.__name__
            else:
                details = 'unknown function'

            exception_message = ('retry exception (%s), timeout = %ds' %
                                 (details, timeout_min * 60))

            while remaining_time > 0:
                if delayed_enabled:
                    delay(delay_with_backoff_sec)
                    # Grow the delay midpoint for the next retry.
                    delay_with_backoff_sec *= backoff
                else:
                    delayed_enabled = True
                try:
                    # Clear the cache
                    exc_info = None
                    if is_main_thread:
                        # Bound this attempt by whatever time is left.
                        is_timeout, result = timeout(func, args, kwargs,
                                                     remaining_time)
                        if not is_timeout:
                            return result
                    else:
                        # No timeout enforcement outside the main thread.
                        return func(*args, **kwargs)
                except exception_tuple:
                    # Blacklisted exceptions are never retried.
                    raise
                except error.CrosDynamicSuiteException:
                    # Never retried either.
                    raise
                except ExceptionToCheck as e:
                    logging.warning('%s(%s)', e.__class__, e)
                    # Cache the exception to be raised later.
                    exc_info = sys.exc_info()

                remaining_time = int(timeout_min * 60 -
                                     (time.time() - start_time))

                if remaining_time > 0 and callback:
                    callback()
                    # The callback itself consumes time; recompute the budget.
                    remaining_time = int(timeout_min * 60 -
                                         (time.time() - start_time))


            # The call must have timed out or raised ExceptionToCheck.
            if not exc_info:
                if exception_to_raise:
                    raise exception_to_raise(exception_message)
                else:
                    raise error.TimeoutException(exception_message)
            # Raise the cached exception with original backtrace.
            if exception_to_raise:
                raise exception_to_raise('%s: %s' % (exc_info[0], exc_info[1]))
            # Python 2 three-argument raise: type, value, traceback.
            raise exc_info[0], exc_info[1], exc_info[2]


        return func_retry  # true decorator
    return deco_retry
    261