Home | History | Annotate | Download | only in cros
      1 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import logging
      6 import random
      7 import signal
      8 import sys
      9 import threading
     10 import time
     11 
     12 from autotest_lib.client.common_lib import env
     13 from autotest_lib.client.common_lib import error
     14 
     15 
     16 def install_sigalarm_handler(new_handler):
     17     """
     18     Try installing a sigalarm handler.
     19 
     20     In order to protect apache, wsgi intercepts any attempt to install a
     21     sigalarm handler, so our function will feel the full force of a sigalarm
     22     even if we try to install a pacifying signal handler. To avoid this we
     23     need to confirm that the handler we tried to install really was installed.
     24 
     25     @param new_handler: The new handler to install. This must be a callable
     26                         object, or signal.SIG_IGN/SIG_DFL which correspond to
     27                         the numbers 1,0 respectively.
     28     @return: True if the installation of new_handler succeeded, False otherwise.
     29     """
     30     # Installing signal handlers does not and is never expected to work if we're
     31     # running in a mod_wsgi process.
     32     if env.IN_MOD_WSGI:
     33         return False
     34 
     35     if (new_handler is None or
     36         (not callable(new_handler) and
     37          new_handler != signal.SIG_IGN and
     38          new_handler != signal.SIG_DFL)):
     39         logging.warning('Trying to install an invalid sigalarm handler.')
     40         return False
     41 
     42     signal.signal(signal.SIGALRM, new_handler)
     43     installed_handler = signal.getsignal(signal.SIGALRM)
     44     return installed_handler == new_handler
     45 
     46 
     47 def set_sigalarm_timeout(timeout_secs, default_timeout=60):
     48     """
     49     Set the sigalarm timeout.
     50 
     51     This methods treats any timeout <= 0 as a possible error and falls back to
     52     using it's default timeout, since negative timeouts can have 'alarming'
     53     effects. Though 0 is a valid timeout, it is often used to cancel signals; in
     54     order to set a sigalarm of 0 please call signal.alarm directly as there are
     55     many situations where a 0 timeout is considered invalid.
     56 
     57     @param timeout_secs: The new timeout, in seconds.
     58     @param default_timeout: The default timeout to use, if timeout <= 0.
     59     @return: The old sigalarm timeout
     60     """
     61     timeout_sec_n = int(timeout_secs)
     62     if timeout_sec_n <= 0:
     63         timeout_sec_n = int(default_timeout)
     64     return signal.alarm(timeout_sec_n)
     65 
     66 
     67 def sigalarm_wrapper(message):
     68     """
     69     Raise a TimeoutException with the given message.  Needed because the body
     70     of a closure (lambda) can only be an expression, not a statement (such
     71     as "raise") :P :P :P
     72 
     73     @param message: the exception message.
     74     """
     75     raise error.TimeoutException(message)
     76 
     77 
     78 def custom_sigalarm_handler(func, timeout_sec):
     79     """
     80     Returns a sigalarm handler which produces an exception with a custom
     81     error message (function name and timeout length) instead of a generic
     82     one.
     83 
     84     @param func: the function that may time out
     85     @param timeout_sec: timeout length in seconds
     86     """
     87     try:
     88         name = str(func.__name__)
     89     except Exception as e:
     90         name = '(unavailable function name: exception: %s)' % e
     91     message = "sigalarm timeout (%d seconds) in %s" % (timeout_sec, name)
     92     return lambda signum, frame: sigalarm_wrapper(message)
     93 
     94 
     95 def timeout(func, args=(), kwargs={}, timeout_sec=60.0, default_result=None):
     96     """
     97     This function run the given function using the args, kwargs and
     98     return the given default value if the timeout_sec is exceeded.
     99 
    100     @param func: function to be called.
    101     @param args: arguments for function to be called.
    102     @param kwargs: keyword arguments for function to be called.
    103     @param timeout_sec: timeout setting for call to exit, in seconds.
    104     @param default_result: default return value for the function call.
    105 
    106     @return 1: is_timeout 2: result of the function call. If
    107             is_timeout is True, the call is timed out. If the
    108             value is False, the call is finished on time.
    109     """
    110     old_alarm_sec = 0
    111     old_handler = signal.getsignal(signal.SIGALRM)
    112     handler = custom_sigalarm_handler(func, timeout_sec)
    113     installed_handler = install_sigalarm_handler(handler)
    114     if installed_handler:
    115         old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60)
    116 
    117     # If old_timeout_time = 0 we either didn't install a handler, or sigalrm
    118     # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still
    119     # need to restore the handler/timeout.
    120     old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0
    121 
    122     try:
    123         default_result = func(*args, **kwargs)
    124         return False, default_result
    125     except error.TimeoutException:
    126         return True, default_result
    127     finally:
    128         # If we installed a sigalarm handler, cancel it since our function
    129         # returned on time. If we can successfully restore the old handler,
    130         # reset the old timeout, or, if the old timeout's deadline has passed,
    131         # set the sigalarm to fire in one second. If the old_timeout_time is 0
    132         # we don't need to set the sigalarm timeout since we have already set it
    133         # as a byproduct of cancelling the current signal.
    134         if installed_handler:
    135             signal.alarm(0)
    136             if install_sigalarm_handler(old_handler) and old_timeout_time:
    137                 set_sigalarm_timeout(int(old_timeout_time - time.time()),
    138                                      default_timeout=1)
    139 
    140 
    141 
    142 def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None,
    143           exception_to_raise=None, label=None, callback=None):
    144     """Retry calling the decorated function using a delay with jitter.
    145 
    146     Will raise RPC ValidationError exceptions from the decorated
    147     function without retrying; a malformed RPC isn't going to
    148     magically become good. Will raise exceptions in blacklist as well.
    149 
    150     If the retry is done in a child thread, timeout may not be enforced as
    151     signal only works in main thread. Therefore, the retry inside a child
    152     thread may run longer than timeout or even hang.
    153 
    154     original from:
    155       http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    156 
    157     @param ExceptionToCheck: the exception to check.  May be a tuple of
    158                              exceptions to check.
    159     @param timeout_min: timeout in minutes until giving up.
    160     @param delay_sec: pre-jittered delay between retries in seconds.  Actual
    161                       delays will be centered around this value, ranging up to
    162                       50% off this midpoint.
    163     @param blacklist: a list of exceptions that will be raised without retrying.
    164     @param exception_to_raise: the exception to raise. Callers can specify the
    165                                exception they want to raise.
    166     @param label: a label added to the exception message to help debug.
    167     @param callback: a function to call before each retry.
    168     """
    169     def deco_retry(func):
    170         """
    171         Decorator wrapper.
    172 
    173         @param func: the function to be retried and timed-out.
    174         """
    175         random.seed()
    176 
    177 
    178         def delay():
    179             """
    180             'Jitter' the delay, up to 50% in either direction.
    181             """
    182             random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
    183             logging.warning('Retrying in %f seconds...', random_delay)
    184             time.sleep(random_delay)
    185 
    186 
    187         def func_retry(*args, **kwargs):
    188             """
    189             Used to cache exception to be raised later.
    190             """
    191             exc_info = None
    192             delayed_enabled = False
    193             exception_tuple = () if blacklist is None else tuple(blacklist)
    194             start_time = time.time()
    195             remaining_time = timeout_min * 60
    196             is_main_thread = isinstance(threading.current_thread(),
    197                                         threading._MainThread)
    198             if label:
    199                 details = 'label="%s"' % label
    200             elif hasattr(func, '__name__'):
    201                 details = 'function="%s()"' % func.__name__
    202             else:
    203                 details = 'unknown function'
    204 
    205             exception_message = ('retry exception (%s), timeout = %ds' %
    206                                  (details, timeout_min * 60))
    207 
    208             while remaining_time > 0:
    209                 if delayed_enabled:
    210                     delay()
    211                 else:
    212                     delayed_enabled = True
    213                 try:
    214                     # Clear the cache
    215                     exc_info = None
    216                     if is_main_thread:
    217                         is_timeout, result = timeout(func, args, kwargs,
    218                                                      remaining_time)
    219                         if not is_timeout:
    220                             return result
    221                     else:
    222                         return func(*args, **kwargs)
    223                 except exception_tuple:
    224                     raise
    225                 except error.CrosDynamicSuiteException:
    226                     raise
    227                 except ExceptionToCheck as e:
    228                     logging.warning('%s(%s)', e.__class__, e)
    229                     # Cache the exception to be raised later.
    230                     exc_info = sys.exc_info()
    231 
    232                 remaining_time = int(timeout_min * 60 -
    233                                      (time.time() - start_time))
    234 
    235                 if remaining_time > 0 and callback:
    236                     callback()
    237                     remaining_time = int(timeout_min * 60 -
    238                                          (time.time() - start_time))
    239 
    240 
    241             # The call must have timed out or raised ExceptionToCheck.
    242             if not exc_info:
    243                 if exception_to_raise:
    244                     raise exception_to_raise(exception_message)
    245                 else:
    246                     raise error.TimeoutException(exception_message)
    247             # Raise the cached exception with original backtrace.
    248             if exception_to_raise:
    249                 raise exception_to_raise('%s: %s' % (exc_info[0], exc_info[1]))
    250             raise exc_info[0], exc_info[1], exc_info[2]
    251 
    252 
    253         return func_retry  # true decorator
    254     return deco_retry
    255