Home | History | Annotate | Download | only in cros
      1 # Copyright 2018 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import contextlib
      6 import logging
      7 import os
      8 import random
      9 import re
     10 
     11 from autotest_lib.client.bin import utils as client_utils
     12 from autotest_lib.client.common_lib import utils as common_utils
     13 from autotest_lib.client.common_lib import error
     14 from autotest_lib.server import utils
     15 from autotest_lib.server.cros import lockfile
     16 
     17 
     18 @contextlib.contextmanager
     19 def lock(filename):
     20     """Prevents other autotest/tradefed instances from accessing cache.
     21 
     22     @param filename: The file to be locked.
     23     """
     24     filelock = lockfile.FileLock(filename)
     25     # It is tempting just to call filelock.acquire(3600). But the implementation
     26     # has very poor temporal granularity (timeout/10), which is unsuitable for
     27     # our needs. See /usr/lib64/python2.7/site-packages/lockfile/
     28     attempts = 0
     29     while not filelock.i_am_locking():
     30         try:
     31             attempts += 1
     32             logging.info('Waiting for cache lock...')
     33             # We must not use a random integer as the filelock implementations
     34             # may underflow an integer division.
     35             filelock.acquire(random.uniform(0.0, pow(2.0, attempts)))
     36         except (lockfile.AlreadyLocked, lockfile.LockTimeout):
     37             # Our goal is to wait long enough to be sure something very bad
     38             # happened to the locking thread. 11 attempts is between 15 and
     39             # 30 minutes.
     40             if attempts > 11:
     41                 # Normally we should aqcuire the lock immediately. Once we
     42                 # wait on the order of 10 minutes either the dev server IO is
     43                 # overloaded or a lock didn't get cleaned up. Take one for the
     44                 # team, break the lock and report a failure. This should fix
     45                 # the lock for following tests. If the failure affects more than
     46                 # one job look for a deadlock or dev server overload.
     47                 logging.error('Permanent lock failure. Trying to break lock.')
     48                 # TODO(ihf): Think how to do this cleaner without having a
     49                 # recursive lock breaking problem. We may have to kill every
     50                 # job that is currently waiting. The main goal though really is
     51                 # to have a cache that does not corrupt. And cache updates
     52                 # only happen once a month or so, everything else are reads.
     53                 filelock.break_lock()
     54                 raise error.TestFail('Error: permanent cache lock failure.')
     55         else:
     56             logging.info('Acquired cache lock after %d attempts.', attempts)
     57     try:
     58         yield
     59     finally:
     60         filelock.release()
     61         logging.info('Released cache lock.')
     62 
     63 
     64 @contextlib.contextmanager
     65 def adb_keepalive(target, extra_paths):
     66     """A context manager that keeps the adb connection alive.
     67 
     68     AdbKeepalive will spin off a new process that will continuously poll for
     69     adb's connected state, and will attempt to reconnect if it ever goes down.
     70     This is the only way we can currently recover safely from (intentional)
     71     reboots.
     72 
     73     @param target: the hostname and port of the DUT.
     74     @param extra_paths: any additional components to the PATH environment
     75                         variable.
     76     """
     77     from autotest_lib.client.common_lib.cros import adb_keepalive as module
     78     # |__file__| returns the absolute path of the compiled bytecode of the
     79     # module. We want to run the original .py file, so we need to change the
     80     # extension back.
     81     script_filename = module.__file__.replace('.pyc', '.py')
     82     job = common_utils.BgJob(
     83         [script_filename, target],
     84         nickname='adb_keepalive',
     85         stderr_level=logging.DEBUG,
     86         stdout_tee=common_utils.TEE_TO_LOGS,
     87         stderr_tee=common_utils.TEE_TO_LOGS,
     88         extra_paths=extra_paths)
     89 
     90     try:
     91         yield
     92     finally:
     93         # The adb_keepalive.py script runs forever until SIGTERM is sent.
     94         common_utils.nuke_subprocess(job.sp)
     95         common_utils.join_bg_jobs([job])
     96 
     97 
     98 @contextlib.contextmanager
     99 def pushd(d):
    100     """Defines pushd.
    101     @param d: the directory to change to.
    102     """
    103     current = os.getcwd()
    104     os.chdir(d)
    105     try:
    106         yield
    107     finally:
    108         os.chdir(current)
    109 
    110 
    111 def parse_tradefed_result(result, waivers=None):
    112     """Check the result from the tradefed output.
    113 
    114     @param result: The result stdout string from the tradefed command.
    115     @param waivers: a set() of tests which are permitted to fail.
    116     @return 5-tuple (tests, passed, failed, notexecuted, waived)
    117     """
    118     # Regular expressions for start/end messages of each test-run chunk.
    119     abi_re = r'arm\S*|x86\S*'
    120     # TODO(kinaba): use the current running module name.
    121     module_re = r'\S+'
    122     start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with'
    123                           r' (\d+(?:,\d+)?) test' % (abi_re, module_re))
    124     end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.'
    125                         r' (\d+) passed, (\d+) failed, (\d+) not executed' %
    126                         (abi_re, module_re))
    127 
    128     # Records the result per each ABI.
    129     total_test = dict()
    130     total_pass = dict()
    131     total_fail = dict()
    132     last_notexec = dict()
    133 
    134     # ABI and the test count for the current chunk.
    135     abi = None
    136     ntest = None
    137     prev_npass = prev_nfail = prev_nnotexec = None
    138 
    139     for line in result.splitlines():
    140         # Beginning of a chunk of tests.
    141         match = start_re.search(line)
    142         if match:
    143             if abi:
    144                 raise error.TestFail('Error: Unexpected test start: ' + line)
    145             abi = match.group(1)
    146             ntest = int(match.group(2).replace(',', ''))
    147             prev_npass = prev_nfail = prev_nnotexec = None
    148         else:
    149             # End of the current chunk.
    150             match = end_re.search(line)
    151             if not match:
    152                 continue
    153 
    154             npass, nfail, nnotexec = map(int, match.group(2, 3, 4))
    155             if abi != match.group(1):
    156                 # When the last case crashed during teardown, tradefed emits two
    157                 # end-messages with possibly increased fail count. Ignore it.
    158                 if (prev_npass == npass and
    159                     (prev_nfail == nfail or prev_nfail == nfail - 1) and
    160                         prev_nnotexec == nnotexec):
    161                     continue
    162                 raise error.TestFail('Error: Unexpected test end: ' + line)
    163             prev_npass, prev_nfail, prev_nnotexec = npass, nfail, nnotexec
    164 
    165             # When the test crashes too ofen, tradefed seems to finish the
    166             # iteration by running "0 tests, 0 passed, ...". Do not count
    167             # that in.
    168             if ntest > 0:
    169                 total_test[abi] = (
    170                     total_test.get(abi, 0) + ntest - last_notexec.get(abi, 0))
    171                 total_pass[abi] = total_pass.get(abi, 0) + npass
    172                 total_fail[abi] = total_fail.get(abi, 0) + nfail
    173                 last_notexec[abi] = nnotexec
    174             abi = None
    175 
    176     if abi:
    177         # When tradefed crashes badly, it may exit without printing the counts
    178         # from the last chunk. Regard them as not executed and retry (rather
    179         # than aborting the test cycle at this point.)
    180         if ntest > 0:
    181             total_test[abi] = (
    182                 total_test.get(abi, 0) + ntest - last_notexec.get(abi, 0))
    183             last_notexec[abi] = ntest
    184         logging.warning('No result reported for the last chunk. ' +
    185                         'Assuming all not executed.')
    186 
    187     # TODO(rohitbm): make failure parsing more robust by extracting the list
    188     # of failing tests instead of searching in the result blob. As well as
    189     # only parse for waivers for the running ABI.
    190     waived = 0
    191     if waivers:
    192         abis = total_test.keys()
    193         for testname in waivers:
    194             # TODO(dhaddock): Find a more robust way to apply waivers.
    195             fail_count = (
    196                 result.count(testname + ' FAIL') +
    197                 result.count(testname + ' fail'))
    198             if fail_count:
    199                 if fail_count > len(abis):
    200                     # This should be an error.TestFail, but unfortunately
    201                     # tradefed has a bug that emits "fail" twice when a
    202                     # test failed during teardown. It will anyway causes
    203                     # a test count inconsistency and visible on the dashboard.
    204                     logging.error('Found %d failures for %s '
    205                                   'but there are only %d abis: %s', fail_count,
    206                                   testname, len(abis), abis)
    207                 waived += fail_count
    208                 logging.info('Waived failure for %s %d time(s)', testname,
    209                              fail_count)
    210     counts = tuple(
    211         sum(count_per_abi.values())
    212         for count_per_abi in (total_test, total_pass, total_fail,
    213                               last_notexec)) + (waived,)
    214     msg = (
    215         'tests=%d, passed=%d, failed=%d, not_executed=%d, waived=%d' % counts)
    216     logging.info(msg)
    217     if counts[2] - waived < 0:
    218         raise error.TestFail('Error: Internal waiver bookkeeping has '
    219                              'become inconsistent (%s)' % msg)
    220     return counts
    221 
    222 
    223 def select_32bit_java():
    224     """Switches to 32 bit java if installed (like in lab lxc images) to save
    225     about 30-40% server/shard memory during the run."""
    226     if utils.is_in_container() and not client_utils.is_moblab():
    227         java = '/usr/lib/jvm/java-8-openjdk-i386'
    228         if os.path.exists(java):
    229             logging.info('Found 32 bit java, switching to use it.')
    230             os.environ['JAVA_HOME'] = java
    231             os.environ['PATH'] = (
    232                 os.path.join(java, 'bin') + os.pathsep + os.environ['PATH'])
    233