Home | History | Annotate | Download | only in cros
      1 # Copyright 2018 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import contextlib
      6 import logging
      7 import os
      8 import random
      9 import re
     10 
     11 from autotest_lib.client.bin import utils as client_utils
     12 from autotest_lib.client.common_lib import utils as common_utils
     13 from autotest_lib.client.common_lib import error
     14 from autotest_lib.server import utils
     15 from autotest_lib.server.cros import lockfile
     16 
     17 
     18 @contextlib.contextmanager
     19 def lock(filename):
     20     """Prevents other autotest/tradefed instances from accessing cache.
     21 
     22     @param filename: The file to be locked.
     23     """
     24     filelock = lockfile.FileLock(filename)
     25     # It is tempting just to call filelock.acquire(3600). But the implementation
     26     # has very poor temporal granularity (timeout/10), which is unsuitable for
     27     # our needs. See /usr/lib64/python2.7/site-packages/lockfile/
     28     attempts = 0
     29     while not filelock.i_am_locking():
     30         try:
     31             attempts += 1
     32             logging.info('Waiting for cache lock...')
     33             # We must not use a random integer as the filelock implementations
     34             # may underflow an integer division.
     35             filelock.acquire(random.uniform(0.0, pow(2.0, attempts)))
     36         except (lockfile.AlreadyLocked, lockfile.LockTimeout):
     37             # Our goal is to wait long enough to be sure something very bad
     38             # happened to the locking thread. 11 attempts is between 15 and
     39             # 30 minutes.
     40             if attempts > 11:
     41                 # Normally we should aqcuire the lock immediately. Once we
     42                 # wait on the order of 10 minutes either the dev server IO is
     43                 # overloaded or a lock didn't get cleaned up. Take one for the
     44                 # team, break the lock and report a failure. This should fix
     45                 # the lock for following tests. If the failure affects more than
     46                 # one job look for a deadlock or dev server overload.
     47                 logging.error('Permanent lock failure. Trying to break lock.')
     48                 # TODO(ihf): Think how to do this cleaner without having a
     49                 # recursive lock breaking problem. We may have to kill every
     50                 # job that is currently waiting. The main goal though really is
     51                 # to have a cache that does not corrupt. And cache updates
     52                 # only happen once a month or so, everything else are reads.
     53                 filelock.break_lock()
     54                 raise error.TestFail('Error: permanent cache lock failure.')
     55         else:
     56             logging.info('Acquired cache lock after %d attempts.', attempts)
     57     try:
     58         yield
     59     finally:
     60         filelock.release()
     61         logging.info('Released cache lock.')
     62 
     63 
     64 @contextlib.contextmanager
     65 def adb_keepalive(targets, extra_paths):
     66     """A context manager that keeps the adb connection alive.
     67 
     68     AdbKeepalive will spin off a new process that will continuously poll for
     69     adb's connected state, and will attempt to reconnect if it ever goes down.
     70     This is the only way we can currently recover safely from (intentional)
     71     reboots.
     72 
     73     @param target: the hostname and port of the DUT.
     74     @param extra_paths: any additional components to the PATH environment
     75                         variable.
     76     """
     77     from autotest_lib.client.common_lib.cros import adb_keepalive as module
     78     # |__file__| returns the absolute path of the compiled bytecode of the
     79     # module. We want to run the original .py file, so we need to change the
     80     # extension back.
     81     script_filename = module.__file__.replace('.pyc', '.py')
     82     jobs = [common_utils.BgJob(
     83         [script_filename, target],
     84         nickname='adb_keepalive',
     85         stderr_level=logging.DEBUG,
     86         stdout_tee=common_utils.TEE_TO_LOGS,
     87         stderr_tee=common_utils.TEE_TO_LOGS,
     88         extra_paths=extra_paths) for target in targets]
     89 
     90     try:
     91         yield
     92     finally:
     93         # The adb_keepalive.py script runs forever until SIGTERM is sent.
     94         for job in jobs:
     95             common_utils.nuke_subprocess(job.sp)
     96         common_utils.join_bg_jobs(jobs)
     97 
     98 
     99 @contextlib.contextmanager
    100 def pushd(d):
    101     """Defines pushd.
    102     @param d: the directory to change to.
    103     """
    104     current = os.getcwd()
    105     os.chdir(d)
    106     try:
    107         yield
    108     finally:
    109         os.chdir(current)
    110 
    111 
    112 def parse_tradefed_result(result, waivers=None):
    113     """Check the result from the tradefed output.
    114 
    115     @param result: The result stdout string from the tradefed command.
    116     @param waivers: a set() of tests which are permitted to fail.
    117     @return List of the waived tests.
    118     """
    119     # Regular expressions for start/end messages of each test-run chunk.
    120     abi_re = r'arm\S*|x86\S*'
    121     # TODO(kinaba): use the current running module name.
    122     module_re = r'\S+'
    123     start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with'
    124                           r' (\d+(?:,\d+)?) test' % (abi_re, module_re))
    125     end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.'
    126                         r' (\d+) passed, (\d+) failed, (\d+) not executed' %
    127                         (abi_re, module_re))
    128     fail_re = re.compile(r'I/ConsoleReporter.* (\S+) fail:')
    129     inaccurate_re = re.compile(r'IMPORTANT: Some modules failed to run to '
    130                                 'completion, tests counts may be inaccurate')
    131     abis = set()
    132     waived_count = dict()
    133     failed_tests = set()
    134     accurate = True
    135     for line in result.splitlines():
    136         match = start_re.search(line)
    137         if match:
    138             abis = abis.union([match.group(1)])
    139             continue
    140         match = end_re.search(line)
    141         if match:
    142             abi = match.group(1)
    143             if abi not in abis:
    144                 logging.error('Trunk end with %s abi but have not seen '
    145                               'any trunk start with this abi.(%s)', abi, line)
    146             continue
    147         match = fail_re.search(line)
    148         if match:
    149             testname = match.group(1)
    150             if waivers and testname in waivers:
    151                 waived_count[testname] = waived_count.get(testname, 0) + 1
    152             else:
    153                 failed_tests.add(testname)
    154             continue
    155         # b/66899135, tradefed may reported inaccuratly with `list results`.
    156         # Add warning if summary section shows that the result is inacurrate.
    157         match = inaccurate_re.search(line)
    158         if match:
    159             accurate = False
    160 
    161     logging.info('Total ABIs: %s', abis)
    162     if failed_tests:
    163         logging.error('Failed (but not waived) tests:\n%s',
    164             '\n'.join(sorted(failed_tests)))
    165 
    166     # TODO(dhaddock): Find a more robust way to apply waivers.
    167     waived = []
    168     for testname, fail_count in waived_count.items():
    169         if fail_count > len(abis):
    170             # This should be an error.TestFail, but unfortunately
    171             # tradefed has a bug that emits "fail" twice when a
    172             # test failed during teardown. It will anyway causes
    173             # a test count inconsistency and visible on the dashboard.
    174             logging.error('Found %d failures for %s but there are only %d '
    175                           'abis: %s', fail_count, testname, len(abis), abis)
    176             fail_count = len(abis)
    177         waived += [testname] * fail_count
    178         logging.info('Waived failure for %s %d time(s)', testname, fail_count)
    179     logging.info('Total waived = %s', waived)
    180     return waived, accurate
    181 
    182 
    183 def select_32bit_java():
    184     """Switches to 32 bit java if installed (like in lab lxc images) to save
    185     about 30-40% server/shard memory during the run."""
    186     if utils.is_in_container() and not client_utils.is_moblab():
    187         java = '/usr/lib/jvm/java-8-openjdk-i386'
    188         if os.path.exists(java):
    189             logging.info('Found 32 bit java, switching to use it.')
    190             os.environ['JAVA_HOME'] = java
    191             os.environ['PATH'] = (
    192                 os.path.join(java, 'bin') + os.pathsep + os.environ['PATH'])
    193