# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import logging
import os
import random
import re

from autotest_lib.client.bin import utils as client_utils
from autotest_lib.client.common_lib import utils as common_utils
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.cros import lockfile


@contextlib.contextmanager
def lock(filename):
    """Prevents other autotest/tradefed instances from accessing cache.

    Retries acquiring the file lock with a randomized, exponentially growing
    timeout. The body of the `with` statement runs while the lock is held and
    the lock is released afterwards, even if the body raises.

    @param filename: The file to be locked.
    @raises error.TestFail: if the lock could not be acquired after more than
            11 attempts. The stale lock is broken before raising.
    """
    filelock = lockfile.FileLock(filename)
    # It is tempting just to call filelock.acquire(3600). But the implementation
    # has very poor temporal granularity (timeout/10), which is unsuitable for
    # our needs. See /usr/lib64/python2.7/site-packages/lockfile/
    attempts = 0
    while not filelock.i_am_locking():
        try:
            attempts += 1
            logging.info('Waiting for cache lock...')
            # We must not use a random integer as the filelock implementations
            # may underflow an integer division.
            filelock.acquire(random.uniform(0.0, pow(2.0, attempts)))
        except (lockfile.AlreadyLocked, lockfile.LockTimeout):
            # Our goal is to wait long enough to be sure something very bad
            # happened to the locking thread. 11 attempts is between 15 and
            # 30 minutes.
            if attempts > 11:
                # Normally we should acquire the lock immediately. Once we
                # wait on the order of 10 minutes either the dev server IO is
                # overloaded or a lock didn't get cleaned up. Take one for the
                # team, break the lock and report a failure. This should fix
                # the lock for following tests. If the failure affects more than
                # one job look for a deadlock or dev server overload.
                logging.error('Permanent lock failure. Trying to break lock.')
                # TODO(ihf): Think how to do this cleaner without having a
                # recursive lock breaking problem. We may have to kill every
                # job that is currently waiting. The main goal though really is
                # to have a cache that does not corrupt. And cache updates
                # only happen once a month or so, everything else are reads.
                filelock.break_lock()
                raise error.TestFail('Error: permanent cache lock failure.')
        else:
            logging.info('Acquired cache lock after %d attempts.', attempts)
    try:
        yield
    finally:
        filelock.release()
        logging.info('Released cache lock.')


@contextlib.contextmanager
def adb_keepalive(target, extra_paths):
    """A context manager that keeps the adb connection alive.

    AdbKeepalive will spin off a new process that will continuously poll for
    adb's connected state, and will attempt to reconnect if it ever goes down.
    This is the only way we can currently recover safely from (intentional)
    reboots.

    The background process is terminated and joined when the context exits,
    whether or not the body raised.

    @param target: the hostname and port of the DUT.
    @param extra_paths: any additional components to the PATH environment
                        variable.
    """
    from autotest_lib.client.common_lib.cros import adb_keepalive as module
    # |__file__| may be the path of the compiled bytecode of the module. We
    # want to run the original .py file, so change the extension back. Only
    # the extension is rewritten, so a '.pyc' substring elsewhere in the path
    # is left alone and '.pyo' files are handled as well.
    base, ext = os.path.splitext(module.__file__)
    if ext in ('.pyc', '.pyo'):
        script_filename = base + '.py'
    else:
        script_filename = module.__file__
    job = common_utils.BgJob(
        [script_filename, target],
        nickname='adb_keepalive',
        stderr_level=logging.DEBUG,
        stdout_tee=common_utils.TEE_TO_LOGS,
        stderr_tee=common_utils.TEE_TO_LOGS,
        extra_paths=extra_paths)

    try:
        yield
    finally:
        # The adb_keepalive.py script runs forever until SIGTERM is sent.
        common_utils.nuke_subprocess(job.sp)
        common_utils.join_bg_jobs([job])


@contextlib.contextmanager
def pushd(d):
    """Temporarily changes the current working directory.

    The previous working directory is restored when the context exits, even
    if the body raises.

    @param d: the directory to change to.
    """
    current = os.getcwd()
    os.chdir(d)
    try:
        yield
    finally:
        os.chdir(current)


def parse_tradefed_result(result, waivers=None):
    """Check the result from the tradefed output.

    Scans the output line by line for the start/end messages of each test-run
    chunk, accumulates the counts per ABI, and then counts waived failures by
    searching the output for '<testname> FAIL' / '<testname> fail'.

    @param result: The result stdout string from the tradefed command.
    @param waivers: a set() of tests which are permitted to fail.
    @return 5-tuple (tests, passed, failed, notexecuted, waived)
    @raises error.TestFail: on an unexpected start/end message, or if more
            failures were waived than were reported as failed.
    """
    # Regular expressions for start/end messages of each test-run chunk.
    abi_re = r'arm\S*|x86\S*'
    # TODO(kinaba): use the current running module name.
    module_re = r'\S+'
    start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with'
                          r' (\d+(?:,\d+)?) test' % (abi_re, module_re))
    end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.'
                        r' (\d+) passed, (\d+) failed, (\d+) not executed' %
                        (abi_re, module_re))

    # Records the result per each ABI.
    total_test = dict()
    total_pass = dict()
    total_fail = dict()
    last_notexec = dict()

    # ABI and the test count for the current chunk.
    abi = None
    ntest = None
    prev_npass = prev_nfail = prev_nnotexec = None

    for line in result.splitlines():
        # Beginning of a chunk of tests.
        match = start_re.search(line)
        if match:
            if abi:
                raise error.TestFail('Error: Unexpected test start: ' + line)
            abi = match.group(1)
            # The test count may contain a thousands separator ("1,234").
            ntest = int(match.group(2).replace(',', ''))
            prev_npass = prev_nfail = prev_nnotexec = None
        else:
            # End of the current chunk.
            match = end_re.search(line)
            if not match:
                continue

            npass, nfail, nnotexec = map(int, match.group(2, 3, 4))
            if abi != match.group(1):
                # When the last case crashed during teardown, tradefed emits two
                # end-messages with possibly increased fail count. Ignore it.
                if (prev_npass == npass and
                    (prev_nfail == nfail or prev_nfail == nfail - 1) and
                    prev_nnotexec == nnotexec):
                    continue
                raise error.TestFail('Error: Unexpected test end: ' + line)
            prev_npass, prev_nfail, prev_nnotexec = npass, nfail, nnotexec

            # When the test crashes too often, tradefed seems to finish the
            # iteration by running "0 tests, 0 passed, ...". Do not count
            # that in.
            if ntest > 0:
                # Tests left unexecuted by the previous chunk are re-run in
                # this one, so subtract them to avoid counting them twice.
                total_test[abi] = (
                    total_test.get(abi, 0) + ntest - last_notexec.get(abi, 0))
                total_pass[abi] = total_pass.get(abi, 0) + npass
                total_fail[abi] = total_fail.get(abi, 0) + nfail
                last_notexec[abi] = nnotexec
            abi = None

    if abi:
        # When tradefed crashes badly, it may exit without printing the counts
        # from the last chunk. Regard them as not executed and retry (rather
        # than aborting the test cycle at this point.)
        if ntest > 0:
            total_test[abi] = (
                total_test.get(abi, 0) + ntest - last_notexec.get(abi, 0))
            last_notexec[abi] = ntest
        logging.warning('No result reported for the last chunk. ' +
                        'Assuming all not executed.')

    # TODO(rohitbm): make failure parsing more robust by extracting the list
    # of failing tests instead of searching in the result blob. As well as
    # only parse for waivers for the running ABI.
    waived = 0
    if waivers:
        # Sorted so that the log message below is deterministic.
        abis = sorted(total_test)
        for testname in waivers:
            # TODO(dhaddock): Find a more robust way to apply waivers.
            fail_count = (
                result.count(testname + ' FAIL') +
                result.count(testname + ' fail'))
            if fail_count:
                if fail_count > len(abis):
                    # This should be an error.TestFail, but unfortunately
                    # tradefed has a bug that emits "fail" twice when a
                    # test failed during teardown. It will anyway cause
                    # a test count inconsistency and be visible on the
                    # dashboard.
                    logging.error('Found %d failures for %s '
                                  'but there are only %d abis: %s', fail_count,
                                  testname, len(abis), abis)
                    # A test can fail at most once per ABI. Waiving more than
                    # that would hide unrelated, real failures.
                    fail_count = len(abis)
                waived += fail_count
                logging.info('Waived failure for %s %d time(s)', testname,
                             fail_count)
    counts = tuple(
        sum(count_per_abi.values())
        for count_per_abi in (total_test, total_pass, total_fail,
                              last_notexec)) + (waived,)
    msg = (
        'tests=%d, passed=%d, failed=%d, not_executed=%d, waived=%d' % counts)
    logging.info(msg)
    if counts[2] - waived < 0:
        raise error.TestFail('Error: Internal waiver bookkeeping has '
                             'become inconsistent (%s)' % msg)
    return counts


def select_32bit_java():
    """Switches to 32 bit java if installed.

    32 bit java (like in lab lxc images) saves about 30-40% server/shard
    memory during the run. Only applies inside a container that is not
    moblab; sets JAVA_HOME and prepends the java bin directory to PATH.
    """
    if utils.is_in_container() and not client_utils.is_moblab():
        java = '/usr/lib/jvm/java-8-openjdk-i386'
        if os.path.exists(java):
            logging.info('Found 32 bit java, switching to use it.')
            os.environ['JAVA_HOME'] = java
            java_bin = os.path.join(java, 'bin')
            # PATH may be unset. Avoid a KeyError, and avoid a trailing
            # separator, which would put the current directory on PATH.
            old_path = os.environ.get('PATH')
            if old_path:
                os.environ['PATH'] = java_bin + os.pathsep + old_path
            else:
                os.environ['PATH'] = java_bin