1 # Copyright 2007 Google Inc. Released under the GPL v2 2 #pylint: disable-msg=C0111 3 4 import re, os, sys, traceback, time, glob, tempfile 5 import logging 6 from autotest_lib.server import installable_object, prebuild, utils 7 from autotest_lib.client.common_lib import base_job, error, autotemp 8 from autotest_lib.client.common_lib import base_packages, packages 9 from autotest_lib.client.common_lib import global_config 10 from autotest_lib.client.common_lib import utils as client_utils 11 from autotest_lib.client.common_lib.cros.graphite import autotest_stats 12 13 AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client' 14 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client' 15 16 17 get_value = global_config.global_config.get_config_value 18 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild', 19 type=bool, default=False) 20 21 22 class AutodirNotFoundError(Exception): 23 """No Autotest installation could be found.""" 24 25 26 class AutotestFailure(Exception): 27 """Gereric exception class for failures during a test run.""" 28 29 30 class AutotestAbort(AutotestFailure): 31 """ 32 AutotestAborts are thrown when the DUT seems fine, 33 and the test doesn't give us an explicit reason for 34 failure; In this case we have no choice but to abort. 35 """ 36 37 38 class AutotestDeviceError(AutotestFailure): 39 """ 40 Exceptions that inherit from AutotestDeviceError 41 are thrown when we can determine the current 42 state of the DUT and conclude that it probably 43 lead to the test failing; these exceptions lead 44 to failures instead of aborts. 45 """ 46 47 48 class AutotestDeviceNotPingable(AutotestDeviceError): 49 """Error for when a DUT becomes unpingable.""" 50 51 52 class AutotestDeviceNotSSHable(AutotestDeviceError): 53 """Error for when a DUT is pingable but not SSHable.""" 54 55 56 class AutotestDeviceRebooted(AutotestDeviceError): 57 """Error for when a DUT rebooted unexpectedly.""" 58 59 60 class BaseAutotest(installable_object.InstallableObject): 61 """ 62 This class represents the Autotest program. 63 64 Autotest is used to run tests automatically and collect the results. 65 It also supports profilers. 66 67 Implementation details: 68 This is a leaf class in an abstract class hierarchy, it must 69 implement the unimplemented methods in parent classes. 70 """ 71 72 def __init__(self, host=None): 73 self.host = host 74 self.got = False 75 self.installed = False 76 self.serverdir = utils.get_server_dir() 77 super(BaseAutotest, self).__init__() 78 79 80 install_in_tmpdir = False 81 @classmethod 82 def set_install_in_tmpdir(cls, flag): 83 """ Sets a flag that controls whether or not Autotest should by 84 default be installed in a "standard" directory (e.g. 85 /home/autotest, /usr/local/autotest) or a temporary directory. """ 86 cls.install_in_tmpdir = flag 87 88 89 @classmethod 90 def get_client_autodir_paths(cls, host): 91 return global_config.global_config.get_config_value( 92 'AUTOSERV', 'client_autodir_paths', type=list) 93 94 95 @classmethod 96 def get_installed_autodir(cls, host): 97 """ 98 Find where the Autotest client is installed on the host. 99 @returns an absolute path to an installed Autotest client root. 100 @raises AutodirNotFoundError if no Autotest installation can be found. 101 """ 102 autodir = host.get_autodir() 103 if autodir: 104 logging.debug('Using existing host autodir: %s', autodir) 105 return autodir 106 107 for path in Autotest.get_client_autodir_paths(host): 108 try: 109 autotest_binary = os.path.join(path, 'bin', 'autotest') 110 host.run('test -x %s' % utils.sh_escape(autotest_binary)) 111 host.run('test -w %s' % utils.sh_escape(path)) 112 logging.debug('Found existing autodir at %s', path) 113 return path 114 except error.AutoservRunError: 115 logging.debug('%s does not exist on %s', autotest_binary, 116 host.hostname) 117 raise AutodirNotFoundError 118 119 120 @classmethod 121 def get_install_dir(cls, host): 122 """ 123 Determines the location where autotest should be installed on 124 host. If self.install_in_tmpdir is set, it will return a unique 125 temporary directory that autotest can be installed in. Otherwise, looks 126 for an existing installation to use; if none is found, looks for a 127 usable directory in the global config client_autodir_paths. 128 """ 129 try: 130 install_dir = cls.get_installed_autodir(host) 131 except AutodirNotFoundError: 132 install_dir = cls._find_installable_dir(host) 133 134 if cls.install_in_tmpdir: 135 return host.get_tmp_dir(parent=install_dir) 136 return install_dir 137 138 139 @classmethod 140 def _find_installable_dir(cls, host): 141 client_autodir_paths = cls.get_client_autodir_paths(host) 142 for path in client_autodir_paths: 143 try: 144 host.run('mkdir -p %s' % utils.sh_escape(path)) 145 host.run('test -w %s' % utils.sh_escape(path)) 146 return path 147 except error.AutoservRunError: 148 logging.debug('Failed to create %s', path) 149 metadata = {'_type': 'AutoservInstallError', 150 'hostname': host.hostname} 151 autotest_stats.Counter('AutoservInstallError', 152 metadata=metadata).increment() 153 raise error.AutoservInstallError( 154 'Unable to find a place to install Autotest; tried %s' % 155 ', '.join(client_autodir_paths)) 156 157 158 def get_fetch_location(self): 159 c = global_config.global_config 160 repos = c.get_config_value("PACKAGES", 'fetch_location', type=list, 161 default=[]) 162 repos.reverse() 163 return repos 164 165 166 def install(self, host=None, autodir=None, use_packaging=True): 167 self._install(host=host, autodir=autodir, use_packaging=use_packaging) 168 169 170 def install_full_client(self, host=None, autodir=None): 171 self._install(host=host, autodir=autodir, use_autoserv=False, 172 use_packaging=False) 173 174 175 def install_no_autoserv(self, host=None, autodir=None): 176 self._install(host=host, autodir=autodir, use_autoserv=False) 177 178 179 def _install_using_packaging(self, host, autodir): 180 repos = self.get_fetch_location() 181 if not repos: 182 raise error.PackageInstallError("No repos to install an " 183 "autotest client from") 184 pkgmgr = packages.PackageManager(autodir, hostname=host.hostname, 185 repo_urls=repos, 186 do_locking=False, 187 run_function=host.run, 188 run_function_dargs=dict(timeout=600)) 189 # The packages dir is used to store all the packages that 190 # are fetched on that client. (for the tests,deps etc. 191 # too apart from the client) 192 pkg_dir = os.path.join(autodir, 'packages') 193 # clean up the autodir except for the packages directory 194 host.run('cd %s && ls | grep -v "^packages$"' 195 ' | xargs rm -rf && rm -rf .[!.]*' % autodir) 196 pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir, 197 preserve_install_dir=True) 198 self.installed = True 199 200 201 def _install_using_send_file(self, host, autodir): 202 dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers", 203 "packages"]) 204 light_files = [os.path.join(self.source_material, f) 205 for f in os.listdir(self.source_material) 206 if f not in dirs_to_exclude] 207 host.send_file(light_files, autodir, delete_dest=True) 208 209 # create empty dirs for all the stuff we excluded 210 commands = [] 211 for path in dirs_to_exclude: 212 abs_path = os.path.join(autodir, path) 213 abs_path = utils.sh_escape(abs_path) 214 commands.append("mkdir -p '%s'" % abs_path) 215 commands.append("touch '%s'/__init__.py" % abs_path) 216 host.run(';'.join(commands)) 217 218 219 def _install(self, host=None, autodir=None, use_autoserv=True, 220 use_packaging=True): 221 """ 222 Install autotest. If get() was not called previously, an 223 attempt will be made to install from the autotest svn 224 repository. 225 226 @param host A Host instance on which autotest will be installed 227 @param autodir Location on the remote host to install to 228 @param use_autoserv Enable install modes that depend on the client 229 running with the autoserv harness 230 @param use_packaging Enable install modes that use the packaging system 231 232 @exception AutoservError if a tarball was not specified and 233 the target host does not have svn installed in its path 234 """ 235 if not host: 236 host = self.host 237 if not self.got: 238 self.get() 239 host.wait_up(timeout=30) 240 host.setup() 241 logging.info("Installing autotest on %s", host.hostname) 242 243 # set up the autotest directory on the remote machine 244 if not autodir: 245 autodir = self.get_install_dir(host) 246 logging.info('Using installation dir %s', autodir) 247 host.set_autodir(autodir) 248 host.run('mkdir -p %s' % utils.sh_escape(autodir)) 249 250 # make sure there are no files in $AUTODIR/results 251 results_path = os.path.join(autodir, 'results') 252 host.run('rm -rf %s/*' % utils.sh_escape(results_path), 253 ignore_status=True) 254 255 # Fetch the autotest client from the nearest repository 256 if use_packaging: 257 try: 258 self._install_using_packaging(host, autodir) 259 logging.info("Installation of autotest completed using the " 260 "packaging system.") 261 return 262 except (error.PackageInstallError, error.AutoservRunError, 263 global_config.ConfigError), e: 264 logging.info("Could not install autotest using the packaging " 265 "system: %s. Trying other methods", e) 266 else: 267 # Delete the package checksum file to force dut updating local 268 # packages. 269 command = ('rm -f "%s"' % 270 (os.path.join(autodir, base_packages.CHECKSUM_FILE))) 271 host.run(command) 272 273 # try to install from file or directory 274 if self.source_material: 275 c = global_config.global_config 276 supports_autoserv_packaging = c.get_config_value( 277 "PACKAGES", "serve_packages_from_autoserv", type=bool) 278 # Copy autotest recursively 279 if supports_autoserv_packaging and use_autoserv: 280 self._install_using_send_file(host, autodir) 281 else: 282 host.send_file(self.source_material, autodir, delete_dest=True) 283 logging.info("Installation of autotest completed from %s", 284 self.source_material) 285 self.installed = True 286 return 287 288 # if that fails try to install using svn 289 if utils.run('which svn').exit_status: 290 raise error.AutoservError('svn not found on target machine: %s' % 291 host.name) 292 try: 293 host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir)) 294 except error.AutoservRunError, e: 295 host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir)) 296 logging.info("Installation of autotest completed using SVN.") 297 self.installed = True 298 299 300 def uninstall(self, host=None): 301 """ 302 Uninstall (i.e. delete) autotest. Removes the autotest client install 303 from the specified host. 304 305 @params host a Host instance from which the client will be removed 306 """ 307 if not self.installed: 308 return 309 if not host: 310 host = self.host 311 autodir = host.get_autodir() 312 if not autodir: 313 return 314 315 # perform the actual uninstall 316 host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True) 317 host.set_autodir(None) 318 self.installed = False 319 320 321 def get(self, location=None): 322 if not location: 323 location = os.path.join(self.serverdir, '../client') 324 location = os.path.abspath(location) 325 # If there's stuff run on our client directory already, it 326 # can cause problems. Try giving it a quick clean first. 327 cwd = os.getcwd() 328 os.chdir(location) 329 try: 330 utils.system('tools/make_clean', ignore_status=True) 331 finally: 332 os.chdir(cwd) 333 super(BaseAutotest, self).get(location) 334 self.got = True 335 336 337 def run(self, control_file, results_dir='.', host=None, timeout=None, 338 tag=None, parallel_flag=False, background=False, 339 client_disconnect_timeout=None, use_packaging=True): 340 """ 341 Run an autotest job on the remote machine. 342 343 @param control_file: An open file-like-obj of the control file. 344 @param results_dir: A str path where the results should be stored 345 on the local filesystem. 346 @param host: A Host instance on which the control file should 347 be run. 348 @param timeout: Maximum number of seconds to wait for the run or None. 349 @param tag: Tag name for the client side instance of autotest. 350 @param parallel_flag: Flag set when multiple jobs are run at the 351 same time. 352 @param background: Indicates that the client should be launched as 353 a background job; the code calling run will be responsible 354 for monitoring the client and collecting the results. 355 @param client_disconnect_timeout: Seconds to wait for the remote host 356 to come back after a reboot. Defaults to the host setting for 357 DEFAULT_REBOOT_TIMEOUT. 358 359 @raises AutotestRunError: If there is a problem executing 360 the control file. 361 """ 362 host = self._get_host_and_setup(host, use_packaging=use_packaging) 363 results_dir = os.path.abspath(results_dir) 364 365 if client_disconnect_timeout is None: 366 client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT 367 368 if tag: 369 results_dir = os.path.join(results_dir, tag) 370 371 atrun = _Run(host, results_dir, tag, parallel_flag, background) 372 self._do_run(control_file, results_dir, host, atrun, timeout, 373 client_disconnect_timeout, use_packaging=use_packaging) 374 375 376 def _get_host_and_setup(self, host, use_packaging=True): 377 if not host: 378 host = self.host 379 if not self.installed: 380 self.install(host, use_packaging=use_packaging) 381 382 host.wait_up(timeout=30) 383 return host 384 385 386 def _do_run(self, control_file, results_dir, host, atrun, timeout, 387 client_disconnect_timeout, use_packaging=True): 388 try: 389 atrun.verify_machine() 390 except: 391 logging.error("Verify failed on %s. Reinstalling autotest", 392 host.hostname) 393 self.install(host) 394 atrun.verify_machine() 395 debug = os.path.join(results_dir, 'debug') 396 try: 397 os.makedirs(debug) 398 except Exception: 399 pass 400 401 delete_file_list = [atrun.remote_control_file, 402 atrun.remote_control_file + '.state', 403 atrun.manual_control_file, 404 atrun.manual_control_file + '.state'] 405 cmd = ';'.join('rm -f ' + control for control in delete_file_list) 406 host.run(cmd, ignore_status=True) 407 408 tmppath = utils.get(control_file, local_copy=True) 409 410 # build up the initialization prologue for the control file 411 prologue_lines = [] 412 413 # Add the additional user arguments 414 prologue_lines.append("args = %r\n" % self.job.args) 415 416 # If the packaging system is being used, add the repository list. 417 repos = None 418 try: 419 if use_packaging: 420 repos = self.get_fetch_location() 421 prologue_lines.append('job.add_repository(%s)\n' % repos) 422 else: 423 logging.debug('use_packaging is set to False, do not add any ' 424 'repository.') 425 except global_config.ConfigError, e: 426 # If repos is defined packaging is enabled so log the error 427 if repos: 428 logging.error(e) 429 430 # on full-size installs, turn on any profilers the server is using 431 if not atrun.background: 432 running_profilers = host.job.profilers.add_log.iteritems() 433 for profiler, (args, dargs) in running_profilers: 434 call_args = [repr(profiler)] 435 call_args += [repr(arg) for arg in args] 436 call_args += ["%s=%r" % item for item in dargs.iteritems()] 437 prologue_lines.append("job.profilers.add(%s)\n" 438 % ", ".join(call_args)) 439 cfile = "".join(prologue_lines) 440 441 cfile += open(tmppath).read() 442 open(tmppath, "w").write(cfile) 443 444 # Create and copy state file to remote_control_file + '.state' 445 state_file = host.job.preprocess_client_state() 446 host.send_file(state_file, atrun.remote_control_file + '.init.state') 447 os.remove(state_file) 448 449 # Copy control_file to remote_control_file on the host 450 host.send_file(tmppath, atrun.remote_control_file) 451 if os.path.abspath(tmppath) != os.path.abspath(control_file): 452 os.remove(tmppath) 453 454 atrun.execute_control( 455 timeout=timeout, 456 client_disconnect_timeout=client_disconnect_timeout) 457 458 459 def run_timed_test(self, test_name, results_dir='.', host=None, 460 timeout=None, parallel_flag=False, background=False, 461 client_disconnect_timeout=None, *args, **dargs): 462 """ 463 Assemble a tiny little control file to just run one test, 464 and run it as an autotest client-side test 465 """ 466 if not host: 467 host = self.host 468 if not self.installed: 469 self.install(host) 470 opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()] 471 cmd = ", ".join([repr(test_name)] + map(repr, args) + opts) 472 control = "job.run_test(%s)\n" % cmd 473 self.run(control, results_dir, host, timeout=timeout, 474 parallel_flag=parallel_flag, background=background, 475 client_disconnect_timeout=client_disconnect_timeout) 476 477 478 def run_test(self, test_name, results_dir='.', host=None, 479 parallel_flag=False, background=False, 480 client_disconnect_timeout=None, *args, **dargs): 481 self.run_timed_test(test_name, results_dir, host, timeout=None, 482 parallel_flag=parallel_flag, background=background, 483 client_disconnect_timeout=client_disconnect_timeout, 484 *args, **dargs) 485 486 487 class _BaseRun(object): 488 """ 489 Represents a run of autotest control file. This class maintains 490 all the state necessary as an autotest control file is executed. 491 492 It is not intended to be used directly, rather control files 493 should be run using the run method in Autotest. 494 """ 495 def __init__(self, host, results_dir, tag, parallel_flag, background): 496 self.host = host 497 self.results_dir = results_dir 498 self.env = host.env 499 self.tag = tag 500 self.parallel_flag = parallel_flag 501 self.background = background 502 self.autodir = Autotest.get_installed_autodir(self.host) 503 control = os.path.join(self.autodir, 'control') 504 if tag: 505 control += '.' + tag 506 self.manual_control_file = control 507 self.remote_control_file = control + '.autoserv' 508 self.config_file = os.path.join(self.autodir, 'global_config.ini') 509 510 511 def verify_machine(self): 512 binary = os.path.join(self.autodir, 'bin/autotest') 513 try: 514 self.host.run('ls %s > /dev/null 2>&1' % binary) 515 except: 516 raise error.AutoservInstallError( 517 "Autotest does not appear to be installed") 518 519 if not self.parallel_flag: 520 tmpdir = os.path.join(self.autodir, 'tmp') 521 download = os.path.join(self.autodir, 'tests/download') 522 self.host.run('umount %s' % tmpdir, ignore_status=True) 523 self.host.run('umount %s' % download, ignore_status=True) 524 525 526 def get_base_cmd_args(self, section): 527 args = ['--verbose'] 528 if section > 0: 529 args.append('-c') 530 if self.tag: 531 args.append('-t %s' % self.tag) 532 if self.host.job.use_external_logging(): 533 args.append('-l') 534 if self.host.hostname: 535 args.append('--hostname=%s' % self.host.hostname) 536 args.append('--user=%s' % self.host.job.user) 537 538 args.append(self.remote_control_file) 539 return args 540 541 542 def get_background_cmd(self, section): 543 cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')] 544 cmd += self.get_base_cmd_args(section) 545 cmd += ['>/dev/null', '2>/dev/null', '&'] 546 return ' '.join(cmd) 547 548 549 def get_daemon_cmd(self, section, monitor_dir): 550 cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'), 551 monitor_dir, '-H autoserv'] 552 cmd += self.get_base_cmd_args(section) 553 cmd += ['>/dev/null', '2>/dev/null', '&'] 554 return ' '.join(cmd) 555 556 557 def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read): 558 cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'), 559 monitor_dir, str(stdout_read), str(stderr_read)] 560 return ' '.join(cmd) 561 562 563 def get_client_log(self): 564 """Find what the "next" client.* prefix should be 565 566 @returns A string of the form client.INTEGER that should be prefixed 567 to all client debug log files. 568 """ 569 max_digit = -1 570 debug_dir = os.path.join(self.results_dir, 'debug') 571 client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*')) 572 for log in client_logs: 573 _, number, _ = log.split('.', 2) 574 if number.isdigit(): 575 max_digit = max(max_digit, int(number)) 576 return 'client.%d' % (max_digit + 1) 577 578 579 def copy_client_config_file(self, client_log_prefix=None): 580 """ 581 Create and copy the client config file based on the server config. 582 583 @param client_log_prefix: Optional prefix to prepend to log files. 584 """ 585 client_config_file = self._create_client_config_file(client_log_prefix) 586 self.host.send_file(client_config_file, self.config_file) 587 os.remove(client_config_file) 588 589 590 def _create_client_config_file(self, client_log_prefix=None): 591 """ 592 Create a temporary file with the [CLIENT] section configuration values 593 taken from the server global_config.ini. 594 595 @param client_log_prefix: Optional prefix to prepend to log files. 596 597 @return: Path of the temporary file generated. 598 """ 599 config = global_config.global_config.get_section_values('CLIENT') 600 if client_log_prefix: 601 config.set('CLIENT', 'default_logging_name', client_log_prefix) 602 return self._create_aux_file(config.write) 603 604 605 def _create_aux_file(self, func, *args): 606 """ 607 Creates a temporary file and writes content to it according to a 608 content creation function. The file object is appended to *args, which 609 is then passed to the content creation function 610 611 @param func: Function that will be used to write content to the 612 temporary file. 613 @param *args: List of parameters that func takes. 614 @return: Path to the temporary file that was created. 615 """ 616 fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir) 617 aux_file = os.fdopen(fd, "w") 618 try: 619 list_args = list(args) 620 list_args.append(aux_file) 621 func(*list_args) 622 finally: 623 aux_file.close() 624 return path 625 626 627 @staticmethod 628 def is_client_job_finished(last_line): 629 return bool(re.match(r'^END .*\t----\t----\t.*$', last_line)) 630 631 632 @staticmethod 633 def is_client_job_rebooting(last_line): 634 return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line)) 635 636 637 def _diagnose_dut(self, old_boot_id=None): 638 """ 639 Run diagnostic checks on a DUT. 640 641 1. ping: A dead host will not respond to pings. 642 2. ssh (happens with 3.): DUT hangs usually fail in authentication 643 but respond to pings. 644 3. Check if a reboot occured: A healthy but unexpected reboot leaves the 645 host running with a new boot id. 646 647 This method will always raise an exception from the AutotestFailure 648 family and should only get called when the reason for a test failing 649 is ambiguous. 650 651 @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping. 652 @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT. 653 @raises AutotestDeviceRebooted: If the boot id changed. 654 @raises AutotestAbort: If none of the above exceptions were raised. 655 Since we have no recourse we must abort at this stage. 656 """ 657 msg = 'Autotest client terminated unexpectedly: ' 658 if utils.ping(self.host.hostname, tries=1, deadline=1) != 0: 659 msg += 'DUT is no longer pingable, it may have rebooted or hung.\n' 660 raise AutotestDeviceNotPingable(msg) 661 662 if old_boot_id: 663 try: 664 new_boot_id = self.host.get_boot_id(timeout=60) 665 except Exception as e: 666 msg += ('DUT is pingable but not SSHable, it most likely' 667 ' sporadically rebooted during testing. %s\n' % str(e)) 668 raise AutotestDeviceNotSSHable(msg) 669 else: 670 if new_boot_id != old_boot_id: 671 msg += 'DUT rebooted during the test run.\n' 672 raise AutotestDeviceRebooted(msg) 673 674 msg += ('DUT is pingable, SSHable and did NOT restart ' 675 'un-expectedly. We probably lost connectivity during the ' 676 'test.') 677 else: 678 msg += ('DUT is pingable, could not determine if an un-expected ' 679 'reboot occured during the test.') 680 681 raise AutotestAbort(msg) 682 683 684 def log_unexpected_abort(self, stderr_redirector, old_boot_id=None): 685 """ 686 Logs that something unexpected happened, then tries to diagnose the 687 failure. The purpose of this function is only to close out the status 688 log with the appropriate error message, not to critically terminate 689 the program. 690 691 @param stderr_redirector: log stream. 692 @param old_boot_id: boot id used to infer if a reboot occured. 693 """ 694 stderr_redirector.flush_all_buffers() 695 try: 696 self._diagnose_dut(old_boot_id) 697 except AutotestFailure as e: 698 self.host.job.record('END ABORT', None, None, str(e)) 699 700 701 def _execute_in_background(self, section, timeout): 702 full_cmd = self.get_background_cmd(section) 703 devnull = open(os.devnull, "w") 704 705 self.copy_client_config_file(self.get_client_log()) 706 707 self.host.job.push_execution_context(self.results_dir) 708 try: 709 result = self.host.run(full_cmd, ignore_status=True, 710 timeout=timeout, 711 stdout_tee=devnull, 712 stderr_tee=devnull) 713 finally: 714 self.host.job.pop_execution_context() 715 716 return result 717 718 719 @staticmethod 720 def _strip_stderr_prologue(stderr): 721 """Strips the 'standard' prologue that get pre-pended to every 722 remote command and returns the text that was actually written to 723 stderr by the remote command.""" 724 stderr_lines = stderr.split("\n")[1:] 725 if not stderr_lines: 726 return "" 727 elif stderr_lines[0].startswith("NOTE: autotestd_monitor"): 728 del stderr_lines[0] 729 return "\n".join(stderr_lines) 730 731 732 def _execute_daemon(self, section, timeout, stderr_redirector, 733 client_disconnect_timeout): 734 monitor_dir = self.host.get_tmp_dir() 735 daemon_cmd = self.get_daemon_cmd(section, monitor_dir) 736 737 # grab the location for the server-side client log file 738 client_log_prefix = self.get_client_log() 739 client_log_path = os.path.join(self.results_dir, 'debug', 740 client_log_prefix + '.log') 741 client_log = open(client_log_path, 'w', 0) 742 self.copy_client_config_file(client_log_prefix) 743 744 stdout_read = stderr_read = 0 745 self.host.job.push_execution_context(self.results_dir) 746 try: 747 self.host.run(daemon_cmd, ignore_status=True, timeout=timeout) 748 disconnect_warnings = [] 749 while True: 750 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read, 751 stderr_read) 752 try: 753 result = self.host.run(monitor_cmd, ignore_status=True, 754 timeout=timeout, 755 stdout_tee=client_log, 756 stderr_tee=stderr_redirector) 757 except error.AutoservRunError, e: 758 result = e.result_obj 759 result.exit_status = None 760 disconnect_warnings.append(e.description) 761 762 stderr_redirector.log_warning( 763 "Autotest client was disconnected: %s" % e.description, 764 "NETWORK") 765 except error.AutoservSSHTimeout: 766 result = utils.CmdResult(monitor_cmd, "", "", None, 0) 767 stderr_redirector.log_warning( 768 "Attempt to connect to Autotest client timed out", 769 "NETWORK") 770 771 stdout_read += len(result.stdout) 772 stderr_read += len(self._strip_stderr_prologue(result.stderr)) 773 774 if result.exit_status is not None: 775 # TODO (crosbug.com/38224)- sbasi: Remove extra logging. 776 logging.debug('Result exit status is %d.', 777 result.exit_status) 778 return result 779 elif not self.host.wait_up(client_disconnect_timeout): 780 raise error.AutoservSSHTimeout( 781 "client was disconnected, reconnect timed out") 782 finally: 783 client_log.close() 784 self.host.job.pop_execution_context() 785 786 787 def execute_section(self, section, timeout, stderr_redirector, 788 client_disconnect_timeout): 789 logging.info("Executing %s/bin/autotest %s/control phase %d", 790 self.autodir, self.autodir, section) 791 792 if self.background: 793 result = self._execute_in_background(section, timeout) 794 else: 795 result = self._execute_daemon(section, timeout, stderr_redirector, 796 client_disconnect_timeout) 797 798 last_line = stderr_redirector.last_line 799 800 # check if we failed hard enough to warrant an exception 801 if result.exit_status == 1: 802 err = error.AutotestRunError("client job was aborted") 803 elif not self.background and not result.stderr: 804 err = error.AutotestRunError( 805 "execute_section %s failed to return anything\n" 806 "stdout:%s\n" % (section, result.stdout)) 807 else: 808 err = None 809 810 # log something if the client failed AND never finished logging 811 if err and not self.is_client_job_finished(last_line): 812 self.log_unexpected_abort(stderr_redirector) 813 814 if err: 815 raise err 816 else: 817 return stderr_redirector.last_line 818 819 820 def _wait_for_reboot(self, old_boot_id): 821 logging.info("Client is rebooting") 822 logging.info("Waiting for client to halt") 823 if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT, 824 old_boot_id=old_boot_id): 825 err = "%s failed to shutdown after %d" 826 err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT) 827 raise error.AutotestRunError(err) 828 logging.info("Client down, waiting for restart") 829 if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT): 830 # since reboot failed 831 # hardreset the machine once if possible 832 # before failing this control file 833 warning = "%s did not come back up, hard resetting" 834 warning %= self.host.hostname 835 logging.warning(warning) 836 try: 837 self.host.hardreset(wait=False) 838 except (AttributeError, error.AutoservUnsupportedError): 839 warning = "Hard reset unsupported on %s" 840 warning %= self.host.hostname 841 logging.warning(warning) 842 raise error.AutotestRunError("%s failed to boot after %ds" % 843 (self.host.hostname, 844 self.host.DEFAULT_REBOOT_TIMEOUT)) 845 self.host.reboot_followup() 846 847 848 def execute_control(self, timeout=None, client_disconnect_timeout=None): 849 if not self.background: 850 collector = log_collector(self.host, self.tag, self.results_dir) 851 hostname = self.host.hostname 852 remote_results = collector.client_results_dir 853 local_results = collector.server_results_dir 854 self.host.job.add_client_log(hostname, remote_results, 855 local_results) 856 job_record_context = self.host.job.get_record_context() 857 858 section = 0 859 start_time = time.time() 860 861 logger = client_logger(self.host, self.tag, self.results_dir) 862 try: 863 while not timeout or time.time() < start_time + timeout: 864 if timeout: 865 section_timeout = start_time + timeout - time.time() 866 else: 867 section_timeout = None 868 boot_id = self.host.get_boot_id() 869 last = self.execute_section(section, section_timeout, 870 logger, client_disconnect_timeout) 871 if self.background: 872 return 873 section += 1 874 if self.is_client_job_finished(last): 875 logging.info("Client complete") 876 return 877 elif self.is_client_job_rebooting(last): 878 try: 879 self._wait_for_reboot(boot_id) 880 except error.AutotestRunError, e: 881 self.host.job.record("ABORT", None, "reboot", str(e)) 882 self.host.job.record("END ABORT", None, None, str(e)) 883 raise 884 continue 885 886 # If a test fails without probable cause we try to bucket it's 887 # failure into one of 2 categories. If we can determine the 888 # current state of the device and it is suspicious, we close the 889 # status lines indicating a failure. If we either cannot 890 # determine the state of the device, or it appears totally 891 # healthy, we give up and abort. 892 try: 893 self._diagnose_dut(boot_id) 894 except AutotestDeviceError as e: 895 # The status lines of the test are pretty much tailed to 896 # our log, with indentation, from the client job on the DUT. 897 # So if the DUT goes down unexpectedly we'll end up with a 898 # malformed status log unless we manually unwind the status 899 # stack. Ideally we would want to write a nice wrapper like 900 # server_job methods run_reboot, run_group but they expect 901 # reboots and we don't. 902 self.host.job.record('FAIL', None, None, str(e)) 903 self.host.job.record('END FAIL', None, None) 904 self.host.job.record('END GOOD', None, None) 905 self.host.job.failed_with_device_error = True 906 return 907 except AutotestAbort as e: 908 self.host.job.record('ABORT', None, None, str(e)) 909 self.host.job.record('END ABORT', None, None) 910 911 # give the client machine a chance to recover from a crash 912 self.host.wait_up( 913 self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600) 914 msg = ("Aborting - unexpected final status message from " 915 "client on %s: %s\n") % (self.host.hostname, last) 916 raise error.AutotestRunError(msg) 917 finally: 918 logger.close() 919 if not self.background: 920 collector.collect_client_job_results() 921 collector.remove_redundant_client_logs() 922 state_file = os.path.basename(self.remote_control_file 923 + '.state') 924 state_path = os.path.join(self.results_dir, state_file) 925 self.host.job.postprocess_client_state(state_path) 926 self.host.job.remove_client_log(hostname, remote_results, 927 local_results) 928 job_record_context.restore() 929 930 # should only get here if we timed out 931 assert timeout 932 raise error.AutotestTimeoutError() 933 934 935 class log_collector(object): 936 def __init__(self, host, client_tag, results_dir): 937 self.host = host 938 if not client_tag: 939 client_tag = "default" 940 self.client_results_dir = os.path.join(host.get_autodir(), "results", 941 client_tag) 942 self.server_results_dir = results_dir 943 944 945 def collect_client_job_results(self): 946 """ A method that collects all the current results of a running 947 client job into the results dir. By default does nothing as no 948 client job is running, but when running a client job you can override 949 this with something that will actually do something. """ 950 951 # make an effort to wait for the machine to come up 952 try: 953 self.host.wait_up(timeout=30) 954 except error.AutoservError: 955 # don't worry about any errors, we'll try and 956 # get the results anyway 957 pass 958 959 # Copy all dirs in default to results_dir 960 timer = autotest_stats.Timer('collect_client_job_results') 961 timer.start() 962 try: 963 self.host.get_file(self.client_results_dir + '/', 964 self.server_results_dir, preserve_symlinks=True) 965 966 # Only report time used for successful get_file calls. 967 timer.stop(); 968 except Exception: 969 # well, don't stop running just because we couldn't get logs 970 e_msg = "Unexpected error copying test result logs, continuing ..." 971 logging.error(e_msg) 972 traceback.print_exc(file=sys.stdout) 973 974 975 def remove_redundant_client_logs(self): 976 """Remove client.*.log files in favour of client.*.DEBUG files.""" 977 debug_dir = os.path.join(self.server_results_dir, 'debug') 978 debug_files = [f for f in os.listdir(debug_dir) 979 if re.search(r'^client\.\d+\.DEBUG$', f)] 980 for debug_file in debug_files: 981 log_file = debug_file.replace('DEBUG', 'log') 982 log_file = os.path.join(debug_dir, log_file) 983 if os.path.exists(log_file): 984 os.remove(log_file) 985 986 987 # a file-like object for catching stderr from an autotest client and 988 # extracting status logs from it 989 class BaseClientLogger(object): 990 """Partial file object to write to both stdout and 991 the status log file. We only implement those methods 992 utils.run() actually calls. 993 """ 994 status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$") 995 test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$") 996 fetch_package_parser = re.compile( 997 r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$") 998 extract_indent = re.compile(r"^(\t*).*$") 999 extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$") 1000 1001 def __init__(self, host, tag, server_results_dir): 1002 self.host = host 1003 self.job = host.job 1004 self.log_collector = log_collector(host, tag, server_results_dir) 1005 self.leftover = "" 1006 self.last_line = "" 1007 self.logs = {} 1008 1009 1010 def _process_log_dict(self, log_dict): 1011 log_list = log_dict.pop("logs", []) 1012 for key in sorted(log_dict.iterkeys()): 1013 log_list += self._process_log_dict(log_dict.pop(key)) 1014 return log_list 1015 1016 1017 def _process_logs(self): 1018 """Go through the accumulated logs in self.log and print them 1019 out to stdout and the status log. Note that this processes 1020 logs in an ordering where: 1021 1022 1) logs to different tags are never interleaved 1023 2) logs to x.y come before logs to x.y.z for all z 1024 3) logs to x.y come before x.z whenever y < z 1025 1026 Note that this will in general not be the same as the 1027 chronological ordering of the logs. However, if a chronological 1028 ordering is desired that one can be reconstructed from the 1029 status log by looking at timestamp lines.""" 1030 log_list = self._process_log_dict(self.logs) 1031 for entry in log_list: 1032 self.job.record_entry(entry, log_in_subdir=False) 1033 if log_list: 1034 self.last_line = log_list[-1].render() 1035 1036 1037 def _process_quoted_line(self, tag, line): 1038 """Process a line quoted with an AUTOTEST_STATUS flag. If the 1039 tag is blank then we want to push out all the data we've been 1040 building up in self.logs, and then the newest line. If the 1041 tag is not blank, then push the line into the logs for handling 1042 later.""" 1043 entry = base_job.status_log_entry.parse(line) 1044 if entry is None: 1045 return # the line contains no status lines 1046 if tag == "": 1047 self._process_logs() 1048 self.job.record_entry(entry, log_in_subdir=False) 1049 self.last_line = line 1050 else: 1051 tag_parts = [int(x) for x in tag.split(".")] 1052 log_dict = self.logs 1053 for part in tag_parts: 1054 log_dict = log_dict.setdefault(part, {}) 1055 log_list = log_dict.setdefault("logs", []) 1056 log_list.append(entry) 1057 1058 1059 def _process_info_line(self, line): 1060 """Check if line is an INFO line, and if it is, interpret any control 1061 messages (e.g. enabling/disabling warnings) that it may contain.""" 1062 match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line) 1063 if not match: 1064 return # not an INFO line 1065 for field in match.group(1).split('\t'): 1066 if field.startswith("warnings.enable="): 1067 func = self.job.warning_manager.enable_warnings 1068 elif field.startswith("warnings.disable="): 1069 func = self.job.warning_manager.disable_warnings 1070 else: 1071 continue 1072 warning_type = field.split("=", 1)[1] 1073 func(warning_type) 1074 1075 1076 def _process_line(self, line): 1077 """Write out a line of data to the appropriate stream. Status 1078 lines sent by autotest will be prepended with 1079 "AUTOTEST_STATUS", and all other lines are ssh error 1080 messages.""" 1081 status_match = self.status_parser.search(line) 1082 test_complete_match = self.test_complete_parser.search(line) 1083 fetch_package_match = self.fetch_package_parser.search(line) 1084 if status_match: 1085 tag, line = status_match.groups() 1086 self._process_info_line(line) 1087 self._process_quoted_line(tag, line) 1088 elif test_complete_match: 1089 self._process_logs() 1090 fifo_path, = test_complete_match.groups() 1091 try: 1092 self.log_collector.collect_client_job_results() 1093 self.host.run("echo A > %s" % fifo_path) 1094 except Exception: 1095 msg = "Post-test log collection failed, continuing anyway" 1096 logging.exception(msg) 1097 elif fetch_package_match: 1098 pkg_name, dest_path, fifo_path = fetch_package_match.groups() 1099 serve_packages = global_config.global_config.get_config_value( 1100 "PACKAGES", "serve_packages_from_autoserv", type=bool) 1101 if serve_packages and pkg_name.endswith(".tar.bz2"): 1102 try: 1103 self._send_tarball(pkg_name, dest_path) 1104 except Exception: 1105 msg = "Package tarball creation failed, continuing anyway" 1106 logging.exception(msg) 1107 try: 1108 self.host.run("echo B > %s" % fifo_path) 1109 except Exception: 1110 msg = "Package tarball installation failed, continuing anyway" 1111 logging.exception(msg) 1112 else: 1113 logging.info(line) 1114 1115 1116 def _send_tarball(self, pkg_name, remote_dest): 1117 name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name) 1118 src_dirs = [] 1119 if pkg_type == 'test': 1120 for test_dir in ['site_tests', 'tests']: 1121 src_dir = os.path.join(self.job.clientdir, test_dir, name) 1122 if os.path.exists(src_dir): 1123 src_dirs += [src_dir] 1124 if autoserv_prebuild: 1125 prebuild.setup(self.job.clientdir, src_dir) 1126 break 1127 elif pkg_type == 'profiler': 1128 src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)] 1129 if autoserv_prebuild: 1130 prebuild.setup(self.job.clientdir, src_dir) 1131 elif pkg_type == 'dep': 1132 src_dirs += [os.path.join(self.job.clientdir, 'deps', name)] 1133 elif pkg_type == 'client': 1134 return # you must already have a client to hit this anyway 1135 else: 1136 return # no other types are supported 1137 1138 # iterate over src_dirs until we find one that exists, then tar it 1139 for src_dir in src_dirs: 1140 if os.path.exists(src_dir): 1141 try: 1142 logging.info('Bundling %s into %s', src_dir, pkg_name) 1143 temp_dir = autotemp.tempdir(unique_id='autoserv-packager', 1144 dir=self.job.tmpdir) 1145 tarball_path = self.job.pkgmgr.tar_package( 1146 pkg_name, src_dir, temp_dir.name, " .") 1147 self.host.send_file(tarball_path, remote_dest) 1148 finally: 1149 temp_dir.clean() 1150 return 1151 1152 1153 def log_warning(self, msg, warning_type): 1154 """Injects a WARN message into the current status logging stream.""" 1155 timestamp = int(time.time()) 1156 if self.job.warning_manager.is_valid(timestamp, warning_type): 1157 self.job.record('WARN', None, None, msg) 1158 1159 1160 def write(self, data): 1161 # now start processing the existing buffer and the new data 1162 data = self.leftover + data 1163 lines = data.split('\n') 1164 processed_lines = 0 1165 try: 1166 # process all the buffered data except the last line 1167 # ignore the last line since we may not have all of it yet 1168 for line in lines[:-1]: 1169 self._process_line(line) 1170 processed_lines += 1 1171 finally: 1172 # save any unprocessed lines for future processing 1173 self.leftover = '\n'.join(lines[processed_lines:]) 1174 1175 1176 def flush(self): 1177 sys.stdout.flush() 1178 1179 1180 def flush_all_buffers(self): 1181 if self.leftover: 1182 self._process_line(self.leftover) 1183 self.leftover = "" 1184 self._process_logs() 1185 self.flush() 1186 1187 1188 def close(self): 1189 self.flush_all_buffers() 1190 1191 1192 SiteAutotest = client_utils.import_site_class( 1193 __file__, "autotest_lib.server.site_autotest", "SiteAutotest", 1194 BaseAutotest) 1195 1196 1197 _SiteRun = client_utils.import_site_class( 1198 __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun) 1199 1200 1201 SiteClientLogger = client_utils.import_site_class( 1202 __file__, "autotest_lib.server.site_autotest", "SiteClientLogger", 1203 BaseClientLogger) 1204 1205 1206 class Autotest(SiteAutotest): 1207 pass 1208 1209 1210 class client_logger(SiteClientLogger): 1211 pass 1212 1213 1214 class _Run(_SiteRun): 1215 pass 1216