1 # Copyright 2007 Google Inc. Released under the GPL v2 2 #pylint: disable-msg=C0111 3 4 import re, os, sys, traceback, time, glob, tempfile 5 import logging 6 7 import common 8 from autotest_lib.server import installable_object, prebuild, utils 9 from autotest_lib.client.common_lib import base_job, error, autotemp 10 from autotest_lib.client.common_lib import packages 11 from autotest_lib.client.common_lib import global_config 12 from autotest_lib.client.common_lib import utils as client_utils 13 14 try: 15 from chromite.lib import metrics 16 except ImportError: 17 metrics = client_utils.metrics_mock 18 19 20 AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client' 21 AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client' 22 23 24 get_value = global_config.global_config.get_config_value 25 autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild', 26 type=bool, default=False) 27 28 29 class AutodirNotFoundError(Exception): 30 """No Autotest installation could be found.""" 31 32 33 class AutotestFailure(Exception): 34 """Gereric exception class for failures during a test run.""" 35 36 37 class AutotestAbort(AutotestFailure): 38 """ 39 AutotestAborts are thrown when the DUT seems fine, 40 and the test doesn't give us an explicit reason for 41 failure; In this case we have no choice but to abort. 42 """ 43 44 45 class AutotestDeviceError(AutotestFailure): 46 """ 47 Exceptions that inherit from AutotestDeviceError 48 are thrown when we can determine the current 49 state of the DUT and conclude that it probably 50 lead to the test failing; these exceptions lead 51 to failures instead of aborts. 52 """ 53 54 55 class AutotestDeviceNotPingable(AutotestDeviceError): 56 """Error for when a DUT becomes unpingable.""" 57 58 59 class AutotestDeviceNotSSHable(AutotestDeviceError): 60 """Error for when a DUT is pingable but not SSHable.""" 61 62 63 class AutotestDeviceRebooted(AutotestDeviceError): 64 """Error for when a DUT rebooted unexpectedly.""" 65 66 67 class BaseAutotest(installable_object.InstallableObject): 68 """ 69 This class represents the Autotest program. 70 71 Autotest is used to run tests automatically and collect the results. 72 It also supports profilers. 73 74 Implementation details: 75 This is a leaf class in an abstract class hierarchy, it must 76 implement the unimplemented methods in parent classes. 77 """ 78 79 def __init__(self, host=None): 80 self.host = host 81 self.got = False 82 self.installed = False 83 self.serverdir = utils.get_server_dir() 84 super(BaseAutotest, self).__init__() 85 86 87 install_in_tmpdir = False 88 @classmethod 89 def set_install_in_tmpdir(cls, flag): 90 """ Sets a flag that controls whether or not Autotest should by 91 default be installed in a "standard" directory (e.g. 92 /home/autotest, /usr/local/autotest) or a temporary directory. """ 93 cls.install_in_tmpdir = flag 94 95 96 @classmethod 97 def get_client_autodir_paths(cls, host): 98 return global_config.global_config.get_config_value( 99 'AUTOSERV', 'client_autodir_paths', type=list) 100 101 102 @classmethod 103 def get_installed_autodir(cls, host): 104 """ 105 Find where the Autotest client is installed on the host. 106 @returns an absolute path to an installed Autotest client root. 107 @raises AutodirNotFoundError if no Autotest installation can be found. 108 """ 109 autodir = host.get_autodir() 110 if autodir: 111 logging.debug('Using existing host autodir: %s', autodir) 112 return autodir 113 114 for path in Autotest.get_client_autodir_paths(host): 115 try: 116 autotest_binary = os.path.join(path, 'bin', 'autotest') 117 host.run('test -x %s' % utils.sh_escape(autotest_binary)) 118 host.run('test -w %s' % utils.sh_escape(path)) 119 logging.debug('Found existing autodir at %s', path) 120 return path 121 except error.GenericHostRunError: 122 logging.debug('%s does not exist on %s', autotest_binary, 123 host.hostname) 124 raise AutodirNotFoundError 125 126 127 @classmethod 128 def get_install_dir(cls, host): 129 """ 130 Determines the location where autotest should be installed on 131 host. If self.install_in_tmpdir is set, it will return a unique 132 temporary directory that autotest can be installed in. Otherwise, looks 133 for an existing installation to use; if none is found, looks for a 134 usable directory in the global config client_autodir_paths. 135 """ 136 try: 137 install_dir = cls.get_installed_autodir(host) 138 except AutodirNotFoundError: 139 install_dir = cls._find_installable_dir(host) 140 141 if cls.install_in_tmpdir: 142 return host.get_tmp_dir(parent=install_dir) 143 return install_dir 144 145 146 @classmethod 147 def _find_installable_dir(cls, host): 148 client_autodir_paths = cls.get_client_autodir_paths(host) 149 for path in client_autodir_paths: 150 try: 151 host.run('mkdir -p %s' % utils.sh_escape(path)) 152 host.run('test -w %s' % utils.sh_escape(path)) 153 return path 154 except error.AutoservRunError: 155 logging.debug('Failed to create %s', path) 156 metrics.Counter( 157 'chromeos/autotest/errors/no_autotest_install_path').increment( 158 fields={'dut_host_name': host.hostname}) 159 raise error.AutoservInstallError( 160 'Unable to find a place to install Autotest; tried %s' % 161 ', '.join(client_autodir_paths)) 162 163 164 def get_fetch_location(self): 165 c = global_config.global_config 166 repos = c.get_config_value("PACKAGES", 'fetch_location', type=list, 167 default=[]) 168 repos.reverse() 169 return repos 170 171 172 def install(self, host=None, autodir=None, use_packaging=True): 173 self._install(host=host, autodir=autodir, use_packaging=use_packaging) 174 175 176 def install_full_client(self, host=None, autodir=None): 177 self._install(host=host, autodir=autodir, use_autoserv=False, 178 use_packaging=False) 179 180 181 def install_no_autoserv(self, host=None, autodir=None): 182 self._install(host=host, autodir=autodir, use_autoserv=False) 183 184 185 def _install_using_packaging(self, host, autodir): 186 repos = self.get_fetch_location() 187 if not repos: 188 raise error.PackageInstallError("No repos to install an " 189 "autotest client from") 190 pkgmgr = packages.PackageManager(autodir, hostname=host.hostname, 191 repo_urls=repos, 192 do_locking=False, 193 run_function=host.run, 194 run_function_dargs=dict(timeout=600)) 195 # The packages dir is used to store all the packages that 196 # are fetched on that client. (for the tests,deps etc. 197 # too apart from the client) 198 pkg_dir = os.path.join(autodir, 'packages') 199 # clean up the autodir except for the packages directory 200 host.run('cd %s && ls | grep -v "^packages$"' 201 ' | xargs rm -rf && rm -rf .[!.]*' % autodir) 202 pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir, 203 preserve_install_dir=True) 204 self.installed = True 205 206 207 def _install_using_send_file(self, host, autodir): 208 dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers", 209 "packages"]) 210 light_files = [os.path.join(self.source_material, f) 211 for f in os.listdir(self.source_material) 212 if f not in dirs_to_exclude] 213 host.send_file(light_files, autodir, delete_dest=True) 214 215 # create empty dirs for all the stuff we excluded 216 commands = [] 217 for path in dirs_to_exclude: 218 abs_path = os.path.join(autodir, path) 219 abs_path = utils.sh_escape(abs_path) 220 commands.append("mkdir -p '%s'" % abs_path) 221 commands.append("touch '%s'/__init__.py" % abs_path) 222 host.run(';'.join(commands)) 223 224 225 def _install(self, host=None, autodir=None, use_autoserv=True, 226 use_packaging=True): 227 """ 228 Install autotest. If get() was not called previously, an 229 attempt will be made to install from the autotest svn 230 repository. 231 232 @param host A Host instance on which autotest will be installed 233 @param autodir Location on the remote host to install to 234 @param use_autoserv Enable install modes that depend on the client 235 running with the autoserv harness 236 @param use_packaging Enable install modes that use the packaging system 237 238 @exception AutoservError if a tarball was not specified and 239 the target host does not have svn installed in its path 240 """ 241 if not host: 242 host = self.host 243 if not self.got: 244 self.get() 245 host.wait_up(timeout=30) 246 host.setup() 247 logging.info("Installing autotest on %s", host.hostname) 248 249 # set up the autotest directory on the remote machine 250 if not autodir: 251 autodir = self.get_install_dir(host) 252 logging.info('Using installation dir %s', autodir) 253 host.set_autodir(autodir) 254 host.run('mkdir -p %s' % utils.sh_escape(autodir)) 255 256 # make sure there are no files in $AUTODIR/results 257 results_path = os.path.join(autodir, 'results') 258 host.run('rm -rf %s/*' % utils.sh_escape(results_path), 259 ignore_status=True) 260 261 # Fetch the autotest client from the nearest repository 262 if use_packaging: 263 try: 264 self._install_using_packaging(host, autodir) 265 logging.info("Installation of autotest completed using the " 266 "packaging system.") 267 return 268 except (error.PackageInstallError, error.AutoservRunError, 269 global_config.ConfigError), e: 270 logging.info("Could not install autotest using the packaging " 271 "system: %s. Trying other methods", e) 272 else: 273 # Delete the package checksum file to force dut updating local 274 # packages. 275 command = ('rm -f "%s"' % 276 (os.path.join(autodir, packages.CHECKSUM_FILE))) 277 host.run(command) 278 279 # try to install from file or directory 280 if self.source_material: 281 c = global_config.global_config 282 supports_autoserv_packaging = c.get_config_value( 283 "PACKAGES", "serve_packages_from_autoserv", type=bool) 284 # Copy autotest recursively 285 if supports_autoserv_packaging and use_autoserv: 286 self._install_using_send_file(host, autodir) 287 else: 288 host.send_file(self.source_material, autodir, delete_dest=True) 289 logging.info("Installation of autotest completed from %s", 290 self.source_material) 291 self.installed = True 292 return 293 294 # if that fails try to install using svn 295 if utils.run('which svn').exit_status: 296 raise error.AutoservError('svn not found on target machine: %s' % 297 host.name) 298 try: 299 host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir)) 300 except error.AutoservRunError, e: 301 host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir)) 302 logging.info("Installation of autotest completed using SVN.") 303 self.installed = True 304 305 306 def uninstall(self, host=None): 307 """ 308 Uninstall (i.e. delete) autotest. Removes the autotest client install 309 from the specified host. 310 311 @params host a Host instance from which the client will be removed 312 """ 313 if not self.installed: 314 return 315 if not host: 316 host = self.host 317 autodir = host.get_autodir() 318 if not autodir: 319 return 320 321 # perform the actual uninstall 322 host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True) 323 host.set_autodir(None) 324 self.installed = False 325 326 327 def get(self, location=None): 328 if not location: 329 location = os.path.join(self.serverdir, '../client') 330 location = os.path.abspath(location) 331 # If there's stuff run on our client directory already, it 332 # can cause problems. Try giving it a quick clean first. 333 cwd = os.getcwd() 334 os.chdir(location) 335 try: 336 utils.system('tools/make_clean', ignore_status=True) 337 finally: 338 os.chdir(cwd) 339 super(BaseAutotest, self).get(location) 340 self.got = True 341 342 343 def run(self, control_file, results_dir='.', host=None, timeout=None, 344 tag=None, parallel_flag=False, background=False, 345 client_disconnect_timeout=None, use_packaging=True): 346 """ 347 Run an autotest job on the remote machine. 348 349 @param control_file: An open file-like-obj of the control file. 350 @param results_dir: A str path where the results should be stored 351 on the local filesystem. 352 @param host: A Host instance on which the control file should 353 be run. 354 @param timeout: Maximum number of seconds to wait for the run or None. 355 @param tag: Tag name for the client side instance of autotest. 356 @param parallel_flag: Flag set when multiple jobs are run at the 357 same time. 358 @param background: Indicates that the client should be launched as 359 a background job; the code calling run will be responsible 360 for monitoring the client and collecting the results. 361 @param client_disconnect_timeout: Seconds to wait for the remote host 362 to come back after a reboot. Defaults to the host setting for 363 DEFAULT_REBOOT_TIMEOUT. 364 365 @raises AutotestRunError: If there is a problem executing 366 the control file. 367 """ 368 host = self._get_host_and_setup(host, use_packaging=use_packaging) 369 logging.debug('Autotest job starts on remote host: %s', 370 host.hostname) 371 results_dir = os.path.abspath(results_dir) 372 373 if client_disconnect_timeout is None: 374 client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT 375 376 if tag: 377 results_dir = os.path.join(results_dir, tag) 378 379 atrun = _Run(host, results_dir, tag, parallel_flag, background) 380 self._do_run(control_file, results_dir, host, atrun, timeout, 381 client_disconnect_timeout, use_packaging=use_packaging) 382 383 384 def _get_host_and_setup(self, host, use_packaging=True): 385 if not host: 386 host = self.host 387 if not self.installed: 388 self.install(host, use_packaging=use_packaging) 389 390 host.wait_up(timeout=30) 391 return host 392 393 394 def _do_run(self, control_file, results_dir, host, atrun, timeout, 395 client_disconnect_timeout, use_packaging=True): 396 try: 397 atrun.verify_machine() 398 except: 399 logging.error("Verify failed on %s. Reinstalling autotest", 400 host.hostname) 401 self.install(host) 402 atrun.verify_machine() 403 debug = os.path.join(results_dir, 'debug') 404 try: 405 os.makedirs(debug) 406 except Exception: 407 pass 408 409 delete_file_list = [atrun.remote_control_file, 410 atrun.remote_control_file + '.state', 411 atrun.manual_control_file, 412 atrun.manual_control_file + '.state'] 413 cmd = ';'.join('rm -f ' + control for control in delete_file_list) 414 host.run(cmd, ignore_status=True) 415 416 tmppath = utils.get(control_file, local_copy=True) 417 418 # build up the initialization prologue for the control file 419 prologue_lines = [] 420 421 # Add the additional user arguments 422 prologue_lines.append("args = %r\n" % self.job.args) 423 424 # If the packaging system is being used, add the repository list. 425 repos = None 426 try: 427 if use_packaging: 428 repos = self.get_fetch_location() 429 prologue_lines.append('job.add_repository(%s)\n' % repos) 430 else: 431 logging.debug('use_packaging is set to False, do not add any ' 432 'repository.') 433 except global_config.ConfigError, e: 434 # If repos is defined packaging is enabled so log the error 435 if repos: 436 logging.error(e) 437 438 # on full-size installs, turn on any profilers the server is using 439 if not atrun.background: 440 running_profilers = host.job.profilers.add_log.iteritems() 441 for profiler, (args, dargs) in running_profilers: 442 call_args = [repr(profiler)] 443 call_args += [repr(arg) for arg in args] 444 call_args += ["%s=%r" % item for item in dargs.iteritems()] 445 prologue_lines.append("job.profilers.add(%s)\n" 446 % ", ".join(call_args)) 447 cfile = "".join(prologue_lines) 448 449 cfile += open(tmppath).read() 450 open(tmppath, "w").write(cfile) 451 452 # Create and copy state file to remote_control_file + '.state' 453 state_file = host.job.preprocess_client_state() 454 host.send_file(state_file, atrun.remote_control_file + '.init.state') 455 os.remove(state_file) 456 457 # Copy control_file to remote_control_file on the host 458 host.send_file(tmppath, atrun.remote_control_file) 459 if os.path.abspath(tmppath) != os.path.abspath(control_file): 460 os.remove(tmppath) 461 462 atrun.execute_control( 463 timeout=timeout, 464 client_disconnect_timeout=client_disconnect_timeout) 465 466 467 @classmethod 468 def _check_client_test_result(cls, host, test_name): 469 """ 470 Check result of client test. 471 Autotest will store results in the file name status. 472 We check that second to last line in that file begins with 'END GOOD' 473 474 @raises TestFail: If client test does not pass. 475 """ 476 client_result_dir = '%s/results/default' % host.autodir 477 command = 'tail -2 %s/status | head -1' % client_result_dir 478 status = host.run(command).stdout.strip() 479 logging.info(status) 480 if status[:8] != 'END GOOD': 481 raise error.TestFail('%s client test did not pass.' % test_name) 482 483 484 def run_timed_test(self, test_name, results_dir='.', host=None, 485 timeout=None, parallel_flag=False, background=False, 486 client_disconnect_timeout=None, *args, **dargs): 487 """ 488 Assemble a tiny little control file to just run one test, 489 and run it as an autotest client-side test 490 """ 491 if not host: 492 host = self.host 493 if not self.installed: 494 self.install(host) 495 opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()] 496 cmd = ", ".join([repr(test_name)] + map(repr, args) + opts) 497 control = "job.run_test(%s)\n" % cmd 498 self.run(control, results_dir, host, timeout=timeout, 499 parallel_flag=parallel_flag, background=background, 500 client_disconnect_timeout=client_disconnect_timeout) 501 502 if dargs.get('check_client_result', False): 503 self._check_client_test_result(host, test_name) 504 505 506 def run_test(self, test_name, results_dir='.', host=None, 507 parallel_flag=False, background=False, 508 client_disconnect_timeout=None, *args, **dargs): 509 self.run_timed_test(test_name, results_dir, host, timeout=None, 510 parallel_flag=parallel_flag, background=background, 511 client_disconnect_timeout=client_disconnect_timeout, 512 *args, **dargs) 513 514 515 class _BaseRun(object): 516 """ 517 Represents a run of autotest control file. This class maintains 518 all the state necessary as an autotest control file is executed. 519 520 It is not intended to be used directly, rather control files 521 should be run using the run method in Autotest. 522 """ 523 def __init__(self, host, results_dir, tag, parallel_flag, background): 524 self.host = host 525 self.results_dir = results_dir 526 self.env = host.env 527 self.tag = tag 528 self.parallel_flag = parallel_flag 529 self.background = background 530 self.autodir = Autotest.get_installed_autodir(self.host) 531 control = os.path.join(self.autodir, 'control') 532 if tag: 533 control += '.' + tag 534 self.manual_control_file = control 535 self.remote_control_file = control + '.autoserv' 536 self.config_file = os.path.join(self.autodir, 'global_config.ini') 537 538 539 def verify_machine(self): 540 binary = os.path.join(self.autodir, 'bin/autotest') 541 try: 542 self.host.run('ls %s > /dev/null 2>&1' % binary) 543 except: 544 raise error.AutoservInstallError( 545 "Autotest does not appear to be installed") 546 547 if not self.parallel_flag: 548 tmpdir = os.path.join(self.autodir, 'tmp') 549 download = os.path.join(self.autodir, 'tests/download') 550 self.host.run('umount %s' % tmpdir, ignore_status=True) 551 self.host.run('umount %s' % download, ignore_status=True) 552 553 554 def get_base_cmd_args(self, section): 555 args = ['--verbose'] 556 if section > 0: 557 args.append('-c') 558 if self.tag: 559 args.append('-t %s' % self.tag) 560 if self.host.job.use_external_logging(): 561 args.append('-l') 562 if self.host.hostname: 563 args.append('--hostname=%s' % self.host.hostname) 564 args.append('--user=%s' % self.host.job.user) 565 566 args.append(self.remote_control_file) 567 return args 568 569 570 def get_background_cmd(self, section): 571 cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')] 572 cmd += self.get_base_cmd_args(section) 573 cmd += ['>/dev/null', '2>/dev/null', '&'] 574 return ' '.join(cmd) 575 576 577 def get_daemon_cmd(self, section, monitor_dir): 578 cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'), 579 monitor_dir, '-H autoserv'] 580 cmd += self.get_base_cmd_args(section) 581 cmd += ['>/dev/null', '2>/dev/null', '&'] 582 return ' '.join(cmd) 583 584 585 def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read): 586 cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'), 587 monitor_dir, str(stdout_read), str(stderr_read)] 588 return ' '.join(cmd) 589 590 591 def get_client_log(self): 592 """Find what the "next" client.* prefix should be 593 594 @returns A string of the form client.INTEGER that should be prefixed 595 to all client debug log files. 596 """ 597 max_digit = -1 598 debug_dir = os.path.join(self.results_dir, 'debug') 599 client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*')) 600 for log in client_logs: 601 _, number, _ = log.split('.', 2) 602 if number.isdigit(): 603 max_digit = max(max_digit, int(number)) 604 return 'client.%d' % (max_digit + 1) 605 606 607 def copy_client_config_file(self, client_log_prefix=None): 608 """ 609 Create and copy the client config file based on the server config. 610 611 @param client_log_prefix: Optional prefix to prepend to log files. 612 """ 613 client_config_file = self._create_client_config_file(client_log_prefix) 614 self.host.send_file(client_config_file, self.config_file) 615 os.remove(client_config_file) 616 617 618 def _create_client_config_file(self, client_log_prefix=None): 619 """ 620 Create a temporary file with the [CLIENT] section configuration values 621 taken from the server global_config.ini. 622 623 @param client_log_prefix: Optional prefix to prepend to log files. 624 625 @return: Path of the temporary file generated. 626 """ 627 config = global_config.global_config.get_section_values('CLIENT') 628 if client_log_prefix: 629 config.set('CLIENT', 'default_logging_name', client_log_prefix) 630 return self._create_aux_file(config.write) 631 632 633 def _create_aux_file(self, func, *args): 634 """ 635 Creates a temporary file and writes content to it according to a 636 content creation function. The file object is appended to *args, which 637 is then passed to the content creation function 638 639 @param func: Function that will be used to write content to the 640 temporary file. 641 @param *args: List of parameters that func takes. 642 @return: Path to the temporary file that was created. 643 """ 644 fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir) 645 aux_file = os.fdopen(fd, "w") 646 try: 647 list_args = list(args) 648 list_args.append(aux_file) 649 func(*list_args) 650 finally: 651 aux_file.close() 652 return path 653 654 655 @staticmethod 656 def is_client_job_finished(last_line): 657 return bool(re.match(r'^END .*\t----\t----\t.*$', last_line)) 658 659 660 @staticmethod 661 def is_client_job_rebooting(last_line): 662 return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line)) 663 664 665 def _diagnose_dut(self, old_boot_id=None): 666 """ 667 Run diagnostic checks on a DUT. 668 669 1. ping: A dead host will not respond to pings. 670 2. ssh (happens with 3.): DUT hangs usually fail in authentication 671 but respond to pings. 672 3. Check if a reboot occured: A healthy but unexpected reboot leaves the 673 host running with a new boot id. 674 675 This method will always raise an exception from the AutotestFailure 676 family and should only get called when the reason for a test failing 677 is ambiguous. 678 679 @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping. 680 @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT. 681 @raises AutotestDeviceRebooted: If the boot id changed. 682 @raises AutotestAbort: If none of the above exceptions were raised. 683 Since we have no recourse we must abort at this stage. 684 """ 685 msg = 'Autotest client terminated unexpectedly: ' 686 if utils.ping(self.host.hostname, tries=1, deadline=1) != 0: 687 msg += 'DUT is no longer pingable, it may have rebooted or hung.\n' 688 raise AutotestDeviceNotPingable(msg) 689 690 if old_boot_id: 691 try: 692 new_boot_id = self.host.get_boot_id(timeout=60) 693 except Exception as e: 694 msg += ('DUT is pingable but not SSHable, it most likely' 695 ' sporadically rebooted during testing. %s\n' % str(e)) 696 raise AutotestDeviceNotSSHable(msg) 697 else: 698 if new_boot_id != old_boot_id: 699 msg += 'DUT rebooted during the test run.\n' 700 raise AutotestDeviceRebooted(msg) 701 702 msg += ('DUT is pingable, SSHable and did NOT restart ' 703 'un-expectedly. We probably lost connectivity during the ' 704 'test.') 705 else: 706 msg += ('DUT is pingable, could not determine if an un-expected ' 707 'reboot occured during the test.') 708 709 raise AutotestAbort(msg) 710 711 712 def log_unexpected_abort(self, stderr_redirector, old_boot_id=None): 713 """ 714 Logs that something unexpected happened, then tries to diagnose the 715 failure. The purpose of this function is only to close out the status 716 log with the appropriate error message, not to critically terminate 717 the program. 718 719 @param stderr_redirector: log stream. 720 @param old_boot_id: boot id used to infer if a reboot occured. 721 """ 722 stderr_redirector.flush_all_buffers() 723 try: 724 self._diagnose_dut(old_boot_id) 725 except AutotestFailure as e: 726 self.host.job.record('END ABORT', None, None, str(e)) 727 728 729 def _execute_in_background(self, section, timeout): 730 full_cmd = self.get_background_cmd(section) 731 devnull = open(os.devnull, "w") 732 733 self.copy_client_config_file(self.get_client_log()) 734 735 self.host.job.push_execution_context(self.results_dir) 736 try: 737 result = self.host.run(full_cmd, ignore_status=True, 738 timeout=timeout, 739 stdout_tee=devnull, 740 stderr_tee=devnull) 741 finally: 742 self.host.job.pop_execution_context() 743 744 return result 745 746 747 @staticmethod 748 def _strip_stderr_prologue(stderr): 749 """Strips the 'standard' prologue that get pre-pended to every 750 remote command and returns the text that was actually written to 751 stderr by the remote command.""" 752 stderr_lines = stderr.split("\n")[1:] 753 if not stderr_lines: 754 return "" 755 elif stderr_lines[0].startswith("NOTE: autotestd_monitor"): 756 del stderr_lines[0] 757 return "\n".join(stderr_lines) 758 759 760 def _execute_daemon(self, section, timeout, stderr_redirector, 761 client_disconnect_timeout): 762 monitor_dir = self.host.get_tmp_dir() 763 daemon_cmd = self.get_daemon_cmd(section, monitor_dir) 764 765 # grab the location for the server-side client log file 766 client_log_prefix = self.get_client_log() 767 client_log_path = os.path.join(self.results_dir, 'debug', 768 client_log_prefix + '.log') 769 client_log = open(client_log_path, 'w', 0) 770 self.copy_client_config_file(client_log_prefix) 771 772 stdout_read = stderr_read = 0 773 self.host.job.push_execution_context(self.results_dir) 774 try: 775 self.host.run(daemon_cmd, ignore_status=True, timeout=timeout) 776 disconnect_warnings = [] 777 while True: 778 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read, 779 stderr_read) 780 try: 781 result = self.host.run(monitor_cmd, ignore_status=True, 782 timeout=timeout, 783 stdout_tee=client_log, 784 stderr_tee=stderr_redirector) 785 except error.AutoservRunError, e: 786 result = e.result_obj 787 result.exit_status = None 788 disconnect_warnings.append(e.description) 789 790 stderr_redirector.log_warning( 791 "Autotest client was disconnected: %s" % e.description, 792 "NETWORK") 793 except error.AutoservSSHTimeout: 794 result = utils.CmdResult(monitor_cmd, "", "", None, 0) 795 stderr_redirector.log_warning( 796 "Attempt to connect to Autotest client timed out", 797 "NETWORK") 798 799 stdout_read += len(result.stdout) 800 stderr_read += len(self._strip_stderr_prologue(result.stderr)) 801 802 if result.exit_status is not None: 803 # TODO (crosbug.com/38224)- sbasi: Remove extra logging. 804 logging.debug('Result exit status is %d.', 805 result.exit_status) 806 return result 807 elif not self.host.wait_up(client_disconnect_timeout): 808 raise error.AutoservSSHTimeout( 809 "client was disconnected, reconnect timed out") 810 finally: 811 client_log.close() 812 self.host.job.pop_execution_context() 813 814 815 def execute_section(self, section, timeout, stderr_redirector, 816 client_disconnect_timeout): 817 logging.info("Executing %s/bin/autotest %s/control phase %d", 818 self.autodir, self.autodir, section) 819 820 if self.background: 821 result = self._execute_in_background(section, timeout) 822 else: 823 result = self._execute_daemon(section, timeout, stderr_redirector, 824 client_disconnect_timeout) 825 826 last_line = stderr_redirector.last_line 827 828 # check if we failed hard enough to warrant an exception 829 if result.exit_status == 1: 830 err = error.AutotestRunError("client job was aborted") 831 elif not self.background and not result.stderr: 832 err = error.AutotestRunError( 833 "execute_section %s failed to return anything\n" 834 "stdout:%s\n" % (section, result.stdout)) 835 else: 836 err = None 837 838 # log something if the client failed AND never finished logging 839 if err and not self.is_client_job_finished(last_line): 840 self.log_unexpected_abort(stderr_redirector) 841 842 if err: 843 raise err 844 else: 845 return stderr_redirector.last_line 846 847 848 def _wait_for_reboot(self, old_boot_id): 849 logging.info("Client is rebooting") 850 logging.info("Waiting for client to halt") 851 if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT, 852 old_boot_id=old_boot_id): 853 err = "%s failed to shutdown after %d" 854 err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT) 855 raise error.AutotestRunError(err) 856 logging.info("Client down, waiting for restart") 857 if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT): 858 # since reboot failed 859 # hardreset the machine once if possible 860 # before failing this control file 861 warning = "%s did not come back up, hard resetting" 862 warning %= self.host.hostname 863 logging.warning(warning) 864 try: 865 self.host.hardreset(wait=False) 866 except (AttributeError, error.AutoservUnsupportedError): 867 warning = "Hard reset unsupported on %s" 868 warning %= self.host.hostname 869 logging.warning(warning) 870 raise error.AutotestRunError("%s failed to boot after %ds" % 871 (self.host.hostname, 872 self.host.DEFAULT_REBOOT_TIMEOUT)) 873 self.host.reboot_followup() 874 875 876 def execute_control(self, timeout=None, client_disconnect_timeout=None): 877 if not self.background: 878 collector = log_collector(self.host, self.tag, self.results_dir) 879 hostname = self.host.hostname 880 remote_results = collector.client_results_dir 881 local_results = collector.server_results_dir 882 self.host.job.add_client_log(hostname, remote_results, 883 local_results) 884 job_record_context = self.host.job.get_record_context() 885 886 section = 0 887 start_time = time.time() 888 889 logger = client_logger(self.host, self.tag, self.results_dir) 890 try: 891 while not timeout or time.time() < start_time + timeout: 892 if timeout: 893 section_timeout = start_time + timeout - time.time() 894 else: 895 section_timeout = None 896 boot_id = self.host.get_boot_id() 897 last = self.execute_section(section, section_timeout, 898 logger, client_disconnect_timeout) 899 if self.background: 900 return 901 section += 1 902 if self.is_client_job_finished(last): 903 logging.info("Client complete") 904 return 905 elif self.is_client_job_rebooting(last): 906 try: 907 self._wait_for_reboot(boot_id) 908 except error.AutotestRunError, e: 909 self.host.job.record("ABORT", None, "reboot", str(e)) 910 self.host.job.record("END ABORT", None, None, str(e)) 911 raise 912 continue 913 914 # If a test fails without probable cause we try to bucket it's 915 # failure into one of 2 categories. If we can determine the 916 # current state of the device and it is suspicious, we close the 917 # status lines indicating a failure. If we either cannot 918 # determine the state of the device, or it appears totally 919 # healthy, we give up and abort. 920 try: 921 self._diagnose_dut(boot_id) 922 except AutotestDeviceError as e: 923 # The status lines of the test are pretty much tailed to 924 # our log, with indentation, from the client job on the DUT. 925 # So if the DUT goes down unexpectedly we'll end up with a 926 # malformed status log unless we manually unwind the status 927 # stack. Ideally we would want to write a nice wrapper like 928 # server_job methods run_reboot, run_group but they expect 929 # reboots and we don't. 930 self.host.job.record('FAIL', None, None, str(e)) 931 self.host.job.record('END FAIL', None, None) 932 self.host.job.record('END GOOD', None, None) 933 self.host.job.failed_with_device_error = True 934 return 935 except AutotestAbort as e: 936 self.host.job.record('ABORT', None, None, str(e)) 937 self.host.job.record('END ABORT', None, None) 938 939 # give the client machine a chance to recover from a crash 940 self.host.wait_up( 941 self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600) 942 msg = ("Aborting - unexpected final status message from " 943 "client on %s: %s\n") % (self.host.hostname, last) 944 raise error.AutotestRunError(msg) 945 finally: 946 logging.debug('Autotest job finishes running. Below is the ' 947 'post-processing operations.') 948 logger.close() 949 if not self.background: 950 collector.collect_client_job_results() 951 collector.remove_redundant_client_logs() 952 state_file = os.path.basename(self.remote_control_file 953 + '.state') 954 state_path = os.path.join(self.results_dir, state_file) 955 self.host.job.postprocess_client_state(state_path) 956 self.host.job.remove_client_log(hostname, remote_results, 957 local_results) 958 job_record_context.restore() 959 960 logging.debug('Autotest job finishes.') 961 962 # should only get here if we timed out 963 assert timeout 964 raise error.AutotestTimeoutError() 965 966 967 class log_collector(object): 968 def __init__(self, host, client_tag, results_dir): 969 self.host = host 970 if not client_tag: 971 client_tag = "default" 972 self.client_results_dir = os.path.join(host.get_autodir(), "results", 973 client_tag) 974 self.server_results_dir = results_dir 975 976 977 def collect_client_job_results(self): 978 """ A method that collects all the current results of a running 979 client job into the results dir. By default does nothing as no 980 client job is running, but when running a client job you can override 981 this with something that will actually do something. """ 982 983 # make an effort to wait for the machine to come up 984 try: 985 self.host.wait_up(timeout=30) 986 except error.AutoservError: 987 # don't worry about any errors, we'll try and 988 # get the results anyway 989 pass 990 991 # Copy all dirs in default to results_dir 992 try: 993 with metrics.SecondsTimer( 994 'chromeos/autotest/job/log_collection_duration', 995 fields={'dut_host_name': self.host.hostname}): 996 self.host.get_file( 997 self.client_results_dir + '/', 998 self.server_results_dir, 999 preserve_symlinks=True) 1000 except Exception: 1001 # well, don't stop running just because we couldn't get logs 1002 e_msg = "Unexpected error copying test result logs, continuing ..." 1003 logging.error(e_msg) 1004 traceback.print_exc(file=sys.stdout) 1005 1006 1007 def remove_redundant_client_logs(self): 1008 """Remove client.*.log files in favour of client.*.DEBUG files.""" 1009 debug_dir = os.path.join(self.server_results_dir, 'debug') 1010 debug_files = [f for f in os.listdir(debug_dir) 1011 if re.search(r'^client\.\d+\.DEBUG$', f)] 1012 for debug_file in debug_files: 1013 log_file = debug_file.replace('DEBUG', 'log') 1014 log_file = os.path.join(debug_dir, log_file) 1015 if os.path.exists(log_file): 1016 os.remove(log_file) 1017 1018 1019 # a file-like object for catching stderr from an autotest client and 1020 # extracting status logs from it 1021 class BaseClientLogger(object): 1022 """Partial file object to write to both stdout and 1023 the status log file. We only implement those methods 1024 utils.run() actually calls. 1025 """ 1026 status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$") 1027 test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$") 1028 fetch_package_parser = re.compile( 1029 r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$") 1030 extract_indent = re.compile(r"^(\t*).*$") 1031 extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$") 1032 1033 def __init__(self, host, tag, server_results_dir): 1034 self.host = host 1035 self.job = host.job 1036 self.log_collector = log_collector(host, tag, server_results_dir) 1037 self.leftover = "" 1038 self.last_line = "" 1039 self.logs = {} 1040 1041 1042 def _process_log_dict(self, log_dict): 1043 log_list = log_dict.pop("logs", []) 1044 for key in sorted(log_dict.iterkeys()): 1045 log_list += self._process_log_dict(log_dict.pop(key)) 1046 return log_list 1047 1048 1049 def _process_logs(self): 1050 """Go through the accumulated logs in self.log and print them 1051 out to stdout and the status log. Note that this processes 1052 logs in an ordering where: 1053 1054 1) logs to different tags are never interleaved 1055 2) logs to x.y come before logs to x.y.z for all z 1056 3) logs to x.y come before x.z whenever y < z 1057 1058 Note that this will in general not be the same as the 1059 chronological ordering of the logs. However, if a chronological 1060 ordering is desired that one can be reconstructed from the 1061 status log by looking at timestamp lines.""" 1062 log_list = self._process_log_dict(self.logs) 1063 for entry in log_list: 1064 self.job.record_entry(entry, log_in_subdir=False) 1065 if log_list: 1066 self.last_line = log_list[-1].render() 1067 1068 1069 def _process_quoted_line(self, tag, line): 1070 """Process a line quoted with an AUTOTEST_STATUS flag. If the 1071 tag is blank then we want to push out all the data we've been 1072 building up in self.logs, and then the newest line. If the 1073 tag is not blank, then push the line into the logs for handling 1074 later.""" 1075 entry = base_job.status_log_entry.parse(line) 1076 if entry is None: 1077 return # the line contains no status lines 1078 if tag == "": 1079 self._process_logs() 1080 self.job.record_entry(entry, log_in_subdir=False) 1081 self.last_line = line 1082 else: 1083 tag_parts = [int(x) for x in tag.split(".")] 1084 log_dict = self.logs 1085 for part in tag_parts: 1086 log_dict = log_dict.setdefault(part, {}) 1087 log_list = log_dict.setdefault("logs", []) 1088 log_list.append(entry) 1089 1090 1091 def _process_info_line(self, line): 1092 """Check if line is an INFO line, and if it is, interpret any control 1093 messages (e.g. enabling/disabling warnings) that it may contain.""" 1094 match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line) 1095 if not match: 1096 return # not an INFO line 1097 for field in match.group(1).split('\t'): 1098 if field.startswith("warnings.enable="): 1099 func = self.job.warning_manager.enable_warnings 1100 elif field.startswith("warnings.disable="): 1101 func = self.job.warning_manager.disable_warnings 1102 else: 1103 continue 1104 warning_type = field.split("=", 1)[1] 1105 func(warning_type) 1106 1107 1108 def _process_line(self, line): 1109 """Write out a line of data to the appropriate stream. Status 1110 lines sent by autotest will be prepended with 1111 "AUTOTEST_STATUS", and all other lines are ssh error 1112 messages.""" 1113 status_match = self.status_parser.search(line) 1114 test_complete_match = self.test_complete_parser.search(line) 1115 fetch_package_match = self.fetch_package_parser.search(line) 1116 if status_match: 1117 tag, line = status_match.groups() 1118 self._process_info_line(line) 1119 self._process_quoted_line(tag, line) 1120 elif test_complete_match: 1121 self._process_logs() 1122 fifo_path, = test_complete_match.groups() 1123 try: 1124 self.log_collector.collect_client_job_results() 1125 self.host.run("echo A > %s" % fifo_path) 1126 except Exception: 1127 msg = "Post-test log collection failed, continuing anyway" 1128 logging.exception(msg) 1129 elif fetch_package_match: 1130 pkg_name, dest_path, fifo_path = fetch_package_match.groups() 1131 serve_packages = global_config.global_config.get_config_value( 1132 "PACKAGES", "serve_packages_from_autoserv", type=bool) 1133 if serve_packages and pkg_name.endswith(".tar.bz2"): 1134 try: 1135 self._send_tarball(pkg_name, dest_path) 1136 except Exception: 1137 msg = "Package tarball creation failed, continuing anyway" 1138 logging.exception(msg) 1139 try: 1140 self.host.run("echo B > %s" % fifo_path) 1141 except Exception: 1142 msg = "Package tarball installation failed, continuing anyway" 1143 logging.exception(msg) 1144 else: 1145 logging.info(line) 1146 1147 1148 def _send_tarball(self, pkg_name, remote_dest): 1149 name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name) 1150 src_dirs = [] 1151 if pkg_type == 'test': 1152 for test_dir in ['site_tests', 'tests']: 1153 src_dir = os.path.join(self.job.clientdir, test_dir, name) 1154 if os.path.exists(src_dir): 1155 src_dirs += [src_dir] 1156 if autoserv_prebuild: 1157 prebuild.setup(self.job.clientdir, src_dir) 1158 break 1159 elif pkg_type == 'profiler': 1160 src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)] 1161 if autoserv_prebuild: 1162 prebuild.setup(self.job.clientdir, src_dir) 1163 elif pkg_type == 'dep': 1164 src_dirs += [os.path.join(self.job.clientdir, 'deps', name)] 1165 elif pkg_type == 'client': 1166 return # you must already have a client to hit this anyway 1167 else: 1168 return # no other types are supported 1169 1170 # iterate over src_dirs until we find one that exists, then tar it 1171 for src_dir in src_dirs: 1172 if os.path.exists(src_dir): 1173 try: 1174 logging.info('Bundling %s into %s', src_dir, pkg_name) 1175 temp_dir = autotemp.tempdir(unique_id='autoserv-packager', 1176 dir=self.job.tmpdir) 1177 tarball_path = self.job.pkgmgr.tar_package( 1178 pkg_name, src_dir, temp_dir.name, " .") 1179 self.host.send_file(tarball_path, remote_dest) 1180 finally: 1181 temp_dir.clean() 1182 return 1183 1184 1185 def log_warning(self, msg, warning_type): 1186 """Injects a WARN message into the current status logging stream.""" 1187 timestamp = int(time.time()) 1188 if self.job.warning_manager.is_valid(timestamp, warning_type): 1189 self.job.record('WARN', None, None, msg) 1190 1191 1192 def write(self, data): 1193 # now start processing the existing buffer and the new data 1194 data = self.leftover + data 1195 lines = data.split('\n') 1196 processed_lines = 0 1197 try: 1198 # process all the buffered data except the last line 1199 # ignore the last line since we may not have all of it yet 1200 for line in lines[:-1]: 1201 self._process_line(line) 1202 processed_lines += 1 1203 finally: 1204 # save any unprocessed lines for future processing 1205 self.leftover = '\n'.join(lines[processed_lines:]) 1206 1207 1208 def flush(self): 1209 sys.stdout.flush() 1210 1211 1212 def flush_all_buffers(self): 1213 if self.leftover: 1214 self._process_line(self.leftover) 1215 self.leftover = "" 1216 self._process_logs() 1217 self.flush() 1218 1219 1220 def close(self): 1221 self.flush_all_buffers() 1222 1223 1224 SiteAutotest = client_utils.import_site_class( 1225 __file__, "autotest_lib.server.site_autotest", "SiteAutotest", 1226 BaseAutotest) 1227 1228 1229 _SiteRun = client_utils.import_site_class( 1230 __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun) 1231 1232 1233 SiteClientLogger = client_utils.import_site_class( 1234 __file__, "autotest_lib.server.site_autotest", "SiteClientLogger", 1235 BaseClientLogger) 1236 1237 1238 class Autotest(SiteAutotest): 1239 pass 1240 1241 1242 class client_logger(SiteClientLogger): 1243 pass 1244 1245 1246 class _Run(_SiteRun): 1247 pass 1248