      1 #!/usr/bin/python -u
      2 # Copyright 2007-2008 Martin J. Bligh <mbligh (at] google.com>, Google Inc.
      3 # Released under the GPL v2
      4 
      5 """
      6 Run a control file through the server side engine
      7 """
      8 
      9 import datetime
     10 import contextlib
     11 import getpass
     12 import logging
     13 import os
     14 import re
     15 import signal
     16 import socket
     17 import sys
     18 import traceback
     19 import time
     20 import urllib2
     21 
     22 
     23 import common
     24 from autotest_lib.client.bin.result_tools import utils as result_utils
     25 from autotest_lib.client.bin.result_tools import view as result_view
     26 from autotest_lib.client.common_lib import control_data
     27 from autotest_lib.client.common_lib import error
     28 from autotest_lib.client.common_lib import global_config
     29 from autotest_lib.server import results_mocker
     30 
     31 try:
     32     from chromite.lib import metrics
     33     from chromite.lib import cloud_trace
     34 except ImportError:
     35     from autotest_lib.client.common_lib import utils as common_utils
     36     metrics = common_utils.metrics_mock
     37     import mock
     38     cloud_trace = mock.MagicMock()
     39 
     40 _CONFIG = global_config.global_config
     41 
     42 
     43 # Number of seconds to wait before returning if testing mode is enabled
     44 TESTING_MODE_SLEEP_SECS = 1
     45 
     46 
     47 from autotest_lib.server import frontend
     48 from autotest_lib.server import server_logging_config
     49 from autotest_lib.server import server_job, utils, autoserv_parser, autotest
     50 from autotest_lib.server import utils as server_utils
     51 from autotest_lib.server import site_utils
     52 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     53 from autotest_lib.site_utils import job_directories
     54 from autotest_lib.site_utils import job_overhead
     55 from autotest_lib.site_utils import lxc
     56 from autotest_lib.site_utils.lxc import utils as lxc_utils
     57 from autotest_lib.client.common_lib import pidfile, logging_manager
     58 
     59 
     60 # Control segment to stage server-side package.
     61 STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
     62         'stage_server_side_package')
     63 
     64 # Command line to start servod in a moblab.
     65 START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
     66 STOP_SERVOD_CMD = 'sudo stop servod'
     67 
     68 def log_alarm(signum, frame):
     69     logging.error("Received SIGALRM. Aborting autoserv.")
     70     sys.exit(1)
     71 
     72 
     73 def _get_machines(parser):
     74     """Get a list of machine names from command line arg -m or a file.
     75 
     76     @param parser: Parser for the command line arguments.
     77 
     78     @return: A list of machine names from command line arg -m or the
     79              machines file specified in the command line arg -M.
     80     """
     81     if parser.options.machines:
     82         machines = parser.options.machines.replace(',', ' ').strip().split()
     83     else:
     84         machines = []
     85     machines_file = parser.options.machines_file
     86     if machines_file:
     87         machines = []
     88         for m in open(machines_file, 'r').readlines():
     89             # remove comments, spaces
     90             m = re.sub('#.*', '', m).strip()
     91             if m:
     92                 machines.append(m)
     93         logging.debug('Read list of machines from file: %s', machines_file)
     94         logging.debug('Machines: %s', ','.join(machines))
     95 
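            # Reject empty or whitespace-containing machine names, then de-duplicate
            # and sort for a stable order.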
     96     if machines:
     97         for machine in machines:
     98             if not machine or re.search(r'\s', machine):
     99                 parser.parser.error("Invalid machine: %s" % str(machine))
    100         machines = list(set(machines))
    101         machines.sort()
    102     return machines
    103 
    104 
    105 def _stage_ssp(parser, resultsdir):
    106     """Stage server-side package.
    107 
    108     This function calls a control segment to stage the server-side package
    109     based on the job and autoserv command line options. The detailed
    110     implementation may differ for each host type; currently only CrosHost
    111     defines a stage_server_side_package function.
    112     The staged ssp_url is None if no server-side package is available, but an
    113     exception may be raised if staging fails for reasons other than the
    114     artifact (the server-side package) not being found.
    115 
    116     @param parser: Command line arguments parser passed in the autoserv process.
    117     @param resultsdir: Folder to store results. This can be different from
    118             parser.options.results: parser.options.results can be set to None
    119             so results are stored in a temp folder, and resultsdir can be None
    120             if the autoserv run requires no logging.
    121 
    122     @return: (ssp_url, error_msg), where
    123               ssp_url is a url to the autotest server-side package, or None
    124               if the server-side package is not supported.
    125               error_msg is a string describing the failure, or None if the
    126               server-side package is staged successfully.
    127     """
    128     machines_list = _get_machines(parser)
    129     machines_list = server_job.get_machine_dicts(machines_list, resultsdir,
    130                                                  parser.options.lab,
    131                                                  parser.options.host_attributes)
    132 
    133     # If test_source_build is not specified, default to use server-side test
    134     # code from build specified in --image.
    135     namespace = {'machines': machines_list,
    136                  'image': (parser.options.test_source_build or
    137                            parser.options.image),}
    138     script_locals = {}
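            # The control segment populates ssp_url and error_msg in script_locals.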
    139     execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
    140     return script_locals['ssp_url'], script_locals['error_msg']
    141 
    142 
    143 def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
    144                   job_folder, machines):
    145     """Run the server job with server-side packaging.
    146 
    147     @param job: The server job object.
    148     @param container_id: ID of the container to run the test.
    149     @param job_id: ID of the test job.
    150     @param results: Folder to store results. This could be different from
    151                     parser.options.results:
    152                     parser.options.results can be set to None for results to
    153                     be stored in a temp folder.
    154                     results can be None if the autoserv run requires no logging.
    155     @param parser: Command line parser that contains the options.
    156     @param ssp_url: url of the staged server-side package.
    157     @param job_folder: Name of the job result folder.
    158     @param machines: A list of machines to run the test.
    159     """
    160     bucket = lxc.ContainerBucket()
    161     control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
    162                else None)
    163     try:
    164         dut_name = machines[0] if len(machines) >= 1 else None
    165         test_container = bucket.setup_test(container_id, job_id, ssp_url,
    166                                            results, control=control,
    167                                            job_folder=job_folder,
    168                                            dut_name=dut_name)
    169     except Exception as e:
    170         job.record('FAIL', None, None,
    171                    'Failed to setup container for test: %s. Check logs in '
    172                    'ssp_logs folder for more details.' % e)
    173         raise
    174 
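            # Rebuild the autoserv command line so the job can be re-run inside the
            # container, without the --require-ssp flag.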
    175     args = sys.argv[:]
    176     args.remove('--require-ssp')
    177     # --parent_job_id is only useful when autoserv runs on the host, not in the
    178     # container. Including this argument will cause the test to fail for builds
    179     # before CL 286265 was merged.
    180     if '--parent_job_id' in args:
    181         index = args.index('--parent_job_id')
    182         args.remove('--parent_job_id')
    183         # Remove the actual parent job id value from the command line args.
    184         del args[index]
    185 
    186     # A dictionary of paths to replace in the command line. Each key is a path
    187     # to be replaced with its corresponding value.
    188     paths_to_replace = {}
    189     # Replace the control file path with the one in container.
    190     if control:
    191         container_control_filename = os.path.join(
    192                 lxc.CONTROL_TEMP_PATH, os.path.basename(control))
    193         paths_to_replace[control] = container_control_filename
    194     # Update result directory with the one in container.
    195     container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
    196     if parser.options.results:
    197         paths_to_replace[parser.options.results] = container_result_dir
    198     # Update parse_job directory with the one in container. The assumption is
    199     # that the result folder to be parsed is always the same as the results_dir.
    200     if parser.options.parse_job:
    201         paths_to_replace[parser.options.parse_job] = container_result_dir
    202 
    203     args = [paths_to_replace.get(arg, arg) for arg in args]
    204 
    205     # Apply --use-existing-results: the results directory is already created
    206     # and mounted in the container. Pass this arg to avoid an exception.
    207     if not '--use-existing-results' in args:
    208         args.append('--use-existing-results')
    209 
    210     # Make sure autoserv running in the container uses a different pid file.
    211     if not '--pidfile-label' in args:
    212         args.extend(['--pidfile-label', 'container_autoserv'])
    213 
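            # Quote arguments that contain spaces so the rebuilt command line parses
            # correctly when run inside the container.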
    214     cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    215     logging.info('Run command in container: %s', cmd_line)
    216     success = False
    217     try:
    218         test_container.attach_run(cmd_line)
    219         success = True
    220     except Exception as e:
    221         # If the test run inside container fails without generating any log,
    222         # write a message to status.log to help troubleshooting.
    223         debug_files = os.listdir(os.path.join(results, 'debug'))
    224         if not debug_files:
    225             job.record('FAIL', None, None,
    226                        'Failed to run test inside the container: %s. Check '
    227                        'logs in ssp_logs folder for more details.' % e)
    228         raise
    229     finally:
    230         metrics.Counter(
    231             'chromeos/autotest/experimental/execute_job_in_ssp').increment(
    232                 fields={'success': success})
    233         test_container.destroy()
    234 
    235 
    236 def correct_results_folder_permission(results):
    237     """Make sure the results folder has the right permission settings.
    238 
    239     For tests running with server-side packaging, the results folder is owned
    240     by root. Ownership must be changed to the user running the autoserv
    241     process, so the parsing job can access the results folder.
    242     TODO(dshi): crbug.com/459344 Remove this function when the test container
    243     can be an unprivileged container.
    244 
    245     @param results: Path to the results folder.
    246 
    247     """
    248     if not results:
    249         return
    250 
    251     utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
    252     utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))
    253 
    254 
    255 def _start_servod(machine):
    256     """Try to start servod in moblab if it's not already running, or if it is
    257     running with a different board or port.
    258 
    259     @param machine: Name of the dut used for test.
    260     """
    261     if not utils.is_moblab():
    262         return
    263 
    264     logging.debug('Trying to start servod.')
    265     try:
    266         afe = frontend.AFE()
    267         board = server_utils.get_board_from_afe(machine, afe)
    268         hosts = afe.get_hosts(hostname=machine)
    269         servo_host = hosts[0].attributes.get('servo_host', None)
    270         servo_port = hosts[0].attributes.get('servo_port', 9999)
    271         if servo_host not in ['localhost', '127.0.0.1']:
    272             logging.warn('Aborting servod start: the dut\'s servo_host '
    273                          'attribute is not set to localhost.')
    274             return
    275     except (urllib2.HTTPError, urllib2.URLError):
    276         # Ignore the error if the RPC failed to get the board.
    277         logging.error('Failed to get board name from AFE. Starting servod is '
    278                       'aborted.')
    279         return
    280 
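            # Check whether servod is already running with the expected board and
            # port; if not, stop any existing servod so it can be restarted below.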
    281     try:
    282         pid = utils.run('pgrep servod').stdout
    283         cmd_line = utils.run('ps -fp %s' % pid).stdout
    284         if ('--board %s' % board in cmd_line and
    285             '--port %s' % servo_port in cmd_line):
    286             logging.debug('Servod is already running with given board and port.'
    287                           ' There is no need to restart servod.')
    288             return
    289         logging.debug('Servod is running with different board or port. '
    290                       'Stopping existing servod.')
    291         utils.run('sudo stop servod')
    292     except error.CmdError:
    293         # servod is not running.
    294         pass
    295 
    296     try:
    297         utils.run(START_SERVOD_CMD % (board, servo_port))
    298         logging.debug('Servod started.')
    299     except error.CmdError as e:
    300         logging.error('Failed to start servod: %s', e)
    301 
    302 
    303 def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    304     """Run server job with given options.
    305 
    306     @param pid_file_manager: PidFileManager used to monitor the autoserv process
    307     @param results: Folder to store results.
    308     @param parser: Parser for the command line arguments.
    309     @param ssp_url: Url to server-side package.
    310     @param use_ssp: Set to True to run with server-side packaging.
    311     """
    312     if parser.options.warn_no_ssp:
    313         # Post a warning in the log.
    314         logging.warn('Autoserv is required to run with server-side packaging, '
    315                      'but no drone was found that supports server-side '
    316                      'packaging. The test will be executed in a drone without '
    317                      'server-side packaging support.')
    318 
    319     # send stdin to /dev/null
    320     dev_null = os.open(os.devnull, os.O_RDONLY)
    321     os.dup2(dev_null, sys.stdin.fileno())
    322     os.close(dev_null)
    323 
    324     # Create a separate process group if the process is not a process group
    325     # leader. This allows the autoserv process to keep running after the caller
    326     # process (the drone manager call) exits.
    327     if os.getpid() != os.getpgid(0):
    328         os.setsid()
    329 
    330     # Container name is predefined so the container can be destroyed in
    331     # handle_sigterm.
    332     job_or_task_id = job_directories.get_job_id_or_task_id(
    333             parser.options.results)
    334     container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())
    335     job_folder = job_directories.get_job_folder_name(parser.options.results)
    336 
    337     # Implement SIGTERM handler
    338     def handle_sigterm(signum, frame):
    339         logging.debug('Received SIGTERM')
    340         if pid_file_manager:
    341             pid_file_manager.close_file(1, signal.SIGTERM)
    342         logging.debug('Finished writing to pid_file. Killing process.')
    343 
    344         # Update results folder's file permission. This needs to be done ASAP
    345         # before the parsing process tries to access the log.
    346         if use_ssp and results:
    347             correct_results_folder_permission(results)
    348 
    349         # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
    350         # This sleep allows the pending output to be logged before the kill
    351         # signal is sent.
    352         time.sleep(.1)
    353         if use_ssp:
    354             logging.debug('Destroy container %s before aborting the autoserv '
    355                           'process.', container_id)
    356             try:
    357                 bucket = lxc.ContainerBucket()
    358                 container = bucket.get_container(container_id)
    359                 if container:
    360                     container.destroy()
    361                 else:
    362                     logging.debug('Container %s is not found.', container_id)
    363             except:
    364                 # Handle any exception so the autoserv process can be aborted.
    365                 logging.exception('Failed to destroy container %s.',
    366                                   container_id)
    367             # Try to correct the result file permission again after the
    368             # container is destroyed, as the container might have created some
    369             # new files in the result folder.
    370             if results:
    371                 correct_results_folder_permission(results)
    372 
    373         os.killpg(os.getpgrp(), signal.SIGKILL)
    374 
    375     # Set signal handler
    376     signal.signal(signal.SIGTERM, handle_sigterm)
    377 
    378     # faulthandler is only needed to debug in the lab and is not available to
    379     # import in the chroot as part of VMTest, so wrap the import in try/except.
    380     try:
    381         import faulthandler
    382         faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
    383         logging.debug('faulthandler registered on SIGTERM.')
    384     except ImportError:
    385         sys.exc_clear()
    386 
    387     # Ignore SIGTTOU's generated by output from forked children.
    388     signal.signal(signal.SIGTTOU, signal.SIG_IGN)
    389 
    390     # If we receive a SIGALRM, be loud about it.
    391     signal.signal(signal.SIGALRM, log_alarm)
    392 
    393     # Server-side tests that call shell scripts often depend on $USER being set,
    394     # but depending on how the autotest scheduler is launched it may not be set.
    395     os.environ['USER'] = getpass.getuser()
    396 
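            # Unpack the command line options used throughout the rest of this
            # function into local variables.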
    397     label = parser.options.label
    398     group_name = parser.options.group_name
    399     user = parser.options.user
    400     client = parser.options.client
    401     server = parser.options.server
    402     install_before = parser.options.install_before
    403     install_after = parser.options.install_after
    404     verify = parser.options.verify
    405     repair = parser.options.repair
    406     cleanup = parser.options.cleanup
    407     provision = parser.options.provision
    408     reset = parser.options.reset
    409     job_labels = parser.options.job_labels
    410     no_tee = parser.options.no_tee
    411     parse_job = parser.options.parse_job
    412     execution_tag = parser.options.execution_tag
    413     if not execution_tag:
    414         execution_tag = parse_job
    415     ssh_user = parser.options.ssh_user
    416     ssh_port = parser.options.ssh_port
    417     ssh_pass = parser.options.ssh_pass
    418     collect_crashinfo = parser.options.collect_crashinfo
    419     control_filename = parser.options.control_filename
    420     test_retry = parser.options.test_retry
    421     verify_job_repo_url = parser.options.verify_job_repo_url
    422     skip_crash_collection = parser.options.skip_crash_collection
    423     ssh_verbosity = int(parser.options.ssh_verbosity)
    424     ssh_options = parser.options.ssh_options
    425     no_use_packaging = parser.options.no_use_packaging
    426     host_attributes = parser.options.host_attributes
    427     in_lab = bool(parser.options.lab)
    428 
    429     # can't be both a client and a server side test
    430     if client and server:
    431         parser.parser.error("Cannot specify a test as both server and client!")
    432 
    433     if provision and client:
    434         parser.parser.error("Cannot specify provisioning and client!")
    435 
    436     is_special_task = (verify or repair or cleanup or collect_crashinfo or
    437                        provision or reset)
    438     if len(parser.args) < 1 and not is_special_task:
    439         parser.parser.error("Missing argument: control file")
    440 
    441     if ssh_verbosity > 0:
    442         # ssh_verbosity is an integer between 0 and 3, inclusive
    443         ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    444     else:
    445         ssh_verbosity_flag = ''
    446 
    447     # We have a control file unless it's just a verify/repair/cleanup job
    448     if len(parser.args) > 0:
    449         control = parser.args[0]
    450     else:
    451         control = None
    452 
    453     machines = _get_machines(parser)
    454     if group_name and len(machines) < 2:
    455         parser.parser.error('-G %r may only be supplied with more than one '
    456                             'machine.' % group_name)
    457 
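            # Collect optional keyword arguments for the server_job constructor.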
    458     kwargs = {'group_name': group_name, 'tag': execution_tag,
    459               'disable_sysinfo': parser.options.disable_sysinfo}
    460     if parser.options.parent_job_id:
    461         kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    462     if control_filename:
    463         kwargs['control_filename'] = control_filename
    464     if host_attributes:
    465         kwargs['host_attributes'] = host_attributes
    466     kwargs['in_lab'] = in_lab
    467     job = server_job.server_job(control, parser.args[1:], results, label,
    468                                 user, machines, client, parse_job,
    469                                 ssh_user, ssh_port, ssh_pass,
    470                                 ssh_verbosity_flag, ssh_options,
    471                                 test_retry, **kwargs)
    472 
    473     job.logging.start_logging()
    474     job.init_parser()
    475 
    476     # perform checks
    477     job.precheck()
    478 
    479     # run the job
    480     exit_code = 0
    481     auto_start_servod = _CONFIG.get_config_value(
    482             'AUTOSERV', 'auto_start_servod', type=bool, default=False)
    483 
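            # Initialize ts_mon global state for metric reporting; metrics are
            # flushed in the finally block below.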
    484     site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
    485                                      auto_flush=False, short_lived=True)
    486     try:
    487         try:
    488             if repair:
    489                 if auto_start_servod and len(machines) == 1:
    490                     _start_servod(machines[0])
    491                 job.repair(job_labels)
    492             elif verify:
    493                 job.verify(job_labels)
    494             elif provision:
    495                 job.provision(job_labels)
    496             elif reset:
    497                 job.reset(job_labels)
    498             elif cleanup:
    499                 job.cleanup(job_labels)
    500             else:
    501                 if auto_start_servod and len(machines) == 1:
    502                     _start_servod(machines[0])
    503                 if use_ssp:
    504                     try:
    505                         _run_with_ssp(job, container_id, job_or_task_id,
    506                                         results, parser, ssp_url, job_folder,
    507                                         machines)
    508                     finally:
    509                         # Update the ownership of files in result folder.
    510                         correct_results_folder_permission(results)
    511                 else:
    512                     if collect_crashinfo:
    513                         # Update the ownership of files in the result folder.
    514                         # If the crashinfo collection job ran inside a container
    515                         # (SSP) and crashed before correcting folder permissions,
    516                         # the result folder might have the wrong permissions.
    517                         try:
    518                             correct_results_folder_permission(results)
    519                         except:
    520                             # Ignore any error as the user may not have root
    521                             # permission to run sudo command.
    522                             pass
    523                     metric_name = ('chromeos/autotest/experimental/'
    524                                    'autoserv_job_run_duration')
    525                     f = {'in_container': utils.is_in_container(),
    526                          'success': False}
    527                     with metrics.SecondsTimer(metric_name, fields=f) as c:
    528                         job.run(install_before, install_after,
    529                                 verify_job_repo_url=verify_job_repo_url,
    530                                 only_collect_crashinfo=collect_crashinfo,
    531                                 skip_crash_collection=skip_crash_collection,
    532                                 job_labels=job_labels,
    533                                 use_packaging=(not no_use_packaging))
    534                         c['success'] = True
    535 
    536         finally:
    537             job.close()
    538             # Special tasks don't run the parser, so the result summary needs
    539             # to be built here.
    540             if results and (repair or verify or reset or cleanup or provision):
    541                 # Throttle the result on the server side.
    542                 try:
    543                     result_utils.execute(
    544                             results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
    545                 except:
    546                     logging.exception(
    547                             'Non-critical failure: Failed to throttle results '
    548                             'in directory %s.', results)
    549                 # Build result view and report metrics for result sizes.
    550                 site_utils.collect_result_sizes(results)
    551     except:
    552         exit_code = 1
    553         traceback.print_exc()
    554     finally:
    555         metrics.Flush()
    556 
    557     if pid_file_manager:
    558         pid_file_manager.num_tests_failed = job.num_tests_failed
    559         pid_file_manager.close_file(exit_code)
    560     job.cleanup_parser()
    561 
    562     sys.exit(exit_code)
    563 
    564 
    565 def record_autoserv(options, start_time):
    566     """Record autoserv end-to-end time in metadata db.
    567 
    568     @param options: parser options.
    569     @param start_time: When autoserv started
    570     """
    571     # Get machine hostname
    572     machines = options.machines.replace(
    573             ',', ' ').strip().split() if options.machines else []
    574     num_machines = len(machines)
    575     if num_machines > 1:
    576         # Skip the case where atomic group is used.
    577         return
    578     elif num_machines == 0:
    579         machines.append('hostless')
    580 
    581     # Determine the status that will be reported.
    582     status = get_job_status(options)
    583     is_special_task = status not in [
    584             job_overhead.STATUS.RUNNING, job_overhead.STATUS.GATHERING]
    585     job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    586     duration_secs = (datetime.datetime.now() - start_time).total_seconds()
    587     job_overhead.record_state_duration(
    588             job_or_task_id, machines[0], status, duration_secs,
    589             is_special_task=is_special_task)
    590 
    591 
    592 def get_job_status(options):
    593     """Returns the HQE Status for this run.
    594 
    595     @param options: parser options.
    596     """
    597     s = job_overhead.STATUS
    598     task_mapping = {
    599             'reset': s.RESETTING, 'verify': s.VERIFYING,
    600             'provision': s.PROVISIONING, 'repair': s.REPAIRING,
    601             'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    602     match = [task for task in task_mapping if getattr(options, task, False)]
    603     return task_mapping[match[0]] if match else s.RUNNING
    604 
    605 
    606 def main():
    607     start_time = datetime.datetime.now()
    608     # grab the parser
    609     parser = autoserv_parser.autoserv_parser
    610     parser.parse_args()
    611 
    612     if len(sys.argv) == 1:
    613         parser.parser.print_help()
    614         sys.exit(1)
    615 
    616     if parser.options.no_logging:
    617         results = None
    618     else:
    619         results = parser.options.results
    620         if not results:
    621             results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
    622         results = os.path.abspath(results)
    623         resultdir_exists = False
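                # Any of these marker files indicates an existing results directory.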
    624         for filename in ('control.srv', 'status.log', '.autoserv_execute'):
    625             if os.path.exists(os.path.join(results, filename)):
    626                 resultdir_exists = True
    627         if not parser.options.use_existing_results and resultdir_exists:
    628             error = "Error: results directory already exists: %s\n" % results
    629             sys.stderr.write(error)
    630             sys.exit(1)
    631 
    632         # Now that we have verified that there is no leftover results dir from
    633         # previous jobs, let's create the results dir, since the logging system
    634         # needs to create the log file in there.
    635         if not os.path.isdir(results):
    636             os.makedirs(results)
    637 
    638     # If the job is required to run with a server-side package, try to stage the
    639     # server-side package first. If staging fails because the autotest server
    640     # package does not exist, fall back to running the job without server-side
    641     # packaging. If the warn_no_ssp option is specified, autoserv is running in a
    642     # drone that does not support SSP, so there is no need to stage the
    643     # server-side package.
    644     ssp_url = None
    645     ssp_url_warning = False
    646     if (not parser.options.warn_no_ssp and parser.options.require_ssp):
    647         ssp_url, ssp_error_msg = _stage_ssp(parser, results)
    648         # The build does not have an autotest server package. Fall back to not
    649         # using the server-side package. Logging is postponed until logging is
    650         # set up.
    651         ssp_url_warning = not ssp_url
    652 
    653     # Server-side packaging will only be used if it is required and the package
    654     # is available. If warn_no_ssp is specified, autoserv is running in a drone
    655     # that does not support SSP and a warning will be logged. Therefore, it
    656     # should not run with SSP.
    657     use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
    658                and ssp_url)
    659     if use_ssp:
    660         log_dir = os.path.join(results, 'ssp_logs') if results else None
    661         if log_dir and not os.path.exists(log_dir):
    662             os.makedirs(log_dir)
    663     else:
    664         log_dir = results
    665 
    666     logging_manager.configure_logging(
    667             server_logging_config.ServerLoggingConfig(),
    668             results_dir=log_dir,
    669             use_console=not parser.options.no_tee,
    670             verbose=parser.options.verbose,
    671             no_console_prefix=parser.options.no_console_prefix)
    672 
    673     if ssp_url_warning:
    674         logging.warn(
    675                 'Autoserv is required to run with server-side packaging. '
    676                 'However, no server-side package can be staged based on '
    677                 '`--image`, host attribute job_repo_url or host OS version '
    678                 'label. It could be that the build to test is older than the '
    679                 'minimum version that supports server-side packaging, or no '
    680                 'devserver can be found to stage server-side package. The test '
    681                 'will be executed without using server-side packaging. '
    682                 'Following is the detailed error:\n%s', ssp_error_msg)
    683 
    684     if results:
    685         logging.info("Results placed in %s", results)
    686 
    687         # Wait until now to perform this check so it gets properly logged.
    688         if (parser.options.use_existing_results and not resultdir_exists and
    689             not utils.is_in_container()):
    690             logging.error("No existing results directory found: %s", results)
    691             sys.exit(1)
    692 
    693     logging.debug('autoserv is running in drone %s.', socket.gethostname())
    694     logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    695 
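            # Track this autoserv run with a pid file when writing to a results
            # directory.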
    696     if parser.options.write_pidfile and results:
    697         pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
    698                                                   results)
    699         pid_file_manager.open_file()
    700     else:
    701         pid_file_manager = None
    702 
    703     autotest.Autotest.set_install_in_tmpdir(
    704         parser.options.install_in_tmpdir)
    705 
    706     exit_code = 0
    707     # TODO(beeps): Extend this to cover different failure modes.
    708     # Testing exceptions are matched against labels sent to autoserv. Eg,
    709     # to allow only the hostless job to run, specify
    710     # testing_exceptions: test_suite in the shadow_config. To allow both
    711     # the hostless job and dummy_Pass to run, specify
    712     # testing_exceptions: test_suite,dummy_Pass. You can figure out
    713     # what label autoserv is invoked with by looking through the logs of a test
    714     # for the autoserv command's -l option.
    715     testing_exceptions = _CONFIG.get_config_value(
    716             'AUTOSERV', 'testing_exceptions', type=list, default=[])
    717     test_mode = _CONFIG.get_config_value(
    718             'AUTOSERV', 'testing_mode', type=bool, default=False)
    719     test_mode = (results_mocker and test_mode and not
    720                  any([ex in parser.options.label
    721                       for ex in testing_exceptions]))
    722     is_task = (parser.options.verify or parser.options.repair or
    723                parser.options.provision or parser.options.reset or
    724                parser.options.cleanup or parser.options.collect_crashinfo)
    725 
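            # Label cloud_trace spans with the job/task id and attach them to the
            # trace context passed on the command line, if any.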
    726     trace_labels = {
    727             'job_id': job_directories.get_job_id_or_task_id(
    728                     parser.options.results)
    729     }
    730     trace = cloud_trace.SpanStack(
    731             labels=trace_labels,
    732             global_context=parser.options.cloud_trace_context)
    733     trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    734     try:
    735         try:
    736             if test_mode:
    737                 # The parser doesn't run on tasks anyway, so we can just return
    738                 # happy signals without faking results.
    739                 if not is_task:
    740                     machine = parser.options.results.split('/')[-1]
    741 
    742                     # TODO(beeps): The proper way to do this would be to
    743                     # refactor job creation so we can invoke job.record
    744                     # directly. To do that one needs to pipe the test_name
    745                     # through run_autoserv and bail just before invoking
    746                     # the server job. See the comment in
    747                     # puppylab/results_mocker for more context.
    748                     results_mocker.ResultsMocker(
    749                             'unknown-test', parser.options.results, machine
    750                             ).mock_results()
    751                 return
    752             else:
    753                 with trace.Span(get_job_status(parser.options)):
    754                     run_autoserv(pid_file_manager, results, parser, ssp_url,
    755                                  use_ssp)
    756         except SystemExit as e:
    757             exit_code = e.code
    758             if exit_code:
    759                 logging.exception('Uncaught SystemExit with code %s', exit_code)
    760         except Exception:
    761             # If we don't know what happened, we'll classify it as
    762             # an 'abort' and return 1.
    763             logging.exception('Uncaught Exception, exit_code = 1.')
    764             exit_code = 1
    765     finally:
    766         if pid_file_manager:
    767             pid_file_manager.close_file(exit_code)
    768         # Record the autoserv duration. Must be called just before the
    769         # process exits to ensure accuracy.
    770         record_autoserv(parser.options, start_time)
    771     sys.exit(exit_code)
    772 
    773 
    774 if __name__ == '__main__':
    775     main()
    776