#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""

import contextlib
import datetime
import getpass
import logging
import os
import re
import shutil
import signal
import socket
import sys
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import enum
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server import results_mocker
from autotest_lib.server.cros.dynamic_suite import suite

try:
    from chromite.lib import metrics
    from chromite.lib import cloud_trace
except ImportError:
    from autotest_lib.client.common_lib import utils as common_utils
    metrics = common_utils.metrics_mock
    import mock
    cloud_trace = mock.MagicMock()

_CONFIG = global_config.global_config

# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1


from autotest_lib.server import frontend
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
from autotest_lib.server import site_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.site_utils import job_directories
from autotest_lib.site_utils import lxc
from autotest_lib.site_utils.lxc import utils as lxc_utils
from autotest_lib.client.common_lib import pidfile, logging_manager


# Control segment to stage the server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
STOP_SERVOD_CMD = 'sudo stop servod'

_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name'

_LXC_JOB_FOLDER = 'lxc_job_folder'

def log_alarm(signum, frame):
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)


def _get_machines(parser):
    """Get a list of machine names from command line arg -m or a file.

    @param parser: Parser for the command line arguments.

    @return: A list of machine names from command line arg -m or the
             machines file specified in the command line arg -M.
    """
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    if machines_file:
        machines = []
        with open(machines_file, 'r') as fh:
            for m in fh:
                # Remove comments and surrounding whitespace.
                m = re.sub('#.*', '', m).strip()
                if m:
                    machines.append(m)
        logging.debug('Read list of machines from file: %s', machines_file)
        logging.debug('Machines: %s', ','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        machines = list(set(machines))
        machines.sort()
    return machines
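# A minimal sketch of the tokenizing behavior above (values illustrative):
#
#   >>> sorted(set('host1,host2 host1'.replace(',', ' ').strip().split()))
#   ['host1', 'host2']
#
# i.e. -m values are comma/whitespace tokenized, de-duplicated and sorted,
# so callers see a canonical machine list regardless of input ordering.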


def _stage_ssp(parser, resultsdir):
    """Stage the server-side package.

    This function calls a control segment to stage the server-side package
    based on the job and the autoserv command line options. The detailed
    implementation may differ for each host type; currently, only CrosHost
    has a stage_server_side_package function defined.
    The script returns None if no server-side package is available. However,
    it may raise an exception if staging failed for reasons other than the
    artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv
            process.
    @param resultsdir: Folder to store results. This could be different from
            parser.options.results: parser.options.results can be set to None
            for results to be stored in a temp folder. resultsdir can be None
            if the autoserv run requires no logging.

    @return: URL of the autotest server-side package. None in case of errors.
    """
    machines_list = _get_machines(parser)
    machines_list = server_job.get_machine_dicts(
            machine_names=machines_list,
            store_dir=os.path.join(resultsdir, parser.options.host_info_subdir),
            in_lab=parser.options.lab,
            use_shadow_store=not parser.options.local_only_host_info,
            host_attributes=parser.options.host_attributes,
    )

    namespace = {'machines': machines_list,
                 'isolate_hash': parser.options.isolate,
                 'image': parser.options.test_source_build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
    ssp_url = script_locals['ssp_url']
    if not ssp_url:
        logging.error('Failed to stage SSP package: %s',
                      script_locals['error_msg'])
        logging.error('This job will fail later, when attempting to run with'
                      ' SSP')
    return ssp_url
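# The staging control segment reports back through script_locals; a minimal
# sketch of the contract this function relies on (the real logic lives in
# the 'stage_server_side_package' control segment, names here illustrative):
#
#   ssp_url = stage_package_or_none()  # devserver URL, or None on failure
#   error_msg = 'why staging failed'   # consulted only when ssp_url is None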


def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url,
                  machines):
    """Run the server job with server-side packaging.

    @param job: The server job object.
    @param container_id: ID of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to
                    be stored in a temp folder.
                    results can be None if the autoserv run requires no
                    logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: URL of the staged server-side package.
    @param machines: A list of machines to run the test.
    """
    if not ssp_url:
        job.record('FAIL', None, None,
                   'Failed to stage server-side package')
        raise error.AutoservError('Failed to stage server-side package')

    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_id, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=_LXC_JOB_FOLDER,
                                           dut_name=dut_name,
                                           isolate_hash=parser.options.isolate)
    except Exception as e:
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful when autoserv runs on the host, not in
    # the container. Including this argument causes tests to fail for builds
    # before CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id from the command line args.
        del args[index]

    # A dictionary of paths to replace in the command line. Each key is a
    # path to be replaced with the corresponding value.
    paths_to_replace = {}
    # Replace the control file path with the one in the container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update the result directory with the one in the container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    args = [paths_to_replace.get(arg, arg) for arg in args]
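    # Illustrative substitution (paths made up): with paths_to_replace of
    # {'/tmp/results/123-x': container_result_dir}, the element-wise rewrite
    #   >>> [{'old': 'new'}.get(a, a) for a in ['-r', 'old']]
    #   ['-r', 'new']
    # leaves every argument untouched except exact path matches.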

    # Append --use-existing-results; the results directory is already created
    # and mounted in the container. Passing this arg avoids an exception
    # being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in the container uses a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        debug_files = os.listdir(os.path.join(results, 'debug'))
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        metrics.Counter(
            'chromeos/autotest/experimental/execute_job_in_ssp').increment(
                fields={'success': success})
        test_container.destroy()


def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    For tests running with server-side packaging, the results folder is owned
    by root. This must be changed to the user running the autoserv process,
    so the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when the test container
    can be an unprivileged container.

    @param results: Path to the results folder.

    """
    if not results:
        return

    utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results))
    utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results))


def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or is
    running with a different board or port.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if servo_host not in ['localhost', '127.0.0.1']:
            logging.warn('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore the error if the RPC failed to get the board.
        logging.error('Failed to get board name from AFE. Starting servod is '
                      'aborted.')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with the given board and '
                          'port. There is no need to restart servod.')
            return
        logging.debug('Servod is running with a different board or port. '
                      'Stopping the existing servod.')
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started.')
    except error.CmdError as e:
        logging.error('Failed to start servod, error: %s', e)
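# For reference, START_SERVOD_CMD % ('eve', 9999) expands to
# 'sudo start servod BOARD=eve PORT=9999' ('eve' is a hypothetical board).
# The ps check above assumes a running servod exposes matching --board and
# --port flags on its command line.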


def _control_path_on_disk(control_name):
    """Find the control file corresponding to the given control name, on disk.

    @param control_name: NAME attribute of the control file to fetch.
    @return: Path to the control file.
    """
    cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT)
    control_name_predicate = suite.test_name_matches_pattern_predicate(
            '^%s$' % control_name)
    tests = suite.find_and_parse_tests(cf_getter, control_name_predicate)
    if not tests:
        raise error.AutoservError(
                'Failed to find any control files with NAME %s' % control_name)
    if len(tests) > 1:
        logging.error('Found more than one control file with NAME %s: %s',
                      control_name, [t.path for t in tests])
        raise error.AutoservError(
                'Found more than one control file with NAME %s' % control_name)
    return tests[0].path
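# Example: control_name 'dummy_Pass' is matched against the anchored pattern
# '^dummy_Pass$', and exactly one control file with that NAME must exist
# ('dummy_Pass' is used purely as an illustrative test name).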


def _stage_control_file(control_name, results_dir):
    """Stage the control file to execute from the local autotest checkout.

    @param control_name: Name of the control file to stage.
    @param results_dir: Results directory to stage the control file into.
    @return: Absolute path to the staged control file.
    """
    control_path = _control_path_on_disk(control_name)
    new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME)
    shutil.copy2(control_path, new_control)
    return new_control


def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run the server job with the given options.

    @param pid_file_manager: PidFileManager used to monitor the autoserv
            process.
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: URL of the server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    # Send stdin to /dev/null.
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create a separate process group if the process is not a process group
    # leader. This allows the autoserv process to keep running after the
    # caller process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed for debugging in the lab and is not
    # available to be imported in the chroot as part of VMTest, so wrap the
    # import in a try-except.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOUs generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we receive SIGALRM, be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server-side tests that call shell scripts often depend on $USER being
    # set, but depending on how the autotest scheduler is launched, it may
    # not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # A test can't be both a client- and a server-side test.
    if client and server:
        parser.parser.error("Cannot specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    use_client_trampoline = False
    if parser.options.control_name:
        if use_ssp:
            # When use_ssp is True, autoserv will be re-executed inside a
            # container, preserving the --control-name argument. The control
            # file will be staged inside the re-executed autoserv.
            control = None
        else:
            try:
                control = _stage_control_file(parser.options.control_name,
                                              results)
            except error.AutoservError as e:
                logging.info("Using client trampoline because of: %s", e)
                control = parser.options.control_name
                use_client_trampoline = True

    elif parser.args:
        control = parser.args[0]
    else:
        if not is_special_task:
            parser.parser.error("Missing argument: control file")
        control = None

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''
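    # e.g. ssh_verbosity=2 yields ssh_verbosity_flag='-vv', mirroring the way
    # ssh itself stacks -v flags for increasing verbosity.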

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
            'use_client_trampoline': use_client_trampoline,
    }
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        c['success'] = True

        finally:
            job.close()
            # Special tasks don't run parse, so the result summary needs to
            # be built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build the result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)


# Job breakdown statuses
_hs = host_states.Status
_qs = host_queue_entry_states.Status
_status_list = [
        _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING,
        _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING,
        _qs.RUNNING, _qs.GATHERING, _qs.PARSING]
_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True)


def get_job_status(options):
    """Returns the HQE Status for this run.

    @param options: parser options.
    """
    s = _JOB_OVERHEAD_STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    match = [task for task in task_mapping if getattr(options, task, False)]
    return task_mapping[match[0]] if match else s.RUNNING
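# Example: an options namespace with repair=True maps to
# _JOB_OVERHEAD_STATUS.REPAIRING; if none of the task flags are set, the
# status defaults to RUNNING.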


def _require_ssp_from_control(control_name):
    """Read the value of REQUIRE_SSP from the test control file.

    This reads the control file from the prod checkout of autotest and uses
    that to determine whether to even stage the SSP package on a devserver.

    This means:
    [1] Any change to the REQUIRE_SSP directive in a test requires a
    prod-push to go live.
    [2] This function may find that the control file does not exist, but the
    SSP package may contain the test file. This function conservatively
    returns True in that case.

    This function is called very early in autoserv, before logging is set up.
    """
    if not control_name:
        return True
    try:
        path = _control_path_on_disk(control_name)
    except error.AutoservError as e:
        sys.stderr.write("autoserv: Could not determine control file path,"
                         " assuming we need SSP: %s\n" % e)
        sys.stderr.flush()
        return True
    if not os.path.isfile(path):
        return True
    control = control_data.parse_control(path)
    # There must be an explicit directive in the control file to disable SSP.
    if not control or control.require_ssp is None:
        return True
    return control.require_ssp
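# Decision summary for _require_ssp_from_control (conservative by design):
#   no control_name given        -> True
#   control path lookup fails    -> True
#   control file missing on disk -> True
#   REQUIRE_SSP not set          -> True
#   otherwise                    -> the control file's REQUIRE_SSP value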


def main():
    start_time = datetime.datetime.now()
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error_msg = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error_msg)
            sys.exit(1)

        # Now that we've verified there's no leftover results dir from
        # previous jobs, create the results dir, since the logging system
        # needs to create its log file there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name
        # is used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s", results)

        # Wait until now to perform this check, so it gets properly logged.
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. E.g.,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a
    # test for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            'unknown-test', parser.options.results, machine
                            ).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s', exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)


if __name__ == '__main__':
    main()