Home | History | Annotate | Download | only in hosts
      1 # Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 #
      5 # Expects to be run in an environment with sudo and no interactive password
      6 # prompt, such as within the Chromium OS development chroot.
      7 
      8 
      9 """This file provides core logic for servo verify/repair process."""
     10 
     11 
     12 import httplib
     13 import logging
     14 import socket
     15 import xmlrpclib
     16 
     17 from autotest_lib.client.bin import utils
     18 from autotest_lib.client.common_lib import control_data
     19 from autotest_lib.client.common_lib import error
     20 from autotest_lib.client.common_lib import global_config
     21 from autotest_lib.client.common_lib import host_states
     22 from autotest_lib.client.common_lib import hosts
     23 from autotest_lib.client.common_lib import lsbrelease_utils
     24 from autotest_lib.client.common_lib.cros import autoupdater
     25 from autotest_lib.client.common_lib.cros import dev_server
     26 from autotest_lib.client.common_lib.cros import retry
     27 from autotest_lib.client.common_lib.cros.network import ping_runner
     28 from autotest_lib.client.cros import constants as client_constants
     29 from autotest_lib.server import afe_utils
     30 from autotest_lib.server import site_utils as server_site_utils
     31 from autotest_lib.server.cros import dnsname_mangler
     32 from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
     33 from autotest_lib.server.cros.dynamic_suite import control_file_getter
     34 from autotest_lib.server.cros.servo import servo
     35 from autotest_lib.server.hosts import servo_repair
     36 from autotest_lib.server.hosts import ssh_host
     37 from autotest_lib.site_utils.rpm_control_system import rpm_client
     38 
     39 try:
     40     from chromite.lib import metrics
     41 except ImportError:
     42     metrics = utils.metrics_mock
     43 
     44 
# Names of the host attributes in the database that describe the servo
# connected to a DUT: the servo_host and servo_port values, plus the
# servo_board and servo_serial values.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
# When true, ServoHost.get_servod_server_proxy() connects to servod on
# non-localhost servo hosts through the host's rpc_server_tracker
# instead of a direct XML-RPC connection.  Defaults to False.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory handed to the control file getter when looking up the control
# file of the servo host reboot test.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the tests (also used as job names) for the synchronized servo
# host reboot, in its regular and forced variants.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
     62 
     63 class ServoHost(ssh_host.SSHHost):
     64     """Host class for a host that controls a servo, e.g. beaglebone."""
     65 
    # Port used for servod when the caller does not supply servo_port.
    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Name of the servod method passed as `ready_test_name` when the
    # connection is made through rpc_server_tracker.xmlrpc_connect().
    SERVO_READY_METHOD = 'get_version'

    # Shell command used by reboot(): wait one second, start `reboot` in
    # the background, then run `reboot -f` ten seconds later.
    # NOTE(review): the forced reboot is presumably a fallback for a
    # normal reboot that hangs -- confirm.
    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
     75 
     76 
     77     def _initialize(self, servo_host='localhost',
     78                     servo_port=DEFAULT_PORT, servo_board=None,
     79                     servo_serial=None, is_in_lab=None, *args, **dargs):
     80         """Initialize a ServoHost instance.
     81 
     82         A ServoHost instance represents a host that controls a servo.
     83 
     84         @param servo_host: Name of the host where the servod process
     85                            is running.
     86         @param servo_port: Port the servod process is listening on.
     87         @param servo_board: Board that the servo is connected to.
     88         @param is_in_lab: True if the servo host is in Cros Lab. Default is set
     89                           to None, for which utils.host_is_in_lab_zone will be
     90                           called to check if the servo host is in Cros lab.
     91 
     92         """
     93         super(ServoHost, self)._initialize(hostname=servo_host,
     94                                            *args, **dargs)
     95         self.servo_port = servo_port
     96         self.servo_board = servo_board
     97         self.servo_serial = servo_serial
     98         self._servo = None
     99         self._repair_strategy = (
    100                 servo_repair.create_servo_repair_strategy())
    101         self._is_localhost = (self.hostname == 'localhost')
    102         if self._is_localhost:
    103             self._is_in_lab = False
    104         elif is_in_lab is None:
    105             self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
    106         else:
    107             self._is_in_lab = is_in_lab
    108 
    109         # Commands on the servo host must be run by the superuser.
    110         # Our account on a remote host is root, but if our target is
    111         # localhost then we might be running unprivileged.  If so,
    112         # `sudo` will have to be added to the commands.
    113         if self._is_localhost:
    114             self._sudo_required = utils.system_output('id -u') != '0'
    115         else:
    116             self._sudo_required = False
    117 
    118 
    def connect_servo(self):
        """Create the servo.Servo object for this host and initialize it.

        A new servo.Servo is built for this host and its
        `initialize_dut` method is run under a timeout of
        INITIALIZE_SERVO_TIMEOUT_SECS.  Only when initialization
        finishes in time is the object cached in `self._servo`.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises AutoservVerifyError if servo initialization timed out.
        """
        new_servo = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        timed_out, _ = retry.timeout(
                new_servo.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if not timed_out:
            self._servo = new_servo
            return
        raise hosts.AutoservVerifyError(
                'Servo initialize timed out.')
    138 
    139 
    140     def disconnect_servo(self):
    141         """Disconnect our servo if it exists.
    142 
    143         If we've previously successfully connected to our servo,
    144         disconnect any established ssh tunnel, and set `self._servo`
    145         back to `None`.
    146         """
    147         if self._servo:
    148             # N.B. This call is safe even without a tunnel:
    149             # rpc_server_tracker.disconnect() silently ignores
    150             # unknown ports.
    151             self.rpc_server_tracker.disconnect(self.servo_port)
    152             self._servo = None
    153 
    154 
    155     def is_in_lab(self):
    156         """Check whether the servo host is a lab device.
    157 
    158         @returns: True if the servo host is in Cros Lab, otherwise False.
    159 
    160         """
    161         return self._is_in_lab
    162 
    163 
    164     def is_localhost(self):
    165         """Checks whether the servo host points to localhost.
    166 
    167         @returns: True if it points to localhost, otherwise False.
    168 
    169         """
    170         return self._is_localhost
    171 
    172 
    def get_servod_server_proxy(self):
        """Build a proxy for talking to the servod server on this host.

        When ENABLE_SSH_TUNNEL_FOR_SERVO is set and the host is not
        localhost, the connection is made through the host's
        rpc_server_tracker; otherwise servod is contacted directly at
        http://<hostname>:<servo_port>.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        use_tunnel = ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost()
        if not use_tunnel:
            servod_url = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(servod_url)
        return self.rpc_server_tracker.xmlrpc_connect(
                None, self.servo_port,
                ready_test_name=self.SERVO_READY_METHOD,
                timeout_seconds=60)
    187 
    188 
    def is_cros_host(self):
        """Tell whether the servo host is running chromeos.

        The check greps /etc/lsb-release on the host for CHROMEOS.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information
            (the command could not be run or ssh timed out).
        """
        try:
            grep_result = self.run('grep -q CHROMEOS /etc/lsb-release',
                                   ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        else:
            return grep_result.exit_status == 0
    201 
    202 
    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None,
                         alive_count_max=None, connection_attempts=None):
        """Build the ssh command, forcing the tuned servo host options.

        Only `user`, `port` and `opts` are taken from the caller; the
        remaining settings are always replaced by fixed values:

          - connect_timeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - alive_interval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with alive_count_max ensures
          that if the connection dies, Autotest will bail out quickly.

          - alive_count_max=3; consistency with remote_access.py.

          - connection_attempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - hosts_file=/dev/null; we don't care about the keys.

          - '-o Protocol=2' appended to opts; needed for the alive
          interval setting.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.
        @param alive_count_max Ignored.
        @param connection_attempts Ignored.

        @returns: An ssh command with the requested settings.

        """
        tuned_settings = dict(
                user=user,
                port=port,
                opts=' '.join((opts, '-o Protocol=2')),
                hosts_file='/dev/null',
                connect_timeout=30,
                alive_interval=180,
                alive_count_max=3,
                connection_attempts=4)
        return super(ServoHost, self).make_ssh_command(**tuned_settings)
    242 
    243 
    244     def _make_scp_cmd(self, sources, dest):
    245         """Format scp command.
    246 
    247         Given a list of source paths and a destination path, produces the
    248         appropriate scp command for encoding it. Remote paths must be
    249         pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
    250         to allow additional ssh options.
    251 
    252         @param sources: A list of source paths to copy from.
    253         @param dest: Destination path to copy to.
    254 
    255         @returns: An scp command that copies |sources| on local machine to
    256                   |dest| on the remote servo host.
    257 
    258         """
    259         command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
    260                    '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
    261         return command % (self.master_ssh_option,
    262                           self.port, ' '.join(sources), dest)
    263 
    264 
    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        For a remote servo host this defers to `run` in SSHHost.  For
        'localhost' the command is executed with utils.run, wrapped in
        `sudo -n sh -c "..."` when superuser rights are required.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: Accepted for interface compatibility;
                                     not forwarded by this method.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        common_kwargs = dict(
                command=command, timeout=timeout,
                ignore_status=ignore_status, stdout_tee=stdout_tee,
                stderr_tee=stderr_tee, stdin=stdin,
                verbose=verbose, args=args)

        if not self.is_localhost():
            common_kwargs['connect_timeout'] = connect_timeout
            common_kwargs['options'] = options
            return super(ServoHost, self).run(**common_kwargs)

        if self._sudo_required:
            escaped = utils.sh_escape(command)
            common_kwargs['command'] = 'sudo -n sh -c "%s"' % escaped
        try:
            return utils.run(**common_kwargs)
        except error.CmdError as cmd_error:
            # Local failures are reported with the same exception type
            # that a remote run would raise.
            logging.error(cmd_error)
            raise error.AutoservRunError('command execution error',
                                         cmd_error.result_obj)
    320 
    321 
    def _get_release_version(self):
        """Read CHROMEOS_RELEASE_VERSION from the host's lsb-release file.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        cat_result = self.run('cat "%s"' % client_constants.LSB_RELEASE)
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=cat_result.stdout.strip())
    332 
    333 
    def get_attached_duts(self, afe):
        """Look up the duts whose servo host attribute names this host.

        @param afe: afe instance.

        @returns list of duts.
        """
        lookup = {'attribute': SERVO_HOST_ATTR, 'value': self.hostname}
        return afe.get_hosts_by_attribute(**lookup)
    343 
    344 
    def get_board(self):
        """Work out this servo host's board from its /etc/lsb-release.

        @returns a string representing this servo host's board.
        """
        lsb_release = self.run('cat /etc/lsb-release').stdout
        return lsbrelease_utils.get_current_board(
                lsb_release_content=lsb_release)
    352 
    353 
    354     def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
    355         """Choose which dut to schedule servo host reboot job.
    356 
    357         We'll want a semi-deterministic way of selecting which host should be
    358         scheduled for the servo host reboot job.  For now we'll sort the
    359         list with the expectation the dut list will stay consistent.
    360         From there we'll grab the first dut that is available so we
    361         don't schedule a job on a dut that will never run.
    362 
    363         @param dut_list:  List of the dut hostnames to choose from.
    364         @param afe:       Instance of the AFE.
    365 
    366         @return hostname of dut to schedule job on.
    367         """
    368         afe_hosts = afe.get_hosts(dut_list)
    369         afe_hosts.sort()
    370         for afe_host in afe_hosts:
    371             if afe_host.status not in host_states.UNAVAILABLE_STATES:
    372                 return afe_host.hostname
    373         # If they're all unavailable, just return the first sorted dut.
    374         dut_list.sort()
    375         return dut_list[0]
    376 
    377 
    def _sync_job_scheduled_for_duts(self, dut_list, afe):
        """Tell whether a synchronized reboot is already queued for the duts.

        Every host queue entry that isn't completed is inspected for each
        dut; the answer is True as soon as one of them belongs to a job
        named after either servo host reboot test.

        @param dut_list:  List of duts to check on.
        @param afe:       Instance of the AFE.

        @returns True if the job is scheduled, False otherwise.
        """
        reboot_job_names = (_SERVO_HOST_REBOOT_TEST_NAME,
                            _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        for afe_host in afe.get_hosts(dut_list):
            pending_entries = afe.get_host_queue_entries(host=afe_host.id,
                                                         complete=0)
            for entry in pending_entries:
                jobs = afe.get_jobs(id=entry.job.id)
                if not jobs:
                    continue
                if jobs[0].name in reboot_job_names:
                    return True
        return False
    398 
    399 
    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        Nothing is scheduled when a reboot job is already pending on one
        of the duts.  Otherwise a server job running the (forced) servo
        host reboot test is created on one chosen dut.

        When we schedule a job, it will create a ServoHost object which will
        go through this entire flow of checking if a reboot is needed and
        trying to schedule it.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # If we've already scheduled job on a dut, we're done here.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # Looks like we haven't scheduled a job yet.
        if force_reboot:
            test = _SERVO_HOST_FORCE_REBOOT_TEST_NAME
        else:
            test = _SERVO_HOST_REBOOT_TEST_NAME
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
        control_file = getter.get_control_file_contents_by_name(test)
        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
        try:
            afe.create_job(control_file=control_file, name=test,
                           control_type=control_type, hosts=[dut])
        except Exception:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            logging.exception('Scheduling reboot job failed due to Exception.')
    432 
    433 
    def reboot(self, *args, **dargs):
        """Reboot the host, always using the servo host's REBOOT_CMD.

        @param args: Positional arguments forwarded to the parent reboot().
        @param dargs: Keyword arguments forwarded to the parent reboot().
        """
        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                      *args, **dargs)
    438 
    439 
    def _check_for_reboot(self, updater):
        """Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.  When more than one dut is attached to this servo
        host, a synchronized reboot job is scheduled instead and the
        host is not rebooted by this call.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to shut down, or
            fails to come back, after the reboot was requested.
        """
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        if status != autoupdater.UPDATER_NEED_REBOOT:
            return status, current_build_number

        # Check if we need to schedule an organized reboot.
        afe = frontend_wrappers.RetryingAFE(
                timeout_min=5, delay_sec=10,
                server=server_site_utils.get_global_afe_hostname())
        dut_list = self.get_attached_duts(afe)
        logging.info('servo host has the following duts: %s', dut_list)
        if len(dut_list) > 1:
            logging.info('servo host has multiple duts, scheduling '
                         'synchronized reboot')
            self.schedule_synchronized_reboot(dut_list, afe)
            return status, current_build_number

        logging.info('Rebooting servo host %s from build %s',
                     self.hostname, current_build_number)
        # Sample the boot id while the host is still known to be up.
        # Asking for it after the reboot command has been issued races
        # with the shutdown: the query can fail, or can observe the id
        # of the new boot, which would make wait_down() never succeed.
        old_boot_id = self.get_boot_id()

        # Tell the reboot() call not to wait for completion.
        # Otherwise, the call will log reboot failure if servo does
        # not come back.  The logged reboot failure will lead to
        # test job failure.  If the test does not require servo, we
        # don't want servo failure to fail the test with error:
        # `Host did not return from reboot` in status.log.
        self.reboot(fastsync=True, wait=False)

        # We told the reboot() call not to wait, but we need to wait
        # for the reboot before we continue.  The code from here below
        # is basically a copy of Host.wait_for_restart(), with the
        # logging bits ripped out, so that they can't cause the
        # failure logging problem described above.
        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                              old_boot_id=old_boot_id):
            raise error.AutoservHostError(
                    'servo host %s failed to shut down.' %
                    self.hostname)
        if not self.wait_up(timeout=120):
            raise error.AutoservHostError(
                    'servo host %s failed to come back from reboot.' %
                    self.hostname)

        # Refresh both values now that the host runs the new build.
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        logging.info('servo host %s back from reboot, with build %s',
                     self.hostname, current_build_number)
        return status, current_build_number
    502 
    503 
    def update_image(self, wait_for_update=False):
        """Bring the servo host's image up to date, if needed.

        The following cases are handled:
          * If the host is not running Chrome OS, or is running moblab,
            do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do not trigger another one.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete unless asked to.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
        parsed_target = server_site_utils.ParseBuildName(target_build)
        target_build_number = parsed_target[3]
        # For servo image staging, we want it as more widely distributed as
        # possible, so that devservers' load can be evenly distributed. So use
        # hostname instead of target_build as hash.
        devserver = dev_server.ImageServer.resolve(self.hostname,
                                                   hostname=self.hostname)
        update_url = devserver.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=update_url,
                                                host=self)
        status, current_build_number = self._check_for_reboot(updater)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif status == autoupdater.UPDATER_NEED_REBOOT:
            # A reboot is still outstanding; nothing more to do here.
            return
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', update_url,
                         self.hostname, current_build_number,
                         target_build_number)
            try:
                devserver.stage_artifacts(target_build,
                                          artifacts=['full_payload'])
            except Exception as stage_error:
                logging.error('Staging artifacts failed: %s',
                              str(stage_error))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette): This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as update_error:
                    trigger_download_status = (
                            'failed with %s' % str(update_error))
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            # Already on the target build: no update is pending, so
            # there is nothing to wait for either.
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            return

        if wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)
    594 
    595 
    def verify(self, silent=False):
        """Run the repair strategy's verification against this host.

        powerd is stopped first (best effort).  If verification raises,
        the servo is disconnected before the exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  If you touch the .oobe_completed file
        # (as we do to work around an update_engine problem), then
        # powerd will eventually shut down the beaglebone for lack
        # of (apparent) activity.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Clean up the servo connection on any failure, then let
            # the original exception propagate unchanged.
            self.disconnect_servo()
            raise
    615 
    616 
    def repair(self, silent=False):
        """Run the repair strategy's repair actions against this host.

        If the repair raises, the servo is disconnected before the
        exception is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        try:
            self._repair_strategy.repair(self, silent)
        except:
            # Clean up the servo connection on any failure, then let
            # the original exception propagate unchanged.
            self.disconnect_servo()
            raise
    627 
    628 
    def has_power(self):
        """Tell whether this servo host is considered to be PoE powered.

        @return The result of `is_in_lab()`; see the TODO below.
        """
        # TODO(fdeng): See crbug.com/302791
        # Until that is resolved, every servo host in the lab is
        # assumed to have power, so lab membership is the answer.
        in_lab = self.is_in_lab()
        return in_lab
    634 
    635 
    def power_cycle(self):
        """Power cycle this host through the RPM client, if it has power.

        When `has_power()` is false nothing is cycled; the skip is only
        logged.

        @raises AutoservRepairError if the RPM request fails with a
                socket, XML-RPC, HTTP status line or remote power error.

        """
        if not self.has_power():
            logging.info('Skipping power cycling, not a lab device.')
            return

        try:
            rpm_client.set_power(self.hostname, 'CYCLE')
        except (socket.error, xmlrpclib.Error,
                httplib.BadStatusLine,
                rpm_client.RemotePowerException) as e:
            # Fold the transport level failures into the one error
            # type that this method documents.
            raise hosts.AutoservRepairError(
                    'Power cycling %s failed: %s' % (self.hostname, e))
    653 
    654 
    def get_servo(self):
        """Return the servo object cached on this host.

        @return: the value held in `self._servo` (a servo.Servo object).
        """
        return self._servo
    661 
    662 
    663 def make_servo_hostname(dut_hostname):
    664     """Given a DUT's hostname, return the hostname of its servo.
    665 
    666     @param dut_hostname: hostname of a DUT.
    667 
    668     @return hostname of the DUT's servo.
    669 
    670     """
    671     host_parts = dut_hostname.split('.')
    672     host_parts[0] = host_parts[0] + '-servo'
    673     return '.'.join(host_parts)
    674 
    675 
    676 def servo_host_is_up(servo_hostname):
    677     """Given a servo host name, return if it's up or not.
    678 
    679     @param servo_hostname: hostname of the servo host.
    680 
    681     @return True if it's up, False otherwise
    682     """
    683     # Technically, this duplicates the SSH ping done early in the servo
    684     # proxy initialization code.  However, this ping ends in a couple
    685     # seconds when if fails, rather than the 60 seconds it takes to decide
    686     # that an SSH ping has timed out.  Specifically, that timeout happens
    687     # when our servo DNS name resolves, but there is no host at that IP.
    688     logging.info('Pinging servo host at %s', servo_hostname)
    689     ping_config = ping_runner.PingConfig(
    690             servo_hostname, count=3,
    691             ignore_result=True, ignore_status=True)
    692     return ping_runner.PingRunner().ping(ping_config).received > 0
    693 
    694 
    695 def _map_afe_board_to_servo_board(afe_board):
    696     """Map a board we get from the AFE to a servo appropriate value.
    697 
    698     Many boards are identical to other boards for servo's purposes.
    699     This function makes that mapping.
    700 
    701     @param afe_board string board name received from AFE.
    702     @return board we expect servo to have.
    703 
    704     """
    705     KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    706     BOARD_MAP = {'gizmo': 'panther'}
    707     mapped_board = afe_board
    708     if afe_board in BOARD_MAP:
    709         mapped_board = BOARD_MAP[afe_board]
    710     else:
    711         for suffix in KNOWN_SUFFIXES:
    712             if afe_board.endswith(suffix):
    713                 mapped_board = afe_board[0:-len(suffix)]
    714                 break
    715     if mapped_board != afe_board:
    716         logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    717     return mapped_board
    718 
    719 
    720 def _get_standard_servo_args(dut_host):
    721     """Return servo data associated with a given DUT.
    722 
    723     This checks for the presence of servo host and port attached to the
    724     given `dut_host`.  This data should be stored in the
    725     `_afe_host.attributes` field in the provided `dut_host` parameter.
    726 
    727     @param dut_host   Instance of `Host` on which to find the servo
    728                       attributes.
    729     @return A tuple of `servo_args` dict with host and an option port,
    730             plus an `is_in_lab` flag indicating whether this in the CrOS
    731             test lab, or some different environment.
    732     """
    733     servo_args = None
    734     is_in_lab = False
    735     is_ssp_moblab = False
    736     if utils.is_in_container():
    737         is_moblab = _CONFIG.get_config_value(
    738                 'SSP', 'is_moblab', type=bool, default=False)
    739         is_ssp_moblab = is_moblab
    740     else:
    741         is_moblab = utils.is_moblab()
    742     attrs = dut_host._afe_host.attributes
    743     if attrs and SERVO_HOST_ATTR in attrs:
    744         servo_host = attrs[SERVO_HOST_ATTR]
    745         if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
    746             servo_host = _CONFIG.get_config_value(
    747                     'SSP', 'host_container_ip', type=str, default=None)
    748         servo_args = {SERVO_HOST_ATTR: servo_host}
    749         if SERVO_PORT_ATTR in attrs:
    750             try:
    751                 servo_port = attrs[SERVO_PORT_ATTR]
    752                 servo_args[SERVO_PORT_ATTR] = int(servo_port)
    753             except ValueError:
    754                 logging.error('servo port is not an int: %s', servo_port)
    755                 # Let's set the servo args to None since we're not creating
    756                 # the ServoHost object with the proper port now.
    757                 servo_args = None
    758         if SERVO_SERIAL_ATTR in attrs:
    759             servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
    760         is_in_lab = (not is_moblab
    761                      and utils.host_is_in_lab_zone(servo_host))
    762 
    763     # TODO(jrbarnette):  This test to use the default lab servo hostname
    764     # is a legacy that we need only until every host in the DB has
    765     # proper attributes.
    766     elif (not is_moblab and
    767             not dnsname_mangler.is_ip_address(dut_host.hostname)):
    768         servo_host = make_servo_hostname(dut_host.hostname)
    769         is_in_lab = utils.host_is_in_lab_zone(servo_host)
    770         if is_in_lab:
    771             servo_args = {SERVO_HOST_ATTR: servo_host}
    772     if servo_args is not None:
    773         info = dut_host.host_info_store.get()
    774         if info.board:
    775             servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
    776                     info.board)
    777     return servo_args, is_in_lab
    778 
    779 
    780 def create_servo_host(dut, servo_args, try_lab_servo=False,
    781                       try_servo_repair=False):
    782     """Create a ServoHost object for a given DUT, if appropriate.
    783 
    784     This function attempts to create and verify or repair a `ServoHost`
    785     object for a servo connected to the given `dut`, subject to various
    786     constraints imposed by the parameters:
    787       * When the `servo_args` parameter is not `None`, a servo
    788         host must be created, and must be checked with `repair()`.
    789       * Otherwise, if a servo exists in the lab and `try_lab_servo` is
    790         true:
    791           * If `try_servo_repair` is true, then create a servo host and
    792             check it with `repair()`.
    793           * Otherwise, if the servo responds to `ping` then create a
    794             servo host and check it with `verify()`.
    795 
    796     In cases where `servo_args` was not `None`, repair failure
    797     exceptions are passed back to the caller; otherwise, exceptions
    798     are logged and then discarded.  Note that this only happens in cases
    799     where we're called from a test (not special task) control file that
    800     has an explicit dependency on servo.  In that case, we require that
    801     repair not write to `status.log`, so as to avoid polluting test
    802     results.
    803 
    804     TODO(jrbarnette):  The special handling for servo in test control
    805     files is a thorn in my flesh; I dearly hope to see it cut out before
    806     my retirement.
    807 
    808     Parameters for a servo host consist of a host name, port number, and
    809     DUT board, and are determined from one of these sources, in order of
    810     priority:
    811       * Servo attributes from the `dut` parameter take precedence over
    812         all other sources of information.
    813       * If a DNS entry for the servo based on the DUT hostname exists in
    814         the CrOS lab network, that hostname is used with the default
    815         port and the DUT's board.
    816       * If no other options are found, the parameters will be taken
    817         from the `servo_args` dict passed in from the caller.
    818 
    819     @param dut            An instance of `Host` from which to take
    820                           servo parameters (if available).
    821     @param servo_args     A dictionary with servo parameters to use if
    822                           they can't be found from `dut`.  If this
    823                           argument is supplied, unrepaired exceptions
    824                           from `verify()` will be passed back to the
    825                           caller.
    826     @param try_lab_servo  If not true, servo host creation will be
    827                           skipped unless otherwise required by the
    828                           caller.
    829     @param try_servo_repair  If true, check a servo host with
    830                           `repair()` instead of `verify()`.
    831 
    832     @returns: A ServoHost object or None. See comments above.
    833 
    834     """
    835     servo_dependency = servo_args is not None
    836     is_in_lab = False
    837     if dut is not None and (try_lab_servo or servo_dependency):
    838         servo_args_override, is_in_lab = _get_standard_servo_args(dut)
    839         if servo_args_override is not None:
    840             servo_args = servo_args_override
    841     if servo_args is None:
    842         return None
    843     if (not servo_dependency and not try_servo_repair and
    844             not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
    845         return None
    846     newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    847     # Note that the logic of repair() includes everything done
    848     # by verify().  It's sufficient to call one or the other;
    849     # we don't need both.
    850     if servo_dependency:
    851         newhost.repair(silent=True)
    852     else:
    853         try:
    854             if try_servo_repair:
    855                 newhost.repair()
    856             else:
    857                 newhost.verify()
    858         except Exception:
    859             operation = 'repair' if try_servo_repair else 'verification'
    860             logging.exception('Servo %s failed for %s',
    861                               operation, newhost.hostname)
    862     return newhost
    863