#!/usr/bin/python2
#
# Copyright 2015 Google Inc. All Rights Reserved.
"""This module controls locking and unlocking of test machines."""

from __future__ import print_function

import argparse
import getpass
import os
import sys
import traceback

from cros_utils import logger
from cros_utils import machines


class AFELockException(Exception):
  """Base class for exceptions in this module."""


class MachineNotPingable(AFELockException):
  """Raised when machine does not respond to ping."""


class MissingHostInfo(AFELockException):
  """Raised when cannot find info about machine on machine servers."""


class UpdateNonLocalMachine(AFELockException):
  """Raised when user requests to add/remove a ChromeOS HW Lab machine."""


class DuplicateAdd(AFELockException):
  """Raised when user requests to add a machine that's already on the server."""


class UpdateServerError(AFELockException):
  """Raised when attempt to add/remove a machine from local server fails."""


class LockingError(AFELockException):
  """Raised when server fails to lock/unlock machine as requested."""


class DontOwnLock(AFELockException):
  """Raised when user attempts to unlock machine locked by someone else."""
  # This should not be raised if the user specified '--force'


class NoAFEServer(AFELockException):
  """Raised when cannot find/access the autotest server."""


class AFEAccessError(AFELockException):
  """Raised when cannot get information about lab machine from lab server."""


class AFELockManager(object):
  """Class for locking/unlocking machines via Autotest Front End servers.

  This class contains methods for checking the locked status of machines
  on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
  has methods for adding/removing machines from the local server, and for
  changing the lock status of machines on either server.  For the ChromeOS
  HW Lab, it only allows access to the toolchain team lab machines, as
  defined in toolchain-utils/crosperf/default_remotes.  By default it will
  look for a local server on chrotomation2.mtv.corp.google.com, but an
  alternative local AFE server can be supplied, if desired.

  !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
  thread/process of a program.  If you launch threads and try to call it
  from a thread, you will get an error.  This has to do with restrictions
  in the Python virtual machine (and signal handling) and cannot be changed.
  """

  # Local AFE server used when the caller does not supply one.
  LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

  def __init__(self,
               remotes,
               force_option,
               chromeos_root,
               local_server,
               use_local=True,
               log=None):
    """Initializes an AFELockManager object.

    Args:
      remotes: A list of machine names or ip addresses to be managed.  Names
        and ip addresses should be represented as strings.  If the list is
        empty, the lock manager will get all known machines.
      force_option: A Boolean indicating whether or not to force an unlock of
        a machine that was locked by someone else.
      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
      local_server: A string containing the name or ip address of the machine
        that is running an AFE server, which is to be used for managing
        machines that are not in the ChromeOS HW lab.
      use_local: A Boolean indicating whether or not to use/allow a local AFE
        server to be used (see local_server argument).
      log: If not None, this is the logger object to be used for writing out
        informational output messages.  It is expected to be an instance of
        Logger class from cros_utils/logger.py.

    Raises:
      MachineNotPingable: The local AFE server is needed but does not respond
        to ping.
    """
    self.chromeos_root = chromeos_root
    self.user = getpass.getuser()
    self.logger = log or logger.GetLogger()
    autotest_path = os.path.join(chromeos_root,
                                 'src/third_party/autotest/files')

    sys.path.append(chromeos_root)
    sys.path.append(autotest_path)
    sys.path.append(os.path.join(autotest_path, 'server', 'cros'))

    # We have to wait to do these imports until the paths above have
    # been fixed.
    # pylint: disable=import-error
    from client import setup_modules
    setup_modules.setup(
        base_path=autotest_path, root_module_name='autotest_lib')

    from dynamic_suite import frontend_wrappers

    self.afe = frontend_wrappers.RetryingAFE(
        timeout_min=30, delay_sec=10, debug=False, server='cautotest')

    self.local = use_local
    # De-duplicate the requested machines; tolerate None as "no machines".
    self.machines = list(set(remotes or []))
    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
    # If every requested machine is a lab machine, the local server is not
    # needed at all, so do not require it to be reachable.
    if self.machines and self.AllLabMachines():
      self.local = False

    if not self.local:
      self.local_afe = None
    else:
      dargs = {}
      dargs['server'] = local_server or AFELockManager.LOCAL_SERVER
      # Make sure local server is pingable.
      error_msg = ('Local autotest server machine %s not responding to ping.' %
                   dargs['server'])
      self.CheckMachine(dargs['server'], error_msg)
      self.local_afe = frontend_wrappers.RetryingAFE(
          timeout_min=30, delay_sec=10, debug=False, **dargs)
    # No machines requested: manage every machine known to either server.
    if not self.machines:
      self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines()
    self.force = force_option

  def AllLabMachines(self):
    """Check to see if all machines being used are HW Lab machines.

    Returns:
      True if every entry of self.machines is in self.toolchain_lab_machines
      (also True when self.machines is empty), False otherwise.
    """
    return all(m in self.toolchain_lab_machines for m in self.machines)

  def CheckMachine(self, machine, error_msg):
    """Verifies that machine is responding to ping.

    The machine is pinged under the given name first and, if that fails,
    under the name with '.cros' appended.

    Args:
      machine: String containing the name or ip address of machine to check.
      error_msg: Message to print if ping fails.

    Raises:
      MachineNotPingable:  If machine is not responding to 'ping'
    """
    if not machines.MachineIsPingable(machine, logging_level='none'):
      cros_machine = machine + '.cros'
      if not machines.MachineIsPingable(cros_machine, logging_level='none'):
        raise MachineNotPingable(error_msg)

  def MachineIsKnown(self, machine):
    """Checks to see if either AFE server knows the given machine.

    Args:
      machine: String containing name or ip address of machine to check.

    Returns:
      Boolean indicating if the machine is in the list of known machines for
        either AFE server.
    """
    if machine in self.toolchain_lab_machines:
      return True
    # Only query the local server when we actually have one.
    return bool(self.local_afe) and machine in self.GetAllNonlabMachines()

  def GetAllToolchainLabMachines(self):
    """Gets a list of all the toolchain machines in the ChromeOS HW lab.

    The machines are read from the file crosperf/default_remotes, located
    relative to the directory containing this module.  Each non-blank line
    of that file must contain exactly one ':'; the whitespace-separated
    words after the ':' are taken as machine names.

    Returns:
      A list of names of the toolchain machines in the ChromeOS HW lab.

    Raises:
      ValueError: A non-blank line does not contain exactly one ':'.
    """
    machines_file = os.path.join(
        os.path.dirname(__file__), 'crosperf', 'default_remotes')
    machine_list = []
    with open(machines_file, 'r') as input_file:
      for line in input_file:
        if not line.strip():
          # Tolerate blank lines (e.g. a trailing empty line) instead of
          # failing to unpack the split below.
          continue
        _, remotes = line.split(':')
        machine_list.extend(remotes.split())
    return machine_list

  def GetAllNonlabMachines(self):
    """Gets a list of all known machines on the local AFE server.

    Returns:
      A list of the names of the machines on the local AFE server; an empty
      list if there is no local AFE server.
    """
    non_lab_machines = []
    if self.local_afe:
      non_lab_machines = self.local_afe.get_hostnames()
    return non_lab_machines

  def PrintStatusHeader(self, is_lab_machine):
    """Prints the status header lines for machines.

    Args:
      is_lab_machine: Boolean indicating whether to print HW Lab header or
        local machine header (different spacing).
    """
    if is_lab_machine:
      print('\nMachine (Board)\t\t\t\t\tStatus')
      print('---------------\t\t\t\t\t------\n')
    else:
      print('\nMachine (Board)\t\tStatus')
      print('---------------\t\t------\n')

  def RemoveLocalMachine(self, m):
    """Removes a machine from the local AFE server.

    Does nothing if there is no local AFE server.

    Args:
      m: The machine to remove.

    Raises:
      MissingHostInfo:  Can't find machine to be removed.
    """
    if self.local_afe:
      host_info = self.local_afe.get_hosts(hostname=m)
      if host_info:
        host_info = host_info[0]
        host_info.delete()
      else:
        raise MissingHostInfo('Cannot find/delete machine %s.' % m)

  def AddLocalMachine(self, m):
    """Adds a machine to the local AFE server.

    Does nothing if there is no local AFE server.

    Args:
      m: The machine to be added.

    Raises:
      MachineNotPingable: The machine does not respond to ping.
    """
    if self.local_afe:
      error_msg = 'Machine %s is not responding to ping.' % m
      self.CheckMachine(m, error_msg)
      self.local_afe.create_host(m)

  def AddMachinesToLocalServer(self):
    """Adds one or more machines to the local AFE server.

    Verify that the requested machines are legal to add to the local server,
    i.e. that they are not ChromeOS HW lab machines, and they are not already
    on the local server.  Call AddLocalMachine for each valid machine.

    Raises:
      DuplicateAdd:  Attempt to add a machine that is already on the server.
      UpdateNonLocalMachine:  Attempt to add a ChromeOS HW lab machine.
      NoAFEServer:  There is no local AFE server to add the machine to.
      UpdateServerError:  Something went wrong while attempting to add a
        machine.
    """
    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine('Machine %s is already in the ChromeOS HW '
                                      'Lab.  Cannot add it to local server.' %
                                      cros_name)
      # Checked after the lab-machine test so that a lab machine is still
      # reported as UpdateNonLocalMachine when no local server is in use.
      if self.local_afe is None:
        raise NoAFEServer('Error: No local AFE server available to add %s.' %
                          m)
      host_info = self.local_afe.get_hosts(hostname=m)
      if host_info:
        raise DuplicateAdd('Machine %s is already on the local server.' % m)
      try:
        self.AddLocalMachine(m)
        self.logger.LogOutput('Successfully added %s to local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError(
            'Error occurred while attempting to add %s. %s' % (m, str(e)))

  def RemoveMachinesFromLocalServer(self):
    """Removes one or more machines from the local AFE server.

    Verify that the requested machines are legal to remove from the local
    server, i.e. that they are not ChromeOS HW lab machines.  Call
    RemoveLocalMachine for each valid machine.

    Raises:
      UpdateNonLocalMachine:  Attempt to remove a ChromeOS HW lab machine.
      UpdateServerError:  Something went wrong while attempting to remove a
        machine.
    """
    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine(
              'Machine %s is in the ChromeOS HW Lab. '
              'This script cannot remove lab machines.' % cros_name)
      try:
        self.RemoveLocalMachine(m)
        self.logger.LogOutput('Successfully removed %s from local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to remove %s '
                                '(%s).' % (m, str(e)))

  @staticmethod
  def _FormatMachineState(name, state):
    """Builds the one-line status text for a machine.

    Args:
      name: The machine name to show.
      state: The state dictionary for the machine, with keys 'board',
        'locked', 'locked_by' and 'lock_time'.

    Returns:
      A string describing whether the machine is locked and, if so, by whom
      and since when.
    """
    if state['locked']:
      return '%s (%s)\tlocked by %s since %s' % (
          name, state['board'], state['locked_by'], state['lock_time'])
    return '%s (%s)\tunlocked' % (name, state['board'])

  def ListMachineStates(self, machine_states):
    """Gets and prints the current status for a list of machines.

    Prints out the current status for all of the machines in the current
    AFELockManager's list of machines (set when the object is initialized).
    HW Lab machines are printed first, under their own header, followed by
    the machines from the local server.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
    """
    local_machines = []
    printed_hdr = False
    for m in machine_states:
      cros_name = m + '.cros'
      if (m in self.toolchain_lab_machines or
          cros_name in self.toolchain_lab_machines):
        # Show the name exactly as it appears in the lab machine list.
        name = m if m in self.toolchain_lab_machines else cros_name
        if not printed_hdr:
          self.PrintStatusHeader(True)
          printed_hdr = True
        print(self._FormatMachineState(name, machine_states[m]))
      else:
        local_machines.append(m)

    if local_machines:
      self.PrintStatusHeader(False)
      for m in local_machines:
        print(self._FormatMachineState(m, machine_states[m]))

  def UpdateLockInAFE(self, should_lock_machine, machine):
    """Calls an AFE server to lock/unlock a machine.

    Lab machines are updated on the main AFE server, using the machine name
    up to its first '.'; all other machines are updated on the local AFE
    server under the name given.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine (True)
        or unlock the machine (False).
      machine: The machine to update.

    Raises:
      LockingError:  An error occurred while attempting to update the machine
        state.
    """
    action = 'lock'
    if not should_lock_machine:
      action = 'unlock'
    kwargs = {'locked': should_lock_machine}
    kwargs['lock_reason'] = 'toolchain user request (%s)' % self.user

    cros_name = machine + '.cros'
    if cros_name in self.toolchain_lab_machines:
      machine = cros_name
    if machine in self.toolchain_lab_machines:
      m = machine.split('.')[0]
      afe_server = self.afe
    else:
      m = machine
      afe_server = self.local_afe

    try:
      afe_server.run('modify_hosts',
                     host_filter_data={'hostname__in': [m]},
                     update_data=kwargs)
    except Exception as e:
      traceback.print_exc()
      raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e)))

  def UpdateMachines(self, lock_machines):
    """Sets the locked state of the machines to the requested value.

    The machines updated are the ones in self.machines (specified when the
    class object was initialized).

    Args:
      lock_machines: Boolean indicating whether to lock the machines (True) or
        unlock the machines (False).

    Returns:
      A list of the machines whose state was successfully updated.

    Raises:
      LockingError:  Updating one of the machines failed (raised by
        UpdateLockInAFE); machines after it are not processed.
    """
    updated_machines = []
    for m in self.machines:
      self.UpdateLockInAFE(lock_machines, m)
      # Since we returned from self.UpdateLockInAFE we assume the request
      # succeeded.
      if lock_machines:
        self.logger.LogOutput('Locked machine(s) %s.' % m)
      else:
        self.logger.LogOutput('Unlocked machine(s) %s.' % m)
      updated_machines.append(m)

    return updated_machines

  def _InternalRemoveMachine(self, machine):
    """Remove machine from internal list of machines.

    Args:
      machine: Name of machine to be removed from internal list.
    """
    # Check to see if machine is lab machine and if so, make sure it has
    # ".cros" on the end.  A name counts as a lab machine here when both
    # 'rack' and 'row' occur in it somewhere after the first character.
    cros_machine = machine
    if machine.find('rack') > 0 and machine.find('row') > 0:
      if machine.find('.cros') == -1:
        cros_machine = cros_machine + '.cros'

    # Drop the machine whether it was listed with or without '.cros'.
    self.machines = [m for m in self.machines
                     if m != cros_machine and m != machine]

  def CheckMachineLocks(self, machine_states, cmd):
    """Check that every machine in requested list is in the proper state.

    If the cmd is 'unlock' verify that every machine is locked by requestor.
    If the cmd is 'lock' verify that every machine is currently unlocked.
    Machines that are already in the requested state are dropped from
    self.machines (with a warning) so that they are not updated again.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
      cmd: The user-requested action for the machines: 'lock' or 'unlock'.

    Raises:
      DontOwnLock: The lock on a requested machine is owned by someone else.
    """
    # items() rather than iteritems() so this also runs under Python 3.
    for k, state in machine_states.items():
      if cmd == 'unlock':
        if not state['locked']:
          self.logger.LogWarning('Attempt to unlock already unlocked machine '
                                 '(%s).' % k)
          self._InternalRemoveMachine(k)
        elif state['locked_by'] != self.user:
          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
                            'else (%s).' % (k, state['locked_by']))
      elif cmd == 'lock':
        if state['locked']:
          self.logger.LogWarning('Attempt to lock already locked machine (%s)' %
                                 k)
          self._InternalRemoveMachine(k)

  def HasAFEServer(self, local):
    """Verifies that the AFELockManager has appropriate AFE server.

    Args:
      local: Boolean indicating whether we are checking for the local server
        (True) or for the global server (False).

    Returns:
      A boolean indicating if the AFELockManager has the requested AFE server.
    """
    server = self.local_afe if local else self.afe
    return server is not None

  def GetMachineStates(self, cmd=''):
    """Gets the current state of all the requested machines.

    Gets the current state of all the requested machines, both from the HW lab
    server and from the local server.  Stores the data in a dictionary keyed
    by machine name.  A machine found on a server is keyed by the hostname
    the server reports; a machine not found (only allowed for 'add') is keyed
    by the requested name and maps to an empty dictionary.

    Args:
      cmd: The command for which we are getting the machine states.  This is
        important because if one of the requested machines is missing we raise
        an exception, unless the requested command is 'add'.

    Returns:
      A dictionary of machine states for all the machines in the AFELockManager
      object.  Each state has the keys 'board', 'locked', 'locked_by' and
      'lock_time'.

    Raises:
      NoAFEServer:  Cannot find the HW Lab or local AFE server.
      AFEAccessError:  An error occurred when querying the server about a
        machine.
    """
    if not self.HasAFEServer(False):
      raise NoAFEServer('Error: Cannot connect to main AFE server.')

    if self.local and not self.HasAFEServer(True):
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    machine_list = {}
    for m in self.machines:
      host_info = None
      cros_name = m + '.cros'
      if (m in self.toolchain_lab_machines or
          cros_name in self.toolchain_lab_machines):
        # The main server is queried with the name up to the first '.'.
        mod_host = m.split('.')[0]
        host_info = self.afe.get_hosts(hostname=mod_host)
        if not host_info:
          raise AFEAccessError('Unable to get information about %s from main'
                               ' autotest server.' % m)
      else:
        # A non-lab machine can only be looked up on the local server; fail
        # clearly when use of the local server has been turned off.
        if self.local_afe is None:
          raise NoAFEServer('Error: No local AFE server available to look up '
                            '%s.' % m)
        host_info = self.local_afe.get_hosts(hostname=m)
        if not host_info and cmd != 'add':
          raise AFEAccessError('Unable to get information about %s from '
                               'local autotest server.' % m)
      if host_info:
        host_info = host_info[0]
        name = host_info.hostname
        values = {}
        values['board'] = host_info.platform if host_info.platform else '??'
        values['locked'] = host_info.locked
        if host_info.locked:
          values['locked_by'] = host_info.locked_by
          values['lock_time'] = host_info.lock_time
        else:
          values['locked_by'] = ''
          values['lock_time'] = ''
        machine_list[name] = values
      else:
        machine_list[m] = {}
    return machine_list


def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--list',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of all known machines.')
  parser.add_argument(
      '--lock',
      dest='cmd',
      action='store_const',
      const='lock',
      help='Lock given machine(s).')
  parser.add_argument(
      '--unlock',
      dest='cmd',
      action='store_const',
      const='unlock',
      help='Unlock given machine(s).')
  parser.add_argument(
      '--status',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of given machine(s).')
  parser.add_argument(
      '--add_machine',
      dest='cmd',
      action='store_const',
      const='add',
      help='Add machine to local machine server.')
  parser.add_argument(
      '--remove_machine',
      dest='cmd',
      action='store_const',
      const='remove',
      help='Remove machine from the local machine server.')
  parser.add_argument(
      '--nolocal',
      dest='local',
      action='store_false',
      default=True,
      help='Do not try to use local machine server.')
  parser.add_argument(
      '--remote', dest='remote', help='machines on which to operate')
  parser.add_argument(
      '--chromeos_root',
      dest='chromeos_root',
      required=True,
      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument(
      '--local_server',
      dest='local_server',
      default=None,
      help='Alternate local autotest server to use.')
  parser.add_argument(
      '--force',
      dest='force',
      action='store_true',
      default=False,
      help='Force lock/unlock of machines, even if not'
      ' current lock owner.')

  options = parser.parse_args(argv)

  # Check for a missing operation first; otherwise an invocation with neither
  # an operation nor --remote would be reported as "No machines specified".
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock,'
                 ' --add_machine, --remove_machine).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  machine_list = []
  if options.remote:
    machine_list = options.remote.split()

  lock_manager = AFELockManager(machine_list, options.force,
                                options.chromeos_root, options.local_server,
                                options.local)

  machine_states = lock_manager.GetMachineStates(cmd=options.cmd)
  cmd = options.cmd

  if cmd == 'status':
    lock_manager.ListMachineStates(machine_states)

  elif cmd == 'lock':
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(True)

  elif cmd == 'unlock':
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(False)

  elif cmd == 'add':
    lock_manager.AddMachinesToLocalServer()

  elif cmd == 'remove':
    lock_manager.RemoveMachinesFromLocalServer()

  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))