#!/usr/bin/python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utility to check the replication delay of the slave databases.

The utility checks the value of Seconds_Behind_Master of slave databases,
including:
Slave databases of AFE database, retrieved from server database.
Readonly replicas of TKO database, passed in by option --replicas.
"""

import argparse
import logging
import os
import re

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import logging_config
from autotest_lib.frontend import setup_django_environment
from autotest_lib.site_utils import server_manager_utils

from chromite.lib import metrics


CONFIG = global_config.global_config

# SQL command to query the replication status of a slave database. `\G` is
# the mysql client statement terminator that prints one field per line.
# Raw strings keep the backslashes literal without invalid-escape warnings.
SLAVE_STATUS_CMD = r'show slave status\G'
# Captures the numeric value of Seconds_Behind_Master from the status output.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
DELAY_METRICS = 'chromeos/autotest/database/seconds_behind_master'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000


def check_delay(server, user, password):
    """Check the delay of a given slave database server.

    Runs SLAVE_STATUS_CMD on the server and reports the parsed
    Seconds_Behind_Master value to the DELAY_METRICS distribution. When no
    numeric value can be parsed, LARGE_DELAY is reported instead. When the
    command itself fails, the failure is logged and nothing is reported.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    try:
        result = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
        return

    search = re.search(DELAY_TIME_REGEX, result, re.MULTILINE)
    if search:
        delay = int(search.group(1))
        metrics.SecondsDistribution(DELAY_METRICS).add(
                delay, fields={'server': server})
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      delay)
    else:
        # The value of Seconds_Behind_Master could be NULL, report a large
        # number to indicate database error.
        metrics.SecondsDistribution(DELAY_METRICS).add(
                LARGE_DELAY, fields={'server': server})
        logging.error('Failed to get Seconds_Behind_Master of server %s '
                      'from slave status:\n %s', server, result)


def parse_options():
    """Parse command line inputs.

    @return: Options to run the script. `replicas` is None when --replicas
             is not given; `logfile` is None when --logfile is not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--replicas', nargs='+',
                        help='IP addresses of readonly replicas of TKO.')
    parser.add_argument('-l', '--logfile', type=str,
                        default=None,
                        help='Path to the log file to save logs.')
    return parser.parse_args()


def main():
    """Main script.

    Checks the delay of every TKO replica passed in by --replicas using the
    global database credentials, then the delay of every AFE slave database
    returned by the server database using the local database credentials.
    """
    options = parse_options()
    log_config = logging_config.LoggingConfig()
    if options.logfile:
        log_config.add_file_handler(
                file_path=os.path.abspath(options.logfile),
                level=logging.DEBUG)

    db_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
    db_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')

    # The global database credentials fall back to the local ones.
    global_db_user = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_user', default=db_user)
    global_db_password = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_password', default=db_password)

    logging.info('Start checking Seconds_Behind_Master of slave databases')

    # argparse leaves `replicas` as None when --replicas is omitted; treat
    # that as "no replicas" so the AFE slaves below are still checked.
    for replica in options.replicas or []:
        check_delay(replica, global_db_user, global_db_password)

    slaves = server_manager_utils.get_servers(
            role='database_slave', status='primary')
    for slave in slaves:
        check_delay(slave.hostname, db_user, db_password)

    logging.info('Finished checking.')


if __name__ == '__main__':
    main()