Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 # Copyright 2017 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Utility to check the replication delay of the slave databases.
      7 
      8 The utility checks the value of Seconds_Behind_Master of slave databases,
      9 including:
     10 Slave databases of AFE database, retrieved from server database.
     11 Readonly replicas of TKO database, passed in by option --replicas.
     12 """
     13 
     14 import argparse
     15 import logging
     16 import os
     17 import re
     18 
     19 import common
     20 from autotest_lib.client.bin import utils
     21 from autotest_lib.client.common_lib import error
     22 from autotest_lib.client.common_lib import global_config
     23 from autotest_lib.client.common_lib import logging_config
     24 from autotest_lib.frontend import setup_django_environment
     25 from autotest_lib.site_utils import server_manager_utils
     26 
     27 from chromite.lib import metrics
     28 
     29 
# Shared handle to the global config; main() reads the AUTOTEST_WEB database
# credentials through it.
CONFIG = global_config.global_config
     31 
     32 # SQL command to remove old test results in TKO database.
     33 SLAVE_STATUS_CMD = 'show slave status\G'
     34 DELAY_TIME_REGEX = 'Seconds_Behind_Master:\s(\d+)'
     35 DELAY_METRICS = 'chromeos/autotest/database/seconds_behind_master'
     36 # A large delay to report to metrics indicating the replica is in error.
     37 LARGE_DELAY = 1000000
     38 
     39 def check_delay(server, user, password):
     40     """Check the delay of a given slave database server.
     41 
     42     @param server: Hostname or IP address of the MySQL server.
     43     @param user: User name to log in the MySQL server.
     44     @param password: Password to log in the MySQL server.
     45     """
     46     try:
     47         result = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
     48         search = re.search(DELAY_TIME_REGEX, result, re.MULTILINE)
     49         if search:
     50             delay = int(search.group(1))
     51             metrics.SecondsDistribution(DELAY_METRICS).add(
     52                     delay, fields={'server': server})
     53             logging.debug('Seconds_Behind_Master of server %s is %d.', server,
     54                           delay)
     55         else:
     56             # The value of Seconds_Behind_Master could be NULL, report a large
     57             # number to indicate database error.
     58             metrics.SecondsDistribution(DELAY_METRICS).add(
     59                     LARGE_DELAY, fields={'server': server})
     60             logging.error('Failed to get Seconds_Behind_Master of server %s '
     61                           'from slave status:\n %s', server, result)
     62     except error.CmdError:
     63         logging.exception('Failed to get slave status of server %s.', server)
     64 
     65 
     66 def parse_options():
     67     """Parse command line inputs.
     68 
     69     @return: Options to run the script.
     70     """
     71     parser = argparse.ArgumentParser()
     72     parser.add_argument('-r', '--replicas', nargs='+',
     73                         help='IP addresses of readonly replicas of TKO.')
     74     parser.add_argument('-l', '--logfile', type=str,
     75                         default=None,
     76                         help='Path to the log file to save logs.')
     77     return parser.parse_args()
     78 
     79 
     80 def main():
     81     """Main script."""
     82     options = parse_options()
     83     log_config = logging_config.LoggingConfig()
     84     if options.logfile:
     85         log_config.add_file_handler(
     86                 file_path=os.path.abspath(options.logfile), level=logging.DEBUG)
     87 
     88     db_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
     89     db_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')
     90 
     91     global_db_user = CONFIG.get_config_value(
     92                 'AUTOTEST_WEB', 'global_db_user', default=db_user)
     93     global_db_password = CONFIG.get_config_value(
     94                 'AUTOTEST_WEB', 'global_db_password', default=db_password)
     95 
     96     logging.info('Start checking Seconds_Behind_Master of slave databases')
     97 
     98     for replica in options.replicas:
     99         check_delay(replica, global_db_user, global_db_password)
    100 
    101     slaves = server_manager_utils.get_servers(
    102             role='database_slave', status='primary')
    103     for slave in slaves:
    104         check_delay(slave.hostname, db_user, db_password)
    105 
    106     logging.info('Finished checking.')
    107 
    108 
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
    111