Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 # Copyright 2017 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Utility to check the replication delay of the slave databases.
      7 
      8 The utility checks the value of Seconds_Behind_Master of slave databases,
      9 including:
     10 Slave databases of AFE database, retrieved from server database.
     11 Readonly replicas of TKO database, passed in by option --replicas.
     12 """
     13 
     14 import argparse
     15 import logging
     16 import os
     17 import re
     18 
     19 import common
     20 from autotest_lib.client.bin import utils
     21 from autotest_lib.client.common_lib import error
     22 from autotest_lib.client.common_lib import global_config
     23 from autotest_lib.client.common_lib import logging_config
     24 from autotest_lib.frontend import setup_django_environment
     25 from autotest_lib.server import site_utils
     26 from autotest_lib.site_utils import server_manager_utils
     27 
     28 from chromite.lib import metrics
     29 
     30 
# Handle on the global autotest configuration; main() reads the AUTOTEST_WEB
# database credentials from it.
CONFIG = global_config.global_config
     32 
     33 # SQL command to remove old test results in TKO database.
     34 SLAVE_STATUS_CMD = 'show slave status\G'
     35 DELAY_TIME_REGEX = 'Seconds_Behind_Master:\s(\d+)'
     36 DELAY_METRICS = 'chromeos/autotest/afe_db/slave_delay_seconds'
     37 # A large delay to report to metrics indicating the replica is in error.
     38 LARGE_DELAY = 1000000
     39 
     40 def check_delay(server, user, password):
     41     """Check the delay of a given slave database server.
     42 
     43     @param server: Hostname or IP address of the MySQL server.
     44     @param user: User name to log in the MySQL server.
     45     @param password: Password to log in the MySQL server.
     46     """
     47     try:
     48         result = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
     49         search = re.search(DELAY_TIME_REGEX, result, re.MULTILINE)
     50         m = metrics.Float(DELAY_METRICS)
     51         f = {'slave': server}
     52         if search:
     53             delay = float(search.group(1))
     54             m.set(delay, fields=f)
     55             logging.debug('Seconds_Behind_Master of server %s is %d.', server,
     56                           delay)
     57         else:
     58             # The value of Seconds_Behind_Master could be NULL, report a large
     59             # number to indicate database error.
     60             m.set(LARGE_DELAY, fields=f)
     61             logging.error('Failed to get Seconds_Behind_Master of server %s '
     62                           'from slave status:\n %s', server, result)
     63     except error.CmdError:
     64         logging.exception('Failed to get slave status of server %s.', server)
     65 
     66 
     67 def parse_options():
     68     """Parse command line inputs.
     69 
     70     @return: Options to run the script.
     71     """
     72     parser = argparse.ArgumentParser()
     73     parser.add_argument('-r', '--replicas', nargs='+',
     74                         default=[],
     75                         help='IP addresses of readonly replicas of TKO.')
     76     parser.add_argument('-l', '--logfile', type=str,
     77                         default=None,
     78                         help='Path to the log file to save logs.')
     79     return parser.parse_args()
     80 
     81 
     82 def main():
     83     """Main script."""
     84     with site_utils.SetupTsMonGlobalState('check_slave_db_delay',indirect=True):
     85         options = parse_options()
     86         log_config = logging_config.LoggingConfig()
     87         if options.logfile:
     88             log_config.add_file_handler(
     89                 file_path=os.path.abspath(options.logfile),
     90                 level=logging.DEBUG
     91             )
     92         db_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
     93         db_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')
     94 
     95         global_db_user = CONFIG.get_config_value(
     96                     'AUTOTEST_WEB', 'global_db_user', default=db_user)
     97         global_db_password = CONFIG.get_config_value(
     98                     'AUTOTEST_WEB', 'global_db_password', default=db_password)
     99 
    100         logging.info('Start checking Seconds_Behind_Master of slave databases')
    101 
    102         for replica in options.replicas:
    103             check_delay(replica, global_db_user, global_db_password)
    104         if not options.replicas:
    105             logging.warning('No replicas checked.')
    106 
    107         slaves = server_manager_utils.get_servers(
    108                 role='database_slave', status='primary')
    109         for slave in slaves:
    110             check_delay(slave.hostname, db_user, db_password)
    111         if not slaves:
    112             logging.warning('No slaves checked.')
    113 
    114 
    115         logging.info('Finished checking.')
    116 
    117 
    118 if __name__ == '__main__':
    119     main()
    120