Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 # Copyright 2016 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Kill slow queries in local autotest database."""
      7 
      8 import logging
      9 import optparse
     10 import sys
     11 import time
     12 
     13 import common
     14 from autotest_lib.client.common_lib import global_config
     15 from autotest_lib.site_utils import gmail_lib
     16 from autotest_lib.client.common_lib import utils
     17 from autotest_lib.site_utils.stats import mysql_stats
     18 
     19 try:
     20     from chromite.lib import metrics
     21     from chromite.lib import ts_mon_config
     22 except ImportError:
     23     metrics = utils.metrics_mock
     24     ts_mon_config = utils.metrics_mock
     25 
# Autotest installation directory.
# NOTE(review): not referenced anywhere else in the visible part of this file.
AT_DIR='/usr/local/autotest'
# Default DB credentials, looked up in the CROS section of the global config
# ('' is passed as the fallback default for both).
DEFAULT_USER = global_config.global_config.get_config_value(
        'CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = global_config.global_config.get_config_value(
        'CROS', 'db_backup_password', type=str, default='')
# Default recipient of the summary mail, looked up as SCHEDULER/notify_email
# in the global config ('' is passed as the fallback default).
DEFAULT_MAIL = global_config.global_config.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
     33 
     34 
     35 def parse_options():
     36     """Parse the command line arguments."""
     37     usage = 'usage: %prog [options]'
     38     parser = optparse.OptionParser(usage=usage)
     39     parser.add_option('-u', '--user', default=DEFAULT_USER,
     40                       help='User to login to the Autotest DB. Default is the '
     41                            'one defined in config file.')
     42     parser.add_option('-p', '--password', default=DEFAULT_PASSWD,
     43                       help='Password to login to the Autotest DB. Default is '
     44                            'the one defined in config file.')
     45     parser.add_option('-t', '--timeout', type=int, default=300,
     46                       help='Timeout boundry of the slow database query. '
     47                            'Default is 300s')
     48     parser.add_option('-m', '--mail', default=DEFAULT_MAIL,
     49                       help='Mail address to send the summary to. Default is '
     50                            'ChromeOS infra Deputy')
     51     options, args = parser.parse_args()
     52     return parser, options, args
     53 
     54 
     55 def verify_options_and_args(options, args):
     56     """Verify the validity of options and args.
     57 
     58     @param options: The parsed options to verify.
     59     @param args: The parsed args to verify.
     60 
     61     @returns: True if verification passes, False otherwise.
     62     """
     63     if args:
     64         logging.error('Unknown arguments: ' + str(args))
     65         return False
     66 
     67     if not (options.user and options.password):
     68         logging.error('Failed to get the default user of password for Autotest'
     69                       ' DB. Please specify them through the command line.')
     70         return False
     71     return True
     72 
     73 
     74 def format_the_output(slow_queries):
     75     """Convert a list of slow queries into a readable string format.
     76 
     77     e.g. [(a, b, c...)]  -->
     78          "Id: a
     79           Host: b
     80           User: c
     81           ...
     82          "
     83     @param slow_queries: A list of tuples, one tuple contains all the info about
     84                          one single slow query.
     85 
     86     @returns: one clean string representation of all the slow queries.
     87     """
     88     query_str_list = [('Id: %s\nUser: %s\nHost: %s\ndb: %s\nCommand: %s\n'
     89                        'Time: %s\nState: %s\nInfo: %s\n') %
     90                       q for q in slow_queries]
     91     return '\n'.join(query_str_list)
     92 
     93 
     94 def kill_slow_queries(user, password, timeout):
     95     """Kill the slow database queries running beyond the timeout limit.
     96 
     97     @param user: User to login to the Autotest DB.
     98     @param password: Password to login to the Autotest DB.
     99     @param timeout: Timeout limit to kill the slow queries.
    100 
    101     @returns: a tuple, first element is the string representation of all the
    102               killed slow queries, second element is the total number of them.
    103     """
    104     cursor = mysql_stats.RetryingConnection('localhost', user, password)
    105     cursor.Connect()
    106 
    107     # Get the processlist.
    108     cursor.Execute('SHOW FULL PROCESSLIST')
    109     processlist = cursor.Fetchall()
    110     # Filter out the slow queries and kill them.
    111     slow_queries = [p for p in processlist if p[4]=='Query' and p[5]>=timeout]
    112     queries_str = ''
    113     num_killed_queries = 0
    114     if slow_queries:
    115         queries_str = format_the_output(slow_queries)
    116         queries_ids = [q[0] for q in slow_queries]
    117         logging.info('Start killing following slow queries\n%s', queries_str)
    118         for query_id in queries_ids:
    119             logging.info('Killing %s...', query_id)
    120             cursor.Execute('KILL %d' % query_id)
    121             logging.info('Done!')
    122             num_killed_queries += 1
    123     else:
    124         logging.info('No slow queries over %ds!', timeout)
    125     return (queries_str, num_killed_queries)
    126 
    127 
    128 def main():
    129     """Main entry."""
    130     # Clear all loggers to make sure the following basicConfig take effect.
    131     logging.shutdown()
    132     reload(logging)
    133     logging.basicConfig(format='%(asctime)s %(message)s',
    134                         datefmt='%m/%d/%Y %H:%M:%S', level=logging.DEBUG)
    135 
    136     with ts_mon_config.SetupTsMonGlobalState(service_name='kill_slow_queries',
    137                                              indirect=True):
    138         count = 0
    139         parser, options, args = parse_options()
    140         if not verify_options_and_args(options, args):
    141             parser.print_help()
    142             return 1
    143         try:
    144             while True:
    145                 result_log_strs, count = kill_slow_queries(
    146                     options.user, options.password, options.timeout)
    147                 if result_log_strs:
    148                     gmail_lib.send_email(
    149                         options.mail,
    150                         'Successfully killed slow autotest db queries',
    151                         'Below are killed queries:\n%s' % result_log_strs)
    152                     m = 'chromeos/autotest/afe_db/killed_slow_queries'
    153                     metrics.Counter(m).increment_by(count)
    154                 time.sleep(options.timeout)
    155         except Exception as e:
    156             m = 'chromeos/autotest/afe_db/failed_to_kill_query'
    157             metrics.Counter(m).increment()
    158             logging.error('Failed to kill slow db queries.\n%s', e)
    159             raise
    160 
    161 
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())
    164 
    165