#!/usr/bin/python

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Queries a MySQL database and emits status metrics to Monarch.

Note: confusingly, 'Innodb_buffer_pool_reads' is actually the cache-misses, not
the number of reads to the buffer pool. 'Innodb_buffer_pool_read_requests'
corresponds to the number of reads to the buffer pool.
"""
import logging
import sys

import MySQLdb
import time

import common

from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib.cros import retry

from chromite.lib import metrics
from chromite.lib import ts_mon_config

AT_DIR = '/usr/local/autotest'

# Database credentials, read from the 'CROS' section of the global config.
DEFAULT_USER = global_config.global_config.get_config_value(
    'CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = global_config.global_config.get_config_value(
    'CROS', 'db_backup_password', type=str, default='')

# Target number of seconds between the starts of two query/emit rounds.
LOOP_INTERVAL = 60

# Cumulative status variables; emitted as the delta from a startup baseline.
EMITTED_STATUSES_COUNTERS = [
    'bytes_received',
    'bytes_sent',
    'connections',
    'Innodb_buffer_pool_read_requests',
    'Innodb_buffer_pool_reads',
    'Innodb_row_lock_waits',
    'questions',
    'slow_queries',
    'threads_created',
]

# Point-in-time status variables; emitted with their current value.
EMITTED_STATUS_GAUGES = [
    'Innodb_row_lock_time_avg',
    'Innodb_row_lock_current_waits',
    'threads_running',
    'threads_connected',
]


class RetryingConnection(object):
    """Maintains a db connection and a cursor.

    The positional and keyword arguments given to the constructor are stored
    and forwarded unchanged to MySQLdb.connect() on every (re)connect.
    """
    # Delay between retries, passed to retry.retry as delay_sec.
    INITIAL_SLEEP_SECONDS = 20
    # Upper bound on the total time spent retrying one operation.
    MAX_TIMEOUT_SECONDS = 60 * 60

    def __init__(self, *args, **kwargs):
        """Stores the connection arguments; does not connect yet.

        @param args: Positional arguments for MySQLdb.connect().
        @param kwargs: Keyword arguments for MySQLdb.connect().
        """
        self.args = args
        self.kwargs = kwargs
        self.db = None
        self.cursor = None

    def Connect(self):
        """Establishes a MySQL connection and creates a cursor."""
        self.db = MySQLdb.connect(*self.args, **self.kwargs)
        self.cursor = self.db.cursor()

    def Reconnect(self):
        """Attempts to close the connection, then reconnects.

        Closing is best effort: MySQLdb errors raised while closing are
        ignored, since the connection is about to be replaced anyway.
        """
        # Close the cursor and the connection independently so that a failure
        # to close the cursor does not leave the connection open, and skip
        # whatever was never created (Reconnect called before Connect).
        for resource in (self.cursor, self.db):
            if resource is None:
                continue
            try:
                resource.close()
            except MySQLdb.Error:
                pass
        self.Connect()

    def RetryWith(self, func):
        """Run a function, retrying on OperationalError.

        Reconnect() is registered as the retry callback.

        @param func: Zero-argument callable to run.
        @returns Whatever func returns.
        """
        return retry.retry(
            MySQLdb.OperationalError,
            delay_sec=self.INITIAL_SLEEP_SECONDS,
            # retry.retry expects its timeout in minutes, while the class
            # constant is expressed in seconds; convert instead of passing
            # 3600 "minutes". The float divisor avoids Python 2 truncation.
            timeout_min=self.MAX_TIMEOUT_SECONDS / 60.0,
            callback=self.Reconnect
        )(func)()

    def Execute(self, *args, **kwargs):
        """Runs .execute on the cursor, reconnecting on failure.

        @param args: Positional arguments for cursor.execute().
        @param kwargs: Keyword arguments for cursor.execute().
        @returns The result of cursor.execute().
        """
        def _Execute():
            # Look up self.cursor at call time: Reconnect() replaces it
            # between attempts.
            return self.cursor.execute(*args, **kwargs)
        return self.RetryWith(_Execute)

    def Fetchall(self):
        """Runs .fetchall on the cursor.

        @returns The result of cursor.fetchall().
        """
        return self.cursor.fetchall()


def GetStatus(connection, status):
    """Get the status variable from the database, retrying on failure.

    @param connection: RetryingConnection to query with.
    @param status: Name of the status variable.
    @returns The value of the status variable, as an int.
    @raises IndexError: If the query returned no rows.
    """
    connection.Execute('SHOW GLOBAL STATUS LIKE "%s";' % status)
    rows = connection.Fetchall()

    if not rows:
        # Log before failing so the missing variable is named in the logs;
        # indexing the empty result would raise a bare IndexError otherwise.
        logging.error('Cannot find any global status like %s', status)
        raise IndexError('No global status matches %r' % (status,))

    # Each row is a (variable name, value) pair; only the value is needed.
    output = rows[0][1]

    if not output:
        logging.error('Cannot find any global status like %s', status)

    return int(output)


def QueryAndEmit(baselines, conn):
    """Queries MySQL for important stats and emits Monarch metrics

    @param baselines: A dict containing the initial values for the cumulative
                      metrics.
    @param conn: The mysql connection object.
    """
    # Cumulative variables are reported relative to their startup baseline.
    for status in EMITTED_STATUSES_COUNTERS:
        metric_name = 'chromeos/autotest/afe_db/%s' % status.lower()
        delta = GetStatus(conn, status) - baselines[status]
        metrics.Counter(metric_name).set(delta)

    for status in EMITTED_STATUS_GAUGES:
        metric_name = 'chromeos/autotest/afe_db/%s' % status.lower()
        metrics.Gauge(metric_name).set(GetStatus(conn, status))

    pages_free = GetStatus(conn, 'Innodb_buffer_pool_pages_free')
    pages_total = GetStatus(conn, 'Innodb_buffer_pool_pages_total')

    # One metric with a boolean 'used' field: free pages and used pages.
    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
        pages_free, fields={'used': False})

    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
        pages_total - pages_free, fields={'used': True})


def main():
    """Sets up ts_mon and repeatedly queries MySQL stats"""
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    conn = RetryingConnection('localhost', DEFAULT_USER, DEFAULT_PASSWD)
    conn.Connect()

    # TODO(crbug.com/803566) Use indirect=False to mitigate orphan mysql_stats
    # processes overwhelming shards.
    with ts_mon_config.SetupTsMonGlobalState('mysql_stats', indirect=False):
        QueryLoop(conn)


def QueryLoop(conn):
    """Queries and emits metrics every LOOP_INTERVAL seconds.

    Never returns.

    @param conn: The mysql connection object.
    """
    # Get the baselines for cumulative metrics. Otherwise the windowed rate at
    # the very beginning will be extremely high as it shoots up from 0 to its
    # current value.
    baselines = {s: GetStatus(conn, s) for s in EMITTED_STATUSES_COUNTERS}

    while True:
        now = time.time()
        QueryAndEmit(baselines, conn)
        time_spent = time.time() - now
        # Sleep only for the remainder of the interval; never a negative time
        # when a round took longer than LOOP_INTERVAL.
        sleep_duration = LOOP_INTERVAL - time_spent
        time.sleep(max(0, sleep_duration))


if __name__ == '__main__':
    main()