Home | History | Annotate | Download | only in site_utils
      1 #!/usr/bin/python
      2 
      3 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 """Module used to back up the mysql db and upload to Google Storage.
      8 
      9 Usage:
     10   backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
     11 
     12   gs_bucket may refer to a local location by omitting gs:// and giving a local
     13   path if desired for testing. The example usage above creates a dump
     14   of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
     15   cleans up older dumps if there are more than 10 in that directory.
     16 """
     17 
     18 import datetime
     19 from distutils import version
     20 import logging
     21 import optparse
     22 import os
     23 import tempfile
     24 
     25 import common
     26 
     27 from autotest_lib.client.common_lib import error
     28 from autotest_lib.client.common_lib import global_config
     29 from autotest_lib.client.common_lib import logging_manager
     30 from autotest_lib.client.common_lib import utils
     31 from autotest_lib.utils import test_importer
     32 
     33 from chromite.lib import metrics
     34 from chromite.lib import ts_mon_config
     35 
     36 _ATTEMPTS = 3
     37 _GSUTIL_BIN = 'gsutil'
     38 _GS_BUCKET = 'gs://chromeos-lab/backup/database'
     39 # TODO(scottz): Should we need to ignore more than one database a general
     40 # function should be designed that lists tables in the database and properly
     41 # creates the --ignore-table= args to be passed to mysqldump.
     42 # Tables to ignore when dumping all databases.
     43 # performance_schema is an internal database that cannot be dumped
     44 IGNORE_TABLES = ['performance_schema.cond_instances',
     45                  'performance_schema.events_waits_current',
     46                  'performance_schema.cond_instances',
     47                  'performance_schema.events_waits_history',
     48                  'performance_schema.events_waits_history_long',
     49                  'performance_schema.events_waits_summary_by_instance',
     50                  ('performance_schema.'
     51                   'events_waits_summary_by_thread_by_event_name'),
     52                  'performance_schema.events_waits_summary_global_by_event_name',
     53                  'performance_schema.file_instances',
     54                  'performance_schema.file_summary_by_event_name',
     55                  'performance_schema.file_summary_by_instance',
     56                  'performance_schema.mutex_instances',
     57                  'performance_schema.performance_timers',
     58                  'performance_schema.rwlock_instances',
     59                  'performance_schema.setup_consumers',
     60                  'performance_schema.setup_instruments',
     61                  'performance_schema.setup_timers',
     62                  'performance_schema.threads']
     63 
     64 # Conventional mysqldump schedules.
     65 _DAILY = 'daily'
     66 _WEEKLY = 'weekly'
     67 _MONTHLY = 'monthly'
     68 
     69 # Back up server db
     70 _SERVER_DB = 'server_db'
     71 
     72 # Contrary to a conventional mysql dump which takes O(hours) on large databases,
     73 # a host dump is the cheapest form of backup possible. We dump the output of a
     74 # of a mysql command showing all hosts and their pool labels to a text file that
     75 # is backed up to google storage.
     76 _ONLY_HOSTS = 'only_hosts'
     77 _ONLY_SHARDS = 'only_shards'
     78 _SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY]
     79 
     80 class BackupError(Exception):
     81   """Raised for error occurred during backup."""
     82 
     83 
     84 class MySqlArchiver(object):
     85     """Class that archives the Autotest MySQL DB to Google Storage.
     86 
     87     Vars:
     88       gs_dir:  The path to the directory in Google Storage that this dump file
     89                will be uploaded to.
     90       number_to_keep:  The number of dumps we should store.
     91     """
     92     _AUTOTEST_DB = "chromeos_autotest_db"
     93     _SERVER_DB = "chromeos_lab_servers"
     94 
     95 
     96     def __init__(self, scheduled_type, number_to_keep, gs_bucket):
     97         # For conventional scheduled type, we back up all databases.
     98         # self._db is only used when scheduled_type is not
     99         # conventional scheduled type.
    100         self._db = self._get_db_name(scheduled_type)
    101         self._gs_dir = '/'.join([gs_bucket, scheduled_type])
    102         self._number_to_keep = number_to_keep
    103         self._type = scheduled_type
    104 
    105 
    106     @classmethod
    107     def _get_db_name(cls, scheduled_type):
    108         """Get the db name to backup.
    109 
    110         @param scheduled_type: one of _SCHEDULER_TYPES.
    111 
    112         @returns: The name of the db to backup.
    113                   Or None for backup all dbs.
    114         """
    115         if scheduled_type == _SERVER_DB:
    116             return cls._SERVER_DB
    117         elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
    118             return cls._AUTOTEST_DB
    119         else:
    120             return None
    121 
    122     @staticmethod
    123     def _get_user_pass():
    124         """Returns a tuple containing the user/pass to use to access the DB."""
    125         user = global_config.global_config.get_config_value(
    126                 'CROS', 'db_backup_user')
    127         password = global_config.global_config.get_config_value(
    128                 'CROS', 'db_backup_password')
    129         return user, password
    130 
    131 
    132     def create_mysql_dump(self):
    133         """Returns the path to a mysql dump of the current autotest DB."""
    134         user, password = self._get_user_pass()
    135         _, filename = tempfile.mkstemp('autotest_db_dump')
    136         logging.debug('Dumping mysql database to file %s', filename)
    137         extra_dump_args = ''
    138         for entry in IGNORE_TABLES:
    139             extra_dump_args += '--ignore-table=%s ' % entry
    140 
    141         if not self._db:
    142             extra_dump_args += "--all-databases"
    143         db_name = self._db or ''
    144         utils.system('set -o pipefail; mysqldump --user=%s '
    145                      '--password=%s %s %s| gzip - > %s' % (
    146                      user, password, extra_dump_args, db_name, filename))
    147         return filename
    148 
    149 
    150     def _create_dump_from_query(self, query):
    151         """Dumps result of a query into a text file.
    152 
    153         @param query: Query to execute.
    154 
    155         @return: The path to a tempfile containing the response of the query.
    156         """
    157         if not self._db:
    158             raise BackupError("_create_dump_from_query requires a specific db.")
    159         parameters = {'db': self._db, 'query': query}
    160         parameters['user'], parameters['password'] = self._get_user_pass()
    161         _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
    162         utils.system(
    163                 'set -o pipefail; mysql -u %(user)s -p%(password)s '
    164                 '%(db)s -e "%(query)s" > %(filename)s' %
    165                 parameters)
    166         return parameters['filename']
    167 
    168 
    169     def create_host_dump(self):
    170         """Dumps hosts and their labels into a text file.
    171 
    172         @return: The path to a tempfile containing a dump of
    173                  hosts and their pool labels.
    174         """
    175         respect_static_labels = global_config.global_config.get_config_value(
    176                 'SKYLAB', 'respect_static_labels', type=bool, default=False)
    177         template = ('SELECT hosts.hostname, labels.name FROM afe_hosts AS '
    178                     'hosts JOIN %(hosts_labels_table)s AS hlt ON '
    179                     'hosts.id = hlt.host_id '
    180                     'JOIN %(labels_table)s AS labels '
    181                     'ON labels.id = hlt.%(column)s '
    182                     'WHERE labels.name LIKE \'%%pool%%\';')
    183         if respect_static_labels:
    184             # HACK: We're not checking the replaced_by_static_label on the
    185             # pool label and just hard coding the fact that pool labels are
    186             # indeed static labels. Expedience.
    187             query = template % {
    188                     'hosts_labels_table': 'afe_static_hosts_labels',
    189                     'labels_table': 'afe_static_labels',
    190                     'column': 'staticlabel_id',
    191             }
    192         else:
    193             query = template % {
    194                     'hosts_labels_table': 'afe_hosts_labels',
    195                     'labels_table': 'afe_labels',
    196                     'column': 'label_id',
    197             }
    198         return self._create_dump_from_query(query)
    199 
    200 
    201     def create_shards_dump(self):
    202         """Dumps shards and their labels into a text file.
    203 
    204         @return: The path to a tempfile containing a dump of
    205                  shards and their labels.
    206         """
    207         query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
    208                  'JOIN afe_shards_labels '
    209                  'ON shards.id = afe_shards_labels.shard_id '
    210                  'JOIN afe_labels AS labels '
    211                  'ON labels.id = afe_shards_labels.label_id;')
    212         return self._create_dump_from_query(query)
    213 
    214 
    215     def dump(self):
    216         """Creates a data dump based on the type of schedule.
    217 
    218         @return: The path to a file containing the dump.
    219         """
    220         if self._type == _ONLY_HOSTS:
    221             return self.create_host_dump()
    222         if self._type == _ONLY_SHARDS:
    223             return self.create_shards_dump()
    224         return self.create_mysql_dump()
    225 
    226 
    227     def _get_name(self):
    228         """Returns the name of the dump as presented to google storage."""
    229         if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
    230             file_type = 'txt'
    231         else:
    232             file_type = 'gz'
    233         return 'autotest-dump.%s.%s' % (
    234                 datetime.datetime.now().strftime('%y.%m.%d'), file_type)
    235 
    236 
    237     @staticmethod
    238     def _retry_run(cmd):
    239         """Run the specified |cmd| string, retrying if necessary.
    240 
    241         Args:
    242           cmd: The command to run.
    243         """
    244         for attempt in range(_ATTEMPTS):
    245             try:
    246                 return utils.system_output(cmd)
    247             except error.CmdError:
    248                 if attempt == _ATTEMPTS - 1:
    249                     raise
    250                 else:
    251                     logging.error('Failed to run %r', cmd)
    252 
    253 
    254     def upload_to_google_storage(self, dump_file):
    255         """Uploads the given |dump_file| to Google Storage.
    256 
    257         @param dump_file: The path to the file containing the dump.
    258         """
    259         cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
    260         input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
    261                           name=self._get_name(), gs_dir=self._gs_dir)
    262         cmd = cmd % input_dict
    263         logging.debug('Uploading mysql dump to google storage')
    264         self._retry_run(cmd)
    265         os.remove(dump_file)
    266 
    267 
    268     def _get_gs_command(self, cmd):
    269         """Returns an array representing the command for rm or ls."""
    270         # Helpful code to allow us to test without gs.
    271         assert cmd in ['rm', 'ls']
    272         gs_bin = _GSUTIL_BIN
    273         if self._gs_dir.startswith('gs://'):
    274             cmd_array = [gs_bin, cmd]
    275         else:
    276             cmd_array = [cmd]
    277 
    278         return cmd_array
    279 
    280 
    281     def _do_ls(self):
    282         """Returns the output of running ls on the gs bucket."""
    283         cmd = self._get_gs_command('ls') + [self._gs_dir]
    284         return self._retry_run(' '.join(cmd))
    285 
    286 
    287     def cleanup(self):
    288         """Cleans up the gs bucket to ensure we don't over archive."""
    289         logging.debug('Cleaning up previously archived dump files.')
    290         listing = self._do_ls()
    291         ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
    292         if len(ordered_listing) < self._number_to_keep:
    293             logging.debug('Cleanup found nothing to do.')
    294             return
    295 
    296         to_remove = ordered_listing[:-self._number_to_keep]
    297         rm_cmd = self._get_gs_command('rm')
    298         for artifact in to_remove:
    299             cmd = ' '.join(rm_cmd + [artifact])
    300             self._retry_run(cmd)
    301 
    302 
    303 def parse_options():
    304     """Parses given options."""
    305     parser = optparse.OptionParser()
    306     parser.add_option('--gs_bucket', default=_GS_BUCKET,
    307                       help='Google storage bucket to store mysql db dumps.')
    308     parser.add_option('--keep', default=10, type=int,
    309                       help='Number of dumps to keep of specified type.')
    310     parser.add_option('--type', default=_DAILY,
    311                       help='The type of mysql dump to store.')
    312     parser.add_option('--verbose', default=False, action='store_true',
    313                       help='Google storage bucket to store mysql db dumps.')
    314     options = parser.parse_args()[0]
    315     if options.type not in _SCHEDULER_TYPES:
    316         parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))
    317 
    318     return options
    319 
    320 
    321 def main():
    322     """Runs the program."""
    323     options = parse_options()
    324     logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
    325                                       verbose=options.verbose)
    326     backup_succeeded = False
    327 
    328     with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup',
    329                                              indirect=True):
    330         with metrics.SecondsTimer(
    331                 'chromeos/autotest/afe_db/backup/durations',
    332                 fields={'type': options.type}):
    333              try:
    334                  logging.debug('Start db backup: %s', options.type)
    335                  archiver = MySqlArchiver(
    336                          options.type, options.keep, options.gs_bucket)
    337                  dump_file = archiver.dump()
    338                  logging.debug('Uploading backup: %s', options.type)
    339                  archiver.upload_to_google_storage(dump_file)
    340                  archiver.cleanup()
    341                  logging.debug('Db backup completed: %s', options.type)
    342                  backup_succeeded = True
    343              finally:
    344                  metrics.Counter(
    345                      'chromeos/autotest/db/db_backup/completed').increment(
    346                          fields={'success': backup_succeeded,
    347                                  'type': options.type})
    348 
    349 
    350 if __name__ == '__main__':
    351     main()
    352