Home | History | Annotate | Download | only in contrib
      1 #!/usr/bin/python
      2 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 import argparse
      7 import datetime
      8 import os
      9 import re
     10 import sys
     11 import logging
     12 
     13 os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
     14 
     15 import common
     16 from django.db import connections, transaction
     17 
     18 
     19 # Format Appears as: [Date] [Time] - [Msg Level] - [Message]
     20 LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
     21 # This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
     22 DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
     23                      '|3[01])$')
     24 SELECT_CMD_FORMAT = """
     25 SELECT %(table)s.%(primary_key)s FROM %(table)s
     26 WHERE %(table)s.%(time_column)s <= "%(date)s"
     27 """
     28 SELECT_JOIN_CMD_FORMAT = """
     29 SELECT %(table)s.%(primary_key)s FROM %(table)s
     30 INNER JOIN %(related_table)s
     31   ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
     32 WHERE %(related_table)s.%(time_column)s <= "%(date)s"
     33 """
     34 SELECT_WITH_INDIRECTION_FORMAT = """
     35 SELECT %(table)s.%(primary_key)s FROM %(table)s
     36 INNER JOIN %(indirection_table)s
     37   ON %(table)s.%(foreign_key)s =
     38      %(indirection_table)s.%(indirection_primary_key)s
     39 INNER JOIN %(related_table)s
     40   ON %(indirection_table)s.%(indirection_foreign_key)s =
     41   %(related_table)s.%(related_primary_key)s
     42 WHERE %(related_table)s.%(time_column)s <= "%(date)s"
     43 """
     44 DELETE_ROWS_FORMAT = """
     45 DELETE FROM %(table)s
     46 WHERE %(table)s.%(primary_key)s IN (%(rows)s)
     47 """
     48 
     49 
     50 AFE_JOB_ID = 'afe_job_id'
     51 JOB_ID = 'job_id'
     52 JOB_IDX = 'job_idx'
     53 TEST_IDX = 'test_idx'
     54 
     55 # CAUTION: Make sure only the 'default' connection is used. Otherwise
     56 # db_cleanup may delete stuff from the global database, which is generally not
     57 # intended.
     58 cursor = connections['default'].cursor()
     59 
     60 STEP_SIZE = None  # Threading this through properly is disgusting.
     61 
     62 class ProgressBar(object):
     63     TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
     64 
     65     def __init__(self, name, amount):
     66         self._name = name
     67         self._amount = amount
     68         self._cur = 0
     69 
     70     def __enter__(self):
     71         return self
     72 
     73     def __exit__(self, a, b, c):
     74         sys.stdout.write('\n')
     75         sys.stdout.flush()
     76 
     77     def update(self, x):
     78         """
     79         Advance the counter by `x`.
     80 
     81         @param x: An integer of how many more elements were processed.
     82         """
     83         self._cur += x
     84 
     85     def show(self):
     86         """
     87         Display the progress bar on the current line.  Repeated invocations
     88         "update" the display.
     89         """
     90         if self._amount == 0:
     91             barlen = 20
     92         else:
     93             barlen = int(20 * self._cur / float(self._amount))
     94         if barlen:
     95             bartext = '=' * (barlen-1) + '>'
     96         else:
     97             bartext = ''
     98         text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
     99         sys.stdout.write('\r')
    100         sys.stdout.write(text)
    101         sys.stdout.flush()
    102 
    103 
    104 def grouper(iterable, n):
    105     """
    106     Group the elements of `iterable` into groups of maximum size `n`.
    107 
    108     @param iterable: An iterable.
    109     @param n: Max size of returned groups.
    110     @returns: Yields iterables of size <= n.
    111 
    112     >>> grouper('ABCDEFG', 3)
    113     [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
    114     """
    115     args = [iter(iterable)] * n
    116     while True:
    117         lst = []
    118         try:
    119             for itr in args:
    120                 lst.append(next(itr))
    121             yield lst
    122         except StopIteration:
    123             if lst:
    124                 yield lst
    125             break
    126 
    127 
    128 def _delete_table_data_before_date(table_to_delete_from, primary_key,
    129                                    related_table, related_primary_key,
    130                                    date, foreign_key=None,
    131                                    time_column="started_time",
    132                                    indirection_table=None,
    133                                    indirection_primary_key=None,
    134                                    indirection_foreign_key=None):
    135     """
    136     We want a delete statement that will only delete from one table while
    137     using a related table to find the rows to delete.
    138 
    139     An example mysql command:
    140     DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
    141     tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
    142     tko_tests.started_time <= '2012-02-01';
    143 
    144     There are also tables that require 2 joins to determine which rows we want
    145     to delete and we determine these rows by joining the table we want to
    146     delete from with an indirection table to the actual jobs table.
    147 
    148     @param table_to_delete_from: Table whose rows we want to delete.
    149     @param related_table: Table with the date information we are selecting by.
    150     @param foreign_key: Foreign key used in table_to_delete_from to reference
    151                         the related table. If None, the primary_key is used.
    152     @param primary_key: Primary key in the related table.
    153     @param date: End date of the information we are trying to delete.
    154     @param time_column: Column that we want to use to compare the date to.
    155     @param indirection_table: Table we use to link the data we are trying to
    156                               delete with the table with the date information.
    157     @param indirection_primary_key: Key we use to connect the indirection table
    158                                     to the table we are trying to delete rows
    159                                     from.
    160     @param indirection_foreign_key: Key we use to connect the indirection table
    161                                     to the table with the date information.
    162     """
    163     if not foreign_key:
    164         foreign_key = primary_key
    165 
    166     if not related_table:
    167         # Deleting from a table directly.
    168         variables = dict(table=table_to_delete_from, primary_key=primary_key,
    169                          time_column=time_column, date=date)
    170         sql = SELECT_CMD_FORMAT % variables
    171     elif not indirection_table:
    172         # Deleting using a single JOIN to get the date information.
    173         variables = dict(primary_key=primary_key, table=table_to_delete_from,
    174                          foreign_key=foreign_key, related_table=related_table,
    175                          related_primary_key=related_primary_key,
    176                          time_column=time_column, date=date)
    177         sql = SELECT_JOIN_CMD_FORMAT % variables
    178     else:
    179         # There are cases where we need to JOIN 3 TABLES to determine the rows
    180         # we want to delete.
    181         variables = dict(primary_key=primary_key, table=table_to_delete_from,
    182                          indirection_table=indirection_table,
    183                          foreign_key=foreign_key,
    184                          indirection_primary_key=indirection_primary_key,
    185                          related_table=related_table,
    186                          related_primary_key=related_primary_key,
    187                          indirection_foreign_key=indirection_foreign_key,
    188                          time_column=time_column, date=date)
    189         sql = SELECT_WITH_INDIRECTION_FORMAT % variables
    190 
    191     logging.debug('SQL: %s', sql)
    192     cursor.execute(sql, [])
    193     rows = [x[0] for x in cursor.fetchall()]
    194     logging.debug(rows)
    195 
    196     if not rows or rows == [None]:
    197         with ProgressBar(table_to_delete_from, 0) as pb:
    198             pb.show()
    199         logging.debug('Noting to delete for %s', table_to_delete_from)
    200         return
    201 
    202     with ProgressBar(table_to_delete_from, len(rows)) as pb:
    203         for row_keys in grouper(rows, STEP_SIZE):
    204             variables['rows'] = ','.join([str(x) for x in row_keys])
    205             sql = DELETE_ROWS_FORMAT % variables
    206             logging.debug('SQL: %s', sql)
    207             cursor.execute(sql, [])
    208             transaction.commit_unless_managed(using='default')
    209             pb.update(len(row_keys))
    210             pb.show()
    211 
    212 
    213 def _subtract_days(date, days_to_subtract):
    214     """
    215     Return a date (string) that is 'days' before 'date'
    216 
    217     @param date: date (string) we are subtracting from.
    218     @param days_to_subtract: days (int) we are subtracting.
    219     """
    220     date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
    221     difference = date_obj - datetime.timedelta(days=days_to_subtract)
    222     return difference.strftime('%Y-%m-%d')
    223 
    224 
    225 def _delete_all_data_before_date(date):
    226     """
    227     Delete all the database data before a given date.
    228 
    229     This function focuses predominately on the data for jobs in tko_jobs.
    230     However not all jobs in afe_jobs are also in tko_jobs.
    231 
    232     Therefore we delete all the afe_job and foreign key relations prior to two
    233     days before date. Then we do the queries using tko_jobs and these
    234     tables to ensure all the related information is gone. Even though we are
    235     repeating deletes on these tables, the second delete will be quick and
    236     completely thorough in ensuring we clean up all the foreign key
    237     dependencies correctly.
    238 
    239     @param date: End date of the information we are trying to delete.
    240     @param step: Rows to delete per SQL query.
    241     """
    242     # First cleanup all afe_job related data (prior to 2 days before date).
    243     # The reason for this is not all afe_jobs may be in tko_jobs.
    244     afe_date = _subtract_days(date, 2)
    245     logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
    246     _delete_table_data_before_date('afe_aborted_host_queue_entries',
    247                                    'queue_entry_id',
    248                                    'afe_jobs', 'id', afe_date,
    249                                    time_column= 'created_on',
    250                                    foreign_key='queue_entry_id',
    251                                    indirection_table='afe_host_queue_entries',
    252                                    indirection_primary_key='id',
    253                                    indirection_foreign_key='job_id')
    254     _delete_table_data_before_date('afe_special_tasks', 'id',
    255                                    'afe_jobs', 'id',
    256                                    afe_date, time_column='created_on',
    257                                    foreign_key='queue_entry_id',
    258                                    indirection_table='afe_host_queue_entries',
    259                                    indirection_primary_key='id',
    260                                    indirection_foreign_key='job_id')
    261     _delete_table_data_before_date('afe_host_queue_entries', 'id',
    262                                    'afe_jobs', 'id',
    263                                    afe_date, time_column='created_on',
    264                                    foreign_key=JOB_ID)
    265     _delete_table_data_before_date('afe_job_keyvals', 'id',
    266                                    'afe_jobs', 'id',
    267                                    afe_date, time_column='created_on',
    268                                    foreign_key=JOB_ID)
    269     _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
    270                                    'afe_jobs', 'id',
    271                                    afe_date, time_column='created_on',
    272                                    foreign_key=JOB_ID)
    273     _delete_table_data_before_date('afe_jobs', 'id',
    274                                    None, None,
    275                                    afe_date, time_column='created_on')
    276     # Special tasks that aren't associated with an HQE
    277     # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
    278     # more than we should, but I doubt anyone will notice or care.
    279     _delete_table_data_before_date('afe_special_tasks', 'id',
    280                                    None, None,
    281                                    afe_date, time_column='time_requested')
    282 
    283     # Now go through and clean up all the rows related to tko_jobs prior to
    284     # date.
    285     logging.info('Cleaning up all data related to tko_jobs prior to %s.',
    286                   date)
    287     _delete_table_data_before_date('tko_test_attributes', 'id',
    288                                    'tko_tests', TEST_IDX,
    289                                    date, foreign_key=TEST_IDX)
    290     _delete_table_data_before_date('tko_test_labels_tests', 'id',
    291                                    'tko_tests', TEST_IDX,
    292                                    date, foreign_key= 'test_id')
    293     _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
    294                                    'tko_tests', TEST_IDX,
    295                                    date)
    296     _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
    297                                    'tko_tests', TEST_IDX,
    298                                    date)
    299     _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
    300                                    'tko_tests', TEST_IDX,
    301                                    date)
    302     _delete_table_data_before_date('tko_job_keyvals', 'id',
    303                                    'tko_jobs', JOB_IDX,
    304                                    date, foreign_key='job_id')
    305     _delete_table_data_before_date('afe_aborted_host_queue_entries',
    306                                    'queue_entry_id',
    307                                    'tko_jobs', AFE_JOB_ID, date,
    308                                    foreign_key='queue_entry_id',
    309                                    indirection_table='afe_host_queue_entries',
    310                                    indirection_primary_key='id',
    311                                    indirection_foreign_key='job_id')
    312     _delete_table_data_before_date('afe_special_tasks', 'id',
    313                                    'tko_jobs', AFE_JOB_ID,
    314                                    date, foreign_key='queue_entry_id',
    315                                    indirection_table='afe_host_queue_entries',
    316                                    indirection_primary_key='id',
    317                                    indirection_foreign_key='job_id')
    318     _delete_table_data_before_date('afe_host_queue_entries', 'id',
    319                                    'tko_jobs', AFE_JOB_ID,
    320                                    date, foreign_key='job_id')
    321     _delete_table_data_before_date('afe_job_keyvals', 'id',
    322                                    'tko_jobs', AFE_JOB_ID,
    323                                    date, foreign_key='job_id')
    324     _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
    325                                    'tko_jobs', AFE_JOB_ID,
    326                                    date, foreign_key='job_id')
    327     _delete_table_data_before_date('afe_jobs', 'id',
    328                                    'tko_jobs', AFE_JOB_ID,
    329                                    date, foreign_key='id')
    330     _delete_table_data_before_date('tko_tests', TEST_IDX,
    331                                    'tko_jobs', JOB_IDX,
    332                                    date, foreign_key=JOB_IDX)
    333     _delete_table_data_before_date('tko_jobs', JOB_IDX,
    334                                    None, None, date)
    335 
    336 
    337 def parse_args():
    338     """Parse command line arguments"""
    339     parser = argparse.ArgumentParser()
    340     parser.add_argument('-v', '--verbose', action='store_true',
    341                         help='Print SQL commands and results')
    342     parser.add_argument('--step', type=int, action='store',
    343                         default=1000,
    344                         help='Number of rows to delete at once')
    345     parser.add_argument('date', help='Keep results newer than')
    346     return parser.parse_args()
    347 
    348 
    349 def main():
    350     """main"""
    351     args = parse_args()
    352 
    353     level = logging.DEBUG if args.verbose else logging.INFO
    354     logging.basicConfig(level=level, format=LOGGING_FORMAT)
    355     logging.info('Calling: %s', sys.argv)
    356 
    357     if not re.match(DATE_FORMAT_REGEX, args.date):
    358         print 'DATE must be in yyyy-mm-dd format!'
    359         return
    360 
    361     global STEP_SIZE
    362     STEP_SIZE = args.step
    363     _delete_all_data_before_date(args.date)
    364 
    365 
    366 if __name__ == '__main__':
    367     main()
    368