1 #!/usr/bin/python 2 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3 # Use of this source code is governed by a BSD-style license that can be 4 # found in the LICENSE file. 5 6 import argparse 7 import datetime 8 import os 9 import re 10 import sys 11 import logging 12 13 os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings' 14 15 import common 16 from django.db import connections, transaction 17 18 19 # Format Appears as: [Date] [Time] - [Msg Level] - [Message] 20 LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s' 21 # This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01) 22 DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]' 23 '|3[01])$') 24 SELECT_CMD_FORMAT = """ 25 SELECT %(table)s.%(primary_key)s FROM %(table)s 26 WHERE %(table)s.%(time_column)s <= "%(date)s" 27 """ 28 SELECT_JOIN_CMD_FORMAT = """ 29 SELECT %(table)s.%(primary_key)s FROM %(table)s 30 INNER JOIN %(related_table)s 31 ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s 32 WHERE %(related_table)s.%(time_column)s <= "%(date)s" 33 """ 34 SELECT_WITH_INDIRECTION_FORMAT = """ 35 SELECT %(table)s.%(primary_key)s FROM %(table)s 36 INNER JOIN %(indirection_table)s 37 ON %(table)s.%(foreign_key)s = 38 %(indirection_table)s.%(indirection_primary_key)s 39 INNER JOIN %(related_table)s 40 ON %(indirection_table)s.%(indirection_foreign_key)s = 41 %(related_table)s.%(related_primary_key)s 42 WHERE %(related_table)s.%(time_column)s <= "%(date)s" 43 """ 44 DELETE_ROWS_FORMAT = """ 45 DELETE FROM %(table)s 46 WHERE %(table)s.%(primary_key)s IN (%(rows)s) 47 """ 48 49 50 AFE_JOB_ID = 'afe_job_id' 51 JOB_ID = 'job_id' 52 JOB_IDX = 'job_idx' 53 TEST_IDX = 'test_idx' 54 55 # CAUTION: Make sure only the 'default' connection is used. Otherwise 56 # db_cleanup may delete stuff from the global database, which is generally not 57 # intended. 58 cursor = connections['default'].cursor() 59 60 STEP_SIZE = None # Threading this through properly is disgusting. 61 62 class ProgressBar(object): 63 TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})" 64 65 def __init__(self, name, amount): 66 self._name = name 67 self._amount = amount 68 self._cur = 0 69 70 def __enter__(self): 71 return self 72 73 def __exit__(self, a, b, c): 74 sys.stdout.write('\n') 75 sys.stdout.flush() 76 77 def update(self, x): 78 """ 79 Advance the counter by `x`. 80 81 @param x: An integer of how many more elements were processed. 82 """ 83 self._cur += x 84 85 def show(self): 86 """ 87 Display the progress bar on the current line. Repeated invocations 88 "update" the display. 89 """ 90 if self._amount == 0: 91 barlen = 20 92 else: 93 barlen = int(20 * self._cur / float(self._amount)) 94 if barlen: 95 bartext = '=' * (barlen-1) + '>' 96 else: 97 bartext = '' 98 text = self.TEXT.format(self._name, bartext, self._cur, self._amount) 99 sys.stdout.write('\r') 100 sys.stdout.write(text) 101 sys.stdout.flush() 102 103 104 def grouper(iterable, n): 105 """ 106 Group the elements of `iterable` into groups of maximum size `n`. 107 108 @param iterable: An iterable. 109 @param n: Max size of returned groups. 110 @returns: Yields iterables of size <= n. 111 112 >>> grouper('ABCDEFG', 3) 113 [['A', 'B', C'], ['D', 'E', 'F'], ['G']] 114 """ 115 args = [iter(iterable)] * n 116 while True: 117 lst = [] 118 try: 119 for itr in args: 120 lst.append(next(itr)) 121 yield lst 122 except StopIteration: 123 if lst: 124 yield lst 125 break 126 127 128 def _delete_table_data_before_date(table_to_delete_from, primary_key, 129 related_table, related_primary_key, 130 date, foreign_key=None, 131 time_column="started_time", 132 indirection_table=None, 133 indirection_primary_key=None, 134 indirection_foreign_key=None): 135 """ 136 We want a delete statement that will only delete from one table while 137 using a related table to find the rows to delete. 138 139 An example mysql command: 140 DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN 141 tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND 142 tko_tests.started_time <= '2012-02-01'; 143 144 There are also tables that require 2 joins to determine which rows we want 145 to delete and we determine these rows by joining the table we want to 146 delete from with an indirection table to the actual jobs table. 147 148 @param table_to_delete_from: Table whose rows we want to delete. 149 @param related_table: Table with the date information we are selecting by. 150 @param foreign_key: Foreign key used in table_to_delete_from to reference 151 the related table. If None, the primary_key is used. 152 @param primary_key: Primary key in the related table. 153 @param date: End date of the information we are trying to delete. 154 @param time_column: Column that we want to use to compare the date to. 155 @param indirection_table: Table we use to link the data we are trying to 156 delete with the table with the date information. 157 @param indirection_primary_key: Key we use to connect the indirection table 158 to the table we are trying to delete rows 159 from. 160 @param indirection_foreign_key: Key we use to connect the indirection table 161 to the table with the date information. 162 """ 163 if not foreign_key: 164 foreign_key = primary_key 165 166 if not related_table: 167 # Deleting from a table directly. 168 variables = dict(table=table_to_delete_from, primary_key=primary_key, 169 time_column=time_column, date=date) 170 sql = SELECT_CMD_FORMAT % variables 171 elif not indirection_table: 172 # Deleting using a single JOIN to get the date information. 173 variables = dict(primary_key=primary_key, table=table_to_delete_from, 174 foreign_key=foreign_key, related_table=related_table, 175 related_primary_key=related_primary_key, 176 time_column=time_column, date=date) 177 sql = SELECT_JOIN_CMD_FORMAT % variables 178 else: 179 # There are cases where we need to JOIN 3 TABLES to determine the rows 180 # we want to delete. 181 variables = dict(primary_key=primary_key, table=table_to_delete_from, 182 indirection_table=indirection_table, 183 foreign_key=foreign_key, 184 indirection_primary_key=indirection_primary_key, 185 related_table=related_table, 186 related_primary_key=related_primary_key, 187 indirection_foreign_key=indirection_foreign_key, 188 time_column=time_column, date=date) 189 sql = SELECT_WITH_INDIRECTION_FORMAT % variables 190 191 logging.debug('SQL: %s', sql) 192 cursor.execute(sql, []) 193 rows = [x[0] for x in cursor.fetchall()] 194 logging.debug(rows) 195 196 if not rows or rows == [None]: 197 with ProgressBar(table_to_delete_from, 0) as pb: 198 pb.show() 199 logging.debug('Noting to delete for %s', table_to_delete_from) 200 return 201 202 with ProgressBar(table_to_delete_from, len(rows)) as pb: 203 for row_keys in grouper(rows, STEP_SIZE): 204 variables['rows'] = ','.join([str(x) for x in row_keys]) 205 sql = DELETE_ROWS_FORMAT % variables 206 logging.debug('SQL: %s', sql) 207 cursor.execute(sql, []) 208 transaction.commit_unless_managed(using='default') 209 pb.update(len(row_keys)) 210 pb.show() 211 212 213 def _subtract_days(date, days_to_subtract): 214 """ 215 Return a date (string) that is 'days' before 'date' 216 217 @param date: date (string) we are subtracting from. 218 @param days_to_subtract: days (int) we are subtracting. 219 """ 220 date_obj = datetime.datetime.strptime(date, '%Y-%m-%d') 221 difference = date_obj - datetime.timedelta(days=days_to_subtract) 222 return difference.strftime('%Y-%m-%d') 223 224 225 def _delete_all_data_before_date(date): 226 """ 227 Delete all the database data before a given date. 228 229 This function focuses predominately on the data for jobs in tko_jobs. 230 However not all jobs in afe_jobs are also in tko_jobs. 231 232 Therefore we delete all the afe_job and foreign key relations prior to two 233 days before date. Then we do the queries using tko_jobs and these 234 tables to ensure all the related information is gone. Even though we are 235 repeating deletes on these tables, the second delete will be quick and 236 completely thorough in ensuring we clean up all the foreign key 237 dependencies correctly. 238 239 @param date: End date of the information we are trying to delete. 240 @param step: Rows to delete per SQL query. 241 """ 242 # First cleanup all afe_job related data (prior to 2 days before date). 243 # The reason for this is not all afe_jobs may be in tko_jobs. 244 afe_date = _subtract_days(date, 2) 245 logging.info('Cleaning up all afe_job data prior to %s.', afe_date) 246 _delete_table_data_before_date('afe_aborted_host_queue_entries', 247 'queue_entry_id', 248 'afe_jobs', 'id', afe_date, 249 time_column= 'created_on', 250 foreign_key='queue_entry_id', 251 indirection_table='afe_host_queue_entries', 252 indirection_primary_key='id', 253 indirection_foreign_key='job_id') 254 _delete_table_data_before_date('afe_special_tasks', 'id', 255 'afe_jobs', 'id', 256 afe_date, time_column='created_on', 257 foreign_key='queue_entry_id', 258 indirection_table='afe_host_queue_entries', 259 indirection_primary_key='id', 260 indirection_foreign_key='job_id') 261 _delete_table_data_before_date('afe_host_queue_entries', 'id', 262 'afe_jobs', 'id', 263 afe_date, time_column='created_on', 264 foreign_key=JOB_ID) 265 _delete_table_data_before_date('afe_job_keyvals', 'id', 266 'afe_jobs', 'id', 267 afe_date, time_column='created_on', 268 foreign_key=JOB_ID) 269 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', 270 'afe_jobs', 'id', 271 afe_date, time_column='created_on', 272 foreign_key=JOB_ID) 273 _delete_table_data_before_date('afe_jobs', 'id', 274 None, None, 275 afe_date, time_column='created_on') 276 # Special tasks that aren't associated with an HQE 277 # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit 278 # more than we should, but I doubt anyone will notice or care. 279 _delete_table_data_before_date('afe_special_tasks', 'id', 280 None, None, 281 afe_date, time_column='time_requested') 282 283 # Now go through and clean up all the rows related to tko_jobs prior to 284 # date. 285 logging.info('Cleaning up all data related to tko_jobs prior to %s.', 286 date) 287 _delete_table_data_before_date('tko_test_attributes', 'id', 288 'tko_tests', TEST_IDX, 289 date, foreign_key=TEST_IDX) 290 _delete_table_data_before_date('tko_test_labels_tests', 'id', 291 'tko_tests', TEST_IDX, 292 date, foreign_key= 'test_id') 293 _delete_table_data_before_date('tko_iteration_result', TEST_IDX, 294 'tko_tests', TEST_IDX, 295 date) 296 _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX, 297 'tko_tests', TEST_IDX, 298 date) 299 _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX, 300 'tko_tests', TEST_IDX, 301 date) 302 _delete_table_data_before_date('tko_job_keyvals', 'id', 303 'tko_jobs', JOB_IDX, 304 date, foreign_key='job_id') 305 _delete_table_data_before_date('afe_aborted_host_queue_entries', 306 'queue_entry_id', 307 'tko_jobs', AFE_JOB_ID, date, 308 foreign_key='queue_entry_id', 309 indirection_table='afe_host_queue_entries', 310 indirection_primary_key='id', 311 indirection_foreign_key='job_id') 312 _delete_table_data_before_date('afe_special_tasks', 'id', 313 'tko_jobs', AFE_JOB_ID, 314 date, foreign_key='queue_entry_id', 315 indirection_table='afe_host_queue_entries', 316 indirection_primary_key='id', 317 indirection_foreign_key='job_id') 318 _delete_table_data_before_date('afe_host_queue_entries', 'id', 319 'tko_jobs', AFE_JOB_ID, 320 date, foreign_key='job_id') 321 _delete_table_data_before_date('afe_job_keyvals', 'id', 322 'tko_jobs', AFE_JOB_ID, 323 date, foreign_key='job_id') 324 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', 325 'tko_jobs', AFE_JOB_ID, 326 date, foreign_key='job_id') 327 _delete_table_data_before_date('afe_jobs', 'id', 328 'tko_jobs', AFE_JOB_ID, 329 date, foreign_key='id') 330 _delete_table_data_before_date('tko_tests', TEST_IDX, 331 'tko_jobs', JOB_IDX, 332 date, foreign_key=JOB_IDX) 333 _delete_table_data_before_date('tko_jobs', JOB_IDX, 334 None, None, date) 335 336 337 def parse_args(): 338 """Parse command line arguments""" 339 parser = argparse.ArgumentParser() 340 parser.add_argument('-v', '--verbose', action='store_true', 341 help='Print SQL commands and results') 342 parser.add_argument('--step', type=int, action='store', 343 default=1000, 344 help='Number of rows to delete at once') 345 parser.add_argument('date', help='Keep results newer than') 346 return parser.parse_args() 347 348 349 def main(): 350 """main""" 351 args = parse_args() 352 353 level = logging.DEBUG if args.verbose else logging.INFO 354 logging.basicConfig(level=level, format=LOGGING_FORMAT) 355 logging.info('Calling: %s', sys.argv) 356 357 if not re.match(DATE_FORMAT_REGEX, args.date): 358 print 'DATE must be in yyyy-mm-dd format!' 359 return 360 361 global STEP_SIZE 362 STEP_SIZE = args.step 363 _delete_all_data_before_date(args.date) 364 365 366 if __name__ == '__main__': 367 main() 368