# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Request handler that deletes traces older than MAX_DAYS from the corpus."""

import datetime
import logging
import webapp2

from . import cloud_config
from .trace_info import TraceInfo
import cloudstorage as gcs

# Maximum number of TraceInfo keys fetched (and deleted) per query pass.
BATCH_SIZE = 100
# Traces whose `date` lies more than this many days in the past are deleted.
MAX_DAYS = 30
# Retry policy handed to every Cloud Storage delete call.
DEFAULT_RETRY_PARAMS = gcs.RetryParams(initial_delay=0.2,
                                       max_delay=5.0,
                                       backoff_factor=2,
                                       max_retry_period=15)


class CorpusCleanupPage(webapp2.RequestHandler):
  """Handles GET /corpus_cleanup by purging expired traces in batches."""

  def _delete_traces(self):
    """Deletes up to BATCH_SIZE expired traces.

    For each TraceInfo whose `date` is older than MAX_DAYS days, removes the
    '/<bucket>/<key id>.gz' object from Cloud Storage and then deletes the
    TraceInfo entity itself.

    Returns:
      The number of TraceInfo entities deleted in this pass.
    """
    trace_bucket = cloud_config.Get().trace_upload_bucket

    # NOTE(review): now() is naive local time; this presumably matches how
    # TraceInfo.date is stored (naive UTC on App Engine) -- confirm.
    oldest_time = datetime.datetime.now() - datetime.timedelta(days=MAX_DAYS)
    expired = TraceInfo.query(TraceInfo.date < oldest_time)

    deleted_traces = 0
    for key in expired.fetch(BATCH_SIZE, keys_only=True):
      gcs_path = '/%s/%s.gz' % (trace_bucket, key.id())
      try:
        gcs.delete(gcs_path, retry_params=DEFAULT_RETRY_PARAMS)
      except gcs.NotFoundError:
        # The stored object is already gone; still drop the stale entity.
        pass

      key.delete()
      deleted_traces += 1

    return deleted_traces

  def get(self):
    """Runs cleanup passes until a short batch, reporting each pass as HTML."""
    self.response.out.write('<html><body>')

    while True:
      deleted_traces = self._delete_traces()
      # <b> is the real HTML bold element; the former <bold> tag is unknown
      # to browsers and rendered the label as plain text.
      self.response.out.write(
          '<br><div><b>Traces Cleaned:</b> %s</div>' % deleted_traces)

      # Pass the count as a logging argument so formatting is deferred to
      # the logging framework.
      logging.info('Daily cleanup deleted %s traces.', deleted_traces)

      # A pass that deleted fewer than BATCH_SIZE entries means the query
      # returned everything that was left, so there is nothing more to do.
      if deleted_traces < BATCH_SIZE:
        break

    self.response.out.write('</body></html>')


app = webapp2.WSGIApplication([('/corpus_cleanup', CorpusCleanupPage)])