Home | History | Annotate | Download | only in endpoints
      1 # Copyright 2015 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 import datetime
      6 import logging
      7 import webapp2
      8 
      9 from . import cloud_config
     10 from .trace_info import TraceInfo
     11 import cloudstorage as gcs
     12 
     13 BATCH_SIZE = 100
     14 MAX_DAYS = 30
     15 DEFAULT_RETRY_PARAMS = gcs.RetryParams(initial_delay=0.2,
     16                                        max_delay=5.0,
     17                                        backoff_factor=2,
     18                                        max_retry_period=15)
     19 
     20 class CorpusCleanupPage(webapp2.RequestHandler):
     21 
     22   def _delete_traces(self):
     23     trace_bucket = cloud_config.Get().trace_upload_bucket
     24     deleted_traces = 0
     25 
     26     oldest_time = datetime.datetime.now() - datetime.timedelta(days=MAX_DAYS)
     27     q = TraceInfo.query(TraceInfo.date < oldest_time)
     28 
     29     for key in q.fetch(BATCH_SIZE, keys_only=True):
     30       gcs_path = '/%s/%s.gz' % (trace_bucket, key.id())
     31       try:
     32         gcs.delete(gcs_path, retry_params=DEFAULT_RETRY_PARAMS)
     33       except gcs.NotFoundError:
     34         pass
     35 
     36       key.delete()
     37       deleted_traces += 1
     38 
     39     return deleted_traces
     40 
     41   def get(self):
     42     self.response.out.write('<html><body>')
     43 
     44     while True:
     45       deleted_traces = self._delete_traces()
     46       self.response.out.write("<br><div><bold>Traces Cleaned:</bold> %s</div>"
     47           % deleted_traces)
     48 
     49       logging.info('Daily cleanup deleted %s traces.' % deleted_traces)
     50 
     51       if deleted_traces < BATCH_SIZE:
     52         break
     53 
     54     self.response.out.write('</body></html>')
     55 
     56 
     57 app = webapp2.WSGIApplication([('/corpus_cleanup', CorpusCleanupPage)])
     58