Home | History | Annotate | Download | only in telemetry
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 import argparse
      7 import logging
      8 import os
      9 import subprocess
     10 import sys
     11 
     12 from telemetry.core import command_line
     13 from telemetry.page import cloud_storage
     14 
     15 
     16 BUCKET_ALIASES = {
     17     'public': cloud_storage.PUBLIC_BUCKET,
     18     'partner': cloud_storage.PARTNER_BUCKET,
     19     'google-only': cloud_storage.INTERNAL_BUCKET,
     20 }
     21 BUCKETS = {bucket: easy_bucket_name for easy_bucket_name, bucket
     22            in BUCKET_ALIASES.iteritems()}
     23 
     24 
     25 def _GetPaths(path):
     26   root, ext = os.path.splitext(path)
     27   if ext == '.sha1':
     28     file_path = root
     29     hash_path = path
     30   else:
     31     file_path = path
     32     hash_path = path + '.sha1'
     33   return file_path, hash_path
     34 
     35 
     36 def _FindFilesInCloudStorage(files):
     37   """Returns a dict of all files and which buckets they're in."""
     38   # Preprocessing: get the contents of all buckets.
     39   bucket_contents = {}
     40   for bucket in BUCKETS:
     41     try:
     42       bucket_contents[bucket] = cloud_storage.List(bucket)
     43     except (cloud_storage.PermissionError, cloud_storage.CredentialsError):
     44       pass
     45 
     46   # Check if each file is in the bucket contents.
     47   file_buckets = {}
     48   for path in files:
     49     file_path, hash_path = _GetPaths(path)
     50 
     51     if file_path in file_buckets:
     52       # Ignore duplicates, if both data and sha1 file were in the file list.
     53       continue
     54     if not os.path.exists(hash_path):
     55       # Probably got some non-Cloud Storage files in the file list. Ignore.
     56       continue
     57 
     58     file_hash = cloud_storage.ReadHash(hash_path)
     59     file_buckets[file_path] = []
     60     for bucket in BUCKETS:
     61       if bucket in bucket_contents and file_hash in bucket_contents[bucket]:
     62         file_buckets[file_path].append(bucket)
     63 
     64   return file_buckets
     65 
     66 
     67 class Ls(command_line.Command):
     68   """List which bucket each file is in."""
     69 
     70   @classmethod
     71   def AddCommandLineArgs(cls, parser):
     72     parser.add_argument('-r', '--recursive', action='store_true')
     73     parser.add_argument('paths', nargs='+')
     74 
     75   @classmethod
     76   def ProcessCommandLineArgs(cls, parser, args):
     77     for path in args.paths:
     78       if not os.path.exists(path):
     79         parser.error('Path not found: %s' % path)
     80 
     81   def Run(self, args):
     82     def GetFilesInPaths(paths, recursive):
     83       """If path is a dir, yields all files in path, otherwise just yields path.
     84       
     85       If recursive is true, walks subdirectories recursively."""
     86       for path in paths:
     87         if not os.path.isdir(path):
     88           yield path
     89           continue
     90 
     91         if recursive:
     92           for root, _, filenames in os.walk(path):
     93             for filename in filenames:
     94               yield os.path.join(root, filename)
     95         else:
     96           for filename in os.listdir(path):
     97             yield os.path.join(path, filename)
     98 
     99     files = _FindFilesInCloudStorage(GetFilesInPaths(args.paths, args.recursive))
    100 
    101     if not files:
    102       print 'No files in Cloud Storage.'
    103       return
    104 
    105     for file_path, buckets in sorted(files.iteritems()):
    106       if buckets:
    107         buckets = [BUCKETS[bucket] for bucket in buckets]
    108         print '%-11s  %s' % (','.join(buckets), file_path)
    109       else:
    110         print '%-11s  %s' % ('not found', file_path)
    111 
    112 
    113 class Mv(command_line.Command):
    114   """Move files to the given bucket."""
    115 
    116   @classmethod
    117   def AddCommandLineArgs(cls, parser):
    118     parser.add_argument('files', nargs='+')
    119     parser.add_argument('bucket', choices=BUCKET_ALIASES)
    120 
    121   @classmethod
    122   def ProcessCommandLineArgs(cls, parser, args):
    123     args.bucket = BUCKET_ALIASES[args.bucket]
    124 
    125   def Run(self, args):
    126     files = _FindFilesInCloudStorage(args.files)
    127 
    128     for file_path, buckets in sorted(files.iteritems()):
    129       if not buckets:
    130         raise IOError('%s not found in Cloud Storage.' % file_path)
    131 
    132     for file_path, buckets in sorted(files.iteritems()):
    133       if args.bucket in buckets:
    134         buckets.remove(args.bucket)
    135       if not buckets:
    136         logging.info('Skipping %s, no action needed.' % file_path)
    137         continue
    138 
    139       # Move to the target bucket.
    140       file_hash = cloud_storage.ReadHash(file_path + '.sha1')
    141       cloud_storage.Move(buckets.pop(), args.bucket, file_hash)
    142 
    143       # Delete all additional copies.
    144       for bucket in buckets:
    145         cloud_storage.Delete(bucket, file_hash)
    146 
    147 
    148 class Rm(command_line.Command):
    149   """Remove files from Cloud Storage."""
    150 
    151   @classmethod
    152   def AddCommandLineArgs(cls, parser):
    153     parser.add_argument('files', nargs='+')
    154 
    155   def Run(self, args):
    156     files = _FindFilesInCloudStorage(args.files)
    157     for file_path, buckets in sorted(files.iteritems()):
    158       file_hash = cloud_storage.ReadHash(file_path + '.sha1')
    159       for bucket in buckets:
    160         cloud_storage.Delete(bucket, file_hash)
    161 
    162 
    163 class Upload(command_line.Command):
    164   """Upload files to Cloud Storage."""
    165 
    166   @classmethod
    167   def AddCommandLineArgs(cls, parser):
    168     parser.add_argument('files', nargs='+')
    169     parser.add_argument('bucket', choices=BUCKET_ALIASES)
    170 
    171   @classmethod
    172   def ProcessCommandLineArgs(cls, parser, args):
    173     args.bucket = BUCKET_ALIASES[args.bucket]
    174 
    175     for path in args.files:
    176       if not os.path.exists(path):
    177         parser.error('File not found: %s' % path)
    178 
    179   def Run(self, args):
    180     for file_path in args.files:
    181       file_hash = cloud_storage.CalculateHash(file_path)
    182 
    183       # Create or update the hash file.
    184       hash_path = file_path + '.sha1'
    185       with open(hash_path, 'wb') as f:
    186         f.write(file_hash)
    187         f.flush()
    188 
    189       # Add the data to Cloud Storage.
    190       cloud_storage.Insert(args.bucket, file_hash, file_path)
    191 
    192       # Add the hash file to the branch, for convenience. :)
    193       subprocess.call(['git', 'add', hash_path])
    194 
    195 
class CloudStorageCommand(command_line.SubcommandCommand):
  # Subcommands dispatched by name on the command line: ls, mv, rm, upload.
  commands = (Ls, Mv, Rm, Upload)
    198 
    199 
if __name__ == '__main__':
  # Surface info-level progress messages (e.g. skipped moves in Mv.Run)
  # when the script is run directly.
  logging.getLogger().setLevel(logging.INFO)
  sys.exit(CloudStorageCommand.main())
    203