#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import logging
import os
import subprocess
import sys

from telemetry.core import command_line
from telemetry.page import cloud_storage


BUCKET_ALIASES = {
    'public': cloud_storage.PUBLIC_BUCKET,
    'partner': cloud_storage.PARTNER_BUCKET,
    'google-only': cloud_storage.INTERNAL_BUCKET,
}
BUCKETS = {bucket: easy_bucket_name for easy_bucket_name, bucket
           in BUCKET_ALIASES.iteritems()}


def _GetPaths(path):
  root, ext = os.path.splitext(path)
  if ext == '.sha1':
    file_path = root
    hash_path = path
  else:
    file_path = path
    hash_path = path + '.sha1'
  return file_path, hash_path


def _FindFilesInCloudStorage(files):
  """Returns a dict of all files and which buckets they're in."""
  # Preprocessing: get the contents of all buckets.
  bucket_contents = {}
  for bucket in BUCKETS:
    try:
      bucket_contents[bucket] = cloud_storage.List(bucket)
    except (cloud_storage.PermissionError, cloud_storage.CredentialsError):
      pass

  # Check if each file is in the bucket contents.
  file_buckets = {}
  for path in files:
    file_path, hash_path = _GetPaths(path)

    if file_path in file_buckets:
      # Ignore duplicates, if both data and sha1 file were in the file list.
      continue
    if not os.path.exists(hash_path):
      # Probably got some non-Cloud Storage files in the file list. Ignore.
      continue

    file_hash = cloud_storage.ReadHash(hash_path)
    file_buckets[file_path] = []
    for bucket in BUCKETS:
      if bucket in bucket_contents and file_hash in bucket_contents[bucket]:
        file_buckets[file_path].append(bucket)

  return file_buckets
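

# A note on the on-disk convention that _GetPaths and _FindFilesInCloudStorage
# rely on (a sketch: the file name is hypothetical, and the digest shown is
# just the well-known SHA-1 of the empty string). The data file itself is not
# tracked directly; a sibling "<name>.sha1" stub holds the hex digest of its
# contents, and that digest doubles as the object's name in the bucket:
#
#   page_sets/data/example.wpr       # the real data, stored in a bucket
#   page_sets/data/example.wpr.sha1  # one line of text, e.g.:
#                                    # da39a3ee5e6b4b0d3255bfef95601890afd80709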


class Ls(command_line.Command):
  """List which bucket each file is in."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('-r', '--recursive', action='store_true')
    parser.add_argument('paths', nargs='+')

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    for path in args.paths:
      if not os.path.exists(path):
        parser.error('Path not found: %s' % path)

  def Run(self, args):
    def GetFilesInPaths(paths, recursive):
      """If path is a dir, yields all files in it; otherwise just yields path.

      If recursive is true, walks subdirectories recursively.
      """
      for path in paths:
        if not os.path.isdir(path):
          yield path
          continue

        if recursive:
          for root, _, filenames in os.walk(path):
            for filename in filenames:
              yield os.path.join(root, filename)
        else:
          for filename in os.listdir(path):
            yield os.path.join(path, filename)

    files = _FindFilesInCloudStorage(
        GetFilesInPaths(args.paths, args.recursive))

    if not files:
      print 'No files in Cloud Storage.'
      return

    for file_path, buckets in sorted(files.iteritems()):
      if buckets:
        buckets = [BUCKETS[bucket] for bucket in buckets]
        print '%-11s %s' % (','.join(buckets), file_path)
      else:
        print '%-11s %s' % ('not found', file_path)


class Mv(command_line.Command):
  """Move files to the given bucket."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')
    parser.add_argument('bucket', choices=BUCKET_ALIASES)

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    args.bucket = BUCKET_ALIASES[args.bucket]

  def Run(self, args):
    files = _FindFilesInCloudStorage(args.files)

    # Fail early if any file is missing from every bucket.
    for file_path, buckets in sorted(files.iteritems()):
      if not buckets:
        raise IOError('%s not found in Cloud Storage.' % file_path)

    for file_path, buckets in sorted(files.iteritems()):
      if args.bucket in buckets:
        buckets.remove(args.bucket)
        if not buckets:
          logging.info('Skipping %s, no action needed.', file_path)
          continue

      # Move to the target bucket.
      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
      cloud_storage.Move(buckets.pop(), args.bucket, file_hash)

      # Delete all additional copies.
      for bucket in buckets:
        cloud_storage.Delete(bucket, file_hash)


class Rm(command_line.Command):
  """Remove files from Cloud Storage."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')

  def Run(self, args):
    files = _FindFilesInCloudStorage(args.files)
    for file_path, buckets in sorted(files.iteritems()):
      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
      for bucket in buckets:
        cloud_storage.Delete(bucket, file_hash)


class Upload(command_line.Command):
  """Upload files to Cloud Storage."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')
    parser.add_argument('bucket', choices=BUCKET_ALIASES)

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    args.bucket = BUCKET_ALIASES[args.bucket]

    for path in args.files:
      if not os.path.exists(path):
        parser.error('File not found: %s' % path)

  def Run(self, args):
    for file_path in args.files:
      file_hash = cloud_storage.CalculateHash(file_path)

      # Create or update the hash file.
      hash_path = file_path + '.sha1'
      with open(hash_path, 'wb') as f:
        f.write(file_hash)
        f.flush()

      # Add the data to Cloud Storage.
      cloud_storage.Insert(args.bucket, file_hash, file_path)

      # Add the hash file to the branch, for convenience. :)
      subprocess.call(['git', 'add', hash_path])


class CloudStorageCommand(command_line.SubcommandCommand):
  commands = (Ls, Mv, Rm, Upload)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  sys.exit(CloudStorageCommand.main())
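
# Example invocations (a sketch: it assumes command_line.SubcommandCommand
# dispatches to each Command subclass by its lowercased class name, and the
# paths below are hypothetical):
#
#   ./cloud_storage.py ls --recursive page_sets/data
#   ./cloud_storage.py upload page_sets/data/example.wpr public
#   ./cloud_storage.py mv page_sets/data/example.wpr google-only
#   ./cloud_storage.py rm page_sets/data/example.wpr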