# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions and class for listing commands such as ls and du."""

from __future__ import absolute_import
from __future__ import print_function

import fnmatch

from gslib.exception import CommandException
from gslib.plurality_checkable_iterator import PluralityCheckableIterator
from gslib.util import UTF8
from gslib.wildcard_iterator import StorageUrlFromString

# NOTE(review): the default printers below emit url_string.encode(UTF8), i.e.
# encoded byte strings. With print_function the output is unchanged on
# Python 2; on Python 3 bytes would be rendered with a b'' prefix, so the
# encode() calls need revisiting if this module is ever ported.


def PrintNewLine():
  """Default function for printing new lines between directories."""
  print()


def PrintDirHeader(bucket_listing_ref):
  """Default function for printing headers for prefixes.

  Header is printed prior to listing the contents of the prefix.

  Args:
    bucket_listing_ref: BucketListingRef of type PREFIX.
  """
  print('%s:' % bucket_listing_ref.url_string.encode(UTF8))


def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  """Default function for printing headers for buckets.

  Header is printed prior to listing the contents of the bucket. The default
  implementation prints nothing.

  Args:
    bucket_listing_ref: Reference to the bucket being listed. Note that
                        LsHelper.ExpandUrlAndPrint passes the bucket URL it
                        was asked to expand.
  """
  pass


def PrintDir(bucket_listing_ref):
  """Default function for printing buckets or prefixes.

  Args:
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  print(bucket_listing_ref.url_string.encode(UTF8))


# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
  """Off-by-default function for printing buckets or prefix size summaries.

  The default implementation prints nothing.

  Args:
    num_bytes: Number of bytes contained in the directory.
    bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  """
  pass


def PrintObject(bucket_listing_ref):
  """Default printing function for objects.

  Args:
    bucket_listing_ref: BucketListingRef of type OBJECT.

  Returns:
    (num_objects, num_bytes). The default printer always reports one object
    and zero bytes.
  """
  print(bucket_listing_ref.url_string.encode(UTF8))
  return (1, 0)


class LsHelper(object):
  """Helper class for ls and du."""

  def __init__(self, iterator_func, logger,
               print_object_func=PrintObject,
               print_dir_func=PrintDir,
               print_dir_header_func=PrintDirHeader,
               print_bucket_header_func=PrintBucketHeader,
               print_dir_summary_func=PrintDirSummary,
               print_newline_func=PrintNewLine,
               all_versions=False, should_recurse=False,
               exclude_patterns=None, fields=('name',)):
    """Initializes the helper class to prepare for listing.

    Args:
      iterator_func: Function for instantiating iterator.
                     Inputs-
                       url_string- Url string to iterate on. May include
                                   wildcards.
                       all_versions=False- If true, iterate over all object
                                           versions.
      logger: Logger for outputting warnings / errors.
      print_object_func: Function for printing objects. Must return a
                         (num_objects, num_bytes) tuple.
      print_dir_func: Function for printing buckets/prefixes.
      print_dir_header_func: Function for printing the header line that
                             precedes the contents of a prefix.
      print_bucket_header_func: Function for printing the header line that
                                precedes the contents of a bucket.
      print_dir_summary_func: Function for printing size summaries about
                              buckets/prefixes.
      print_newline_func: Function for printing new lines between dirs.
      all_versions: If true, list all object versions.
      should_recurse: If true, recursively listing buckets/prefixes.
      exclude_patterns: Patterns to exclude when listing.
      fields: Fields to request from bucket listings; this should
              include all fields that need to be populated in
              objects so they can be listed. Can be set to None
              to retrieve all object fields. Defaults to short
              listing fields.
    """
    self._iterator_func = iterator_func
    self.logger = logger
    self._print_object_func = print_object_func
    self._print_dir_func = print_dir_func
    self._print_dir_header_func = print_dir_header_func
    self._print_bucket_header_func = print_bucket_header_func
    self._print_dir_summary_func = print_dir_summary_func
    self._print_newline_func = print_newline_func
    self.all_versions = all_versions
    self.should_recurse = should_recurse
    self.exclude_patterns = exclude_patterns
    self.bucket_listing_fields = fields

  def ExpandUrlAndPrint(self, url):
    """Iterates over the given URL and calls print functions.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories
      (prefixes), objects and bytes iterated.

    Raises:
      CommandException: If the listing yields a reference that is neither an
                        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    top_level_iterator = PluralityCheckableIterator(
        self._iterator_func(
            url.CreatePrefixUrl(wildcard_suffix=None),
            all_versions=self.all_versions).IterAll(
                expand_top_level_buckets=True,
                bucket_listing_fields=self.bucket_listing_fields))
    # Prefix headers are only printed when more than one entry matched.
    plurality = top_level_iterator.HasPlurality()

    num_dirs = 0
    num_objects = 0
    num_bytes = 0
    # Becomes True once anything has been listed, so that every subsequent
    # prefix listing is separated from the previous output by a newline.
    print_newline = False

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
        print_newline = True
      elif blr.IsPrefix():
        if print_newline:
          self._print_newline_func()
        else:
          print_newline = True
        if plurality:
          self._print_dir_header_func(blr)
        expansion_url_str = StorageUrlFromString(
            blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
        nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
        self._print_dir_summary_func(nb, blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb
    return num_dirs, num_objects, num_bytes

  def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    """Iterates over the given URL string and calls print functions.

    When should_recurse is set, each prefix is expanded recursively (header,
    contents, then summary). Otherwise a prefix is printed with the dir
    printing function and counted as a single directory.

    Args:
      url_str: String describing StorageUrl to iterate over.
               Must be of depth one or higher.
      print_initial_newline: If true, print a newline before recursively
                             expanded prefixes. If false, the newline is
                             skipped for the first expanded prefix only.

    Returns:
      (num_dirs, num_objects, num_bytes) total number of directories
      (prefixes), objects and bytes iterated.

    Raises:
      CommandException: If the listing yields a reference that is neither an
                        object nor a prefix.
    """
    num_dirs = 0
    num_objects = 0
    num_bytes = 0
    for blr in self._iterator_func(
        '%s' % url_str, all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields):
      if self._MatchesExcludedPattern(blr):
        continue

      if blr.IsObject():
        nd = 0
        no, nb = self._print_object_func(blr)
      elif blr.IsPrefix():
        if self.should_recurse:
          if print_initial_newline:
            self._print_newline_func()
          else:
            print_initial_newline = True
          self._print_dir_header_func(blr)
          expansion_url_str = StorageUrlFromString(
              blr.url_string).CreatePrefixUrl(wildcard_suffix='*')

          nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
          self._print_dir_summary_func(nb, blr)
        else:
          # Not recursing: the prefix itself is the listing entry.
          nd, no, nb = 1, 0, 0
          self._print_dir_func(blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a bucketListingRef of type Bucket')
      num_dirs += nd
      num_objects += no
      num_bytes += nb

    return num_dirs, num_objects, num_bytes

  def _MatchesExcludedPattern(self, blr):
    """Checks bucket listing reference against patterns to exclude.

    Args:
      blr: BucketListingRef to check.

    Returns:
      True if reference matches a pattern and should be excluded.
    """
    if not self.exclude_patterns:
      return False
    tomatch = blr.url_string
    return any(fnmatch.fnmatch(tomatch, pattern)
               for pattern in self.exclude_patterns)