Home | History | Annotate | Download | only in gslib
      1 # -*- coding: utf-8 -*-
      2 # Copyright 2014 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #     http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 """Utility functions and class for listing commands such as ls and du."""
     16 
     17 from __future__ import absolute_import
     18 
     19 import fnmatch
     20 
     21 from gslib.exception import CommandException
     22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
     23 from gslib.util import UTF8
     24 from gslib.wildcard_iterator import StorageUrlFromString
     25 
     26 
     27 def PrintNewLine():
     28   """Default function for printing new lines between directories."""
     29   print
     30 
     31 
     32 def PrintDirHeader(bucket_listing_ref):
     33   """Default function for printing headers for prefixes.
     34 
     35   Header is printed prior to listing the contents of the prefix.
     36 
     37   Args:
     38     bucket_listing_ref: BucketListingRef of type PREFIX.
     39   """
     40   print '%s:' % bucket_listing_ref.url_string.encode(UTF8)
     41 
     42 
     43 def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
     44   """Default function for printing headers for buckets.
     45 
     46   Header is printed prior to listing the contents of the bucket.
     47 
     48   Args:
     49     bucket_listing_ref: BucketListingRef of type BUCKET.
     50   """
     51   pass
     52 
     53 
     54 def PrintDir(bucket_listing_ref):
     55   """Default function for printing buckets or prefixes.
     56 
     57   Args:
     58     bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
     59   """
     60   print bucket_listing_ref.url_string.encode(UTF8)
     61 
     62 
     63 # pylint: disable=unused-argument
     64 def PrintDirSummary(num_bytes, bucket_listing_ref):
     65   """Off-by-default function for printing buckets or prefix size summaries.
     66 
     67   Args:
     68     num_bytes: Number of bytes contained in the directory.
     69     bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
     70   """
     71   pass
     72 
     73 
     74 def PrintObject(bucket_listing_ref):
     75   """Default printing function for objects.
     76 
     77   Args:
     78     bucket_listing_ref: BucketListingRef of type OBJECT.
     79 
     80   Returns:
     81     (num_objects, num_bytes).
     82   """
     83   print bucket_listing_ref.url_string.encode(UTF8)
     84   return (1, 0)
     85 
     86 
     87 class LsHelper(object):
     88   """Helper class for ls and du."""
     89 
     90   def __init__(self, iterator_func, logger,
     91                print_object_func=PrintObject,
     92                print_dir_func=PrintDir,
     93                print_dir_header_func=PrintDirHeader,
     94                print_bucket_header_func=PrintBucketHeader,
     95                print_dir_summary_func=PrintDirSummary,
     96                print_newline_func=PrintNewLine,
     97                all_versions=False, should_recurse=False,
     98                exclude_patterns=None, fields=('name',)):
     99     """Initializes the helper class to prepare for listing.
    100 
    101     Args:
    102       iterator_func: Function for instantiating iterator.
    103                      Inputs-
    104                        url_string- Url string to iterate on. May include
    105                                    wildcards.
    106                        all_versions=False- If true, iterate over all object
    107                                            versions.
    108       logger: Logger for outputting warnings / errors.
    109       print_object_func: Function for printing objects.
    110       print_dir_func:    Function for printing buckets/prefixes.
    111       print_dir_header_func: Function for printing header line for buckets
    112                              or prefixes.
    113       print_bucket_header_func: Function for printing header line for buckets
    114                                 or prefixes.
    115       print_dir_summary_func: Function for printing size summaries about
    116                               buckets/prefixes.
    117       print_newline_func: Function for printing new lines between dirs.
    118       all_versions:      If true, list all object versions.
    119       should_recurse:    If true, recursively listing buckets/prefixes.
    120       exclude_patterns:  Patterns to exclude when listing.
    121       fields:            Fields to request from bucket listings; this should
    122                          include all fields that need to be populated in
    123                          objects so they can be listed. Can be set to None
    124                          to retrieve all object fields. Defaults to short
    125                          listing fields.
    126     """
    127     self._iterator_func = iterator_func
    128     self.logger = logger
    129     self._print_object_func = print_object_func
    130     self._print_dir_func = print_dir_func
    131     self._print_dir_header_func = print_dir_header_func
    132     self._print_bucket_header_func = print_bucket_header_func
    133     self._print_dir_summary_func = print_dir_summary_func
    134     self._print_newline_func = print_newline_func
    135     self.all_versions = all_versions
    136     self.should_recurse = should_recurse
    137     self.exclude_patterns = exclude_patterns
    138     self.bucket_listing_fields = fields
    139 
    140   def ExpandUrlAndPrint(self, url):
    141     """Iterates over the given URL and calls print functions.
    142 
    143     Args:
    144       url: StorageUrl to iterate over.
    145 
    146     Returns:
    147       (num_objects, num_bytes) total number of objects and bytes iterated.
    148     """
    149     num_objects = 0
    150     num_dirs = 0
    151     num_bytes = 0
    152     print_newline = False
    153 
    154     if url.IsBucket() or self.should_recurse:
    155       # IsBucket() implies a top-level listing.
    156       if url.IsBucket():
    157         self._print_bucket_header_func(url)
    158       return self._RecurseExpandUrlAndPrint(url.url_string,
    159                                             print_initial_newline=False)
    160     else:
    161       # User provided a prefix or object URL, but it's impossible to tell
    162       # which until we do a listing and see what matches.
    163       top_level_iterator = PluralityCheckableIterator(self._iterator_func(
    164           url.CreatePrefixUrl(wildcard_suffix=None),
    165           all_versions=self.all_versions).IterAll(
    166               expand_top_level_buckets=True,
    167               bucket_listing_fields=self.bucket_listing_fields))
    168       plurality = top_level_iterator.HasPlurality()
    169 
    170       for blr in top_level_iterator:
    171         if self._MatchesExcludedPattern(blr):
    172           continue
    173         if blr.IsObject():
    174           nd = 0
    175           no, nb = self._print_object_func(blr)
    176           print_newline = True
    177         elif blr.IsPrefix():
    178           if print_newline:
    179             self._print_newline_func()
    180           else:
    181             print_newline = True
    182           if plurality:
    183             self._print_dir_header_func(blr)
    184           expansion_url_str = StorageUrlFromString(
    185               blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
    186           nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
    187           self._print_dir_summary_func(nb, blr)
    188         else:
    189           # We handle all buckets at the top level, so this should never happen.
    190           raise CommandException(
    191               'Sub-level iterator returned a CsBucketListingRef of type Bucket')
    192         num_objects += no
    193         num_dirs += nd
    194         num_bytes += nb
    195       return num_dirs, num_objects, num_bytes
    196 
    197   def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
    198     """Iterates over the given URL string and calls print functions.
    199 
    200     Args:
    201       url_str: String describing StorageUrl to iterate over.
    202                Must be of depth one or higher.
    203       print_initial_newline: If true, print a newline before recursively
    204                              expanded prefixes.
    205 
    206     Returns:
    207       (num_objects, num_bytes) total number of objects and bytes iterated.
    208     """
    209     num_objects = 0
    210     num_dirs = 0
    211     num_bytes = 0
    212     for blr in self._iterator_func(
    213         '%s' % url_str, all_versions=self.all_versions).IterAll(
    214             expand_top_level_buckets=True,
    215             bucket_listing_fields=self.bucket_listing_fields):
    216       if self._MatchesExcludedPattern(blr):
    217         continue
    218 
    219       if blr.IsObject():
    220         nd = 0
    221         no, nb = self._print_object_func(blr)
    222       elif blr.IsPrefix():
    223         if self.should_recurse:
    224           if print_initial_newline:
    225             self._print_newline_func()
    226           else:
    227             print_initial_newline = True
    228           self._print_dir_header_func(blr)
    229           expansion_url_str = StorageUrlFromString(
    230               blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
    231 
    232           nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
    233           self._print_dir_summary_func(nb, blr)
    234         else:
    235           nd, no, nb = 1, 0, 0
    236           self._print_dir_func(blr)
    237       else:
    238         # We handle all buckets at the top level, so this should never happen.
    239         raise CommandException(
    240             'Sub-level iterator returned a bucketListingRef of type Bucket')
    241       num_dirs += nd
    242       num_objects += no
    243       num_bytes += nb
    244 
    245     return num_dirs, num_objects, num_bytes
    246 
    247   def _MatchesExcludedPattern(self, blr):
    248     """Checks bucket listing reference against patterns to exclude.
    249 
    250     Args:
    251       blr: BucketListingRef to check.
    252 
    253     Returns:
    254       True if reference matches a pattern and should be excluded.
    255     """
    256     if self.exclude_patterns:
    257       tomatch = blr.url_string
    258       for pattern in self.exclude_patterns:
    259         if fnmatch.fnmatch(tomatch, pattern):
    260           return True
    261     return False
    262