# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Gsutil API for interacting with cloud storage providers."""

from __future__ import absolute_import


class CloudApi(object):
  """Abstract base class for interacting with cloud storage providers.

  Implementations of the gsutil Cloud API are not guaranteed to be thread-safe.
  Behavior when calling a gsutil Cloud API instance simultaneously across
  threads is undefined and doing so will likely cause errors. Therefore,
  a separate instance of the gsutil Cloud API should be instantiated per-thread.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               debug=0, trace_token=None):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Default provider prefix describing cloud storage provider to
                connect to.
      debug: Debug level for the API implementation (0..3).
      trace_token: Google internal trace token to pass to the API
                   implementation (string).
    """
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.logger = logger
    self.provider = provider
    self.debug = debug
    self.trace_token = trace_token
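
  # A hedged usage sketch: 'MyCloudApi' is a hypothetical concrete subclass of
  # CloudApi, shown only to illustrate the per-thread instantiation described
  # in the class docstring above.
  #
  #   import logging
  #   import threading
  #
  #   def _worker():
  #     # Each thread constructs its own instance; instances are not
  #     # guaranteed to be thread-safe.
  #     api = MyCloudApi(None, logging.getLogger(), provider='gs')
  #     api.GetBucket('example-bucket', fields=['logging'])
  #
  #   threads = [threading.Thread(target=_worker) for _ in range(2)]
  #   for t in threads:
  #     t.start()
  #   for t in threads:
  #     t.join()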

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """Gets Bucket metadata.

    Args:
      bucket_name: Name of the bucket.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Bucket metadata fields, for
              example, ['logging', 'defaultObjectAcl'].

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Bucket object.
    """
    raise NotImplementedError('GetBucket must be overloaded')

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """Lists bucket metadata for the given project.

    Args:
      project_id: Project owning the buckets, default from config if None.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these metadata fields for the listing,
              for example:
              ['items/logging', 'items/defaultObjectAcl'].
              Note that the WildcardIterator class should be used to list
              buckets instead of calling this function directly.  It amends
              the fields definition from get-like syntax such as
              ['logging', 'defaultObjectAcl'] so that the caller does not
              need to prepend 'items/' or specify fields necessary for listing
              (like nextPageToken).

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Iterator over Bucket objects.
    """
    raise NotImplementedError('ListBuckets must be overloaded')

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """Updates bucket metadata for the bucket with patch semantics.

    Args:
      bucket_name: Name of bucket to update.
      metadata: Bucket object defining metadata to be updated.
      canned_acl: Canned ACL to apply to the bucket.
      canned_def_acl: Canned default object ACL to apply to the bucket.
      preconditions: Preconditions for the request.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Bucket metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Bucket object describing new bucket metadata.
    """
    raise NotImplementedError('PatchBucket must be overloaded')

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """Creates a new bucket with the specified metadata.

    Args:
      bucket_name: Name of the new bucket.
      project_id: Project owner of the new bucket, default from config if None.
      metadata: Bucket object defining new bucket metadata.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Bucket metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Bucket object describing new bucket metadata.
    """
    raise NotImplementedError('CreateBucket must be overloaded')

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """Deletes a bucket.

    Args:
      bucket_name: Name of the bucket to delete.
      preconditions: Preconditions for the request.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      None.
    """
    raise NotImplementedError('DeleteBucket must be overloaded')

  class CsObjectOrPrefixType(object):
    """Enum class for describing CsObjectOrPrefix types."""
    OBJECT = 'object'  # Cloud object
    PREFIX = 'prefix'  # Cloud bucket subdirectory

  class CsObjectOrPrefix(object):
    """Container class for ListObjects results."""

    def __init__(self, data, datatype):
      """Stores a ListObjects result.

      Args:
        data: Root object, either an apitools Object or a string Prefix.
        datatype: CsObjectOrPrefixType of data.
      """
      self.data = data
      self.datatype = datatype

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """Lists objects (with metadata) and prefixes in a bucket.

    Args:
      bucket_name: Bucket containing the objects.
      prefix: Prefix for directory-like behavior.
      delimiter: Delimiter for directory-like behavior.
      all_versions: If true, list all object versions.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these metadata fields for the listing,
              for example:
              ['items/acl', 'items/updated', 'prefixes'].
              Note that the WildcardIterator class should be used to list
              objects instead of calling this function directly.  It amends
              the fields definition from get-like syntax such as
              ['acl', 'updated'] so that the caller does not need to
              prepend 'items/' or specify any fields necessary for listing
              (such as prefixes or nextPageToken).

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Iterator over CsObjectOrPrefix wrapper class.
    """
    raise NotImplementedError('ListObjects must be overloaded')
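
  # A hedged consumer sketch for ListObjects results, assuming 'api' is an
  # instance of a concrete CloudApi implementation; it shows how callers can
  # distinguish objects from prefixes using CsObjectOrPrefixType.
  #
  #   for obj_or_prefix in api.ListObjects('example-bucket', prefix='logs/',
  #                                        delimiter='/'):
  #     if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
  #       print('object: %s' % obj_or_prefix.data.name)
  #     else:  # CloudApi.CsObjectOrPrefixType.PREFIX
  #       print('subdir: %s' % obj_or_prefix.data)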

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """Gets object metadata.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      generation: Generation of the object to retrieve.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields, for
              example, ['acl', 'updated'].

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Object object.
    """
    raise NotImplementedError('GetObjectMetadata must be overloaded')

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """Updates object metadata with patch semantics.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Name of the object to update.
      metadata: Object object defining metadata to be updated.
      canned_acl: Canned ACL to be set on the object.
      generation: Generation (or version) of the object to update.
      preconditions: Preconditions for the request.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Updated object metadata.
    """
    raise NotImplementedError('PatchObjectMetadata must be overloaded')

  class DownloadStrategy(object):
    """Enum class for specifying download strategy."""
    ONE_SHOT = 'oneshot'
    RESUMABLE = 'resumable'

  def GetObjectMedia(self, bucket_name, object_name, download_stream,
                     provider=None, generation=None, object_size=None,
                     download_strategy=DownloadStrategy.ONE_SHOT, start_byte=0,
                     end_byte=None, progress_callback=None,
                     serialization_data=None, digesters=None):
    """Gets object data.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      download_stream: Stream to send the object data to.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      generation: Generation of the object to retrieve.
      object_size: Total size of the object being downloaded.
      download_strategy: Cloud API download strategy to use for download.
      start_byte: Starting point for download (for resumable downloads and
                  range requests). Can be set to a negative value to request
                  that many bytes from the end of the object (the Python
                  equivalent of [-3:]).
      end_byte: Ending byte number, inclusive, for download (for range
                requests). If None, download the rest of the object.
      progress_callback: Optional callback function for progress notifications.
                         Receives calls with arguments
                         (bytes_transferred, total_size).
      serialization_data: Implementation-specific JSON string of a dict
                          containing serialization information for the download.
      digesters: Dict of {string : digester}, where string is a name of a hash
                 algorithm, and digester is a validation digester that supports
                 update(bytes) and digest() using that algorithm.
                 Implementation can set the digester value to None to indicate
                 bytes were not successfully digested on-the-fly.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Content-encoding string if it was detected that the server sent an encoded
      object during transfer, None otherwise.
    """
    raise NotImplementedError('GetObjectMedia must be overloaded')
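
  # A hedged download sketch, assuming 'api' is a concrete implementation; the
  # digesters dict uses hashlib.md5 purely as an example of an object that
  # supports update(bytes) and digest().
  #
  #   import hashlib
  #
  #   digesters = {'md5': hashlib.md5()}
  #   with open('/tmp/example-object', 'wb') as download_stream:
  #     api.GetObjectMedia(
  #         'example-bucket', 'example-object', download_stream,
  #         download_strategy=CloudApi.DownloadStrategy.RESUMABLE,
  #         digesters=digesters)
  #   if digesters['md5'] is not None:
  #     local_md5_digest = digesters['md5'].digest()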

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   size=None, preconditions=None, progress_callback=None,
                   provider=None, fields=None):
    """Uploads object data and metadata.

    Args:
      upload_stream: Seekable stream of object data.
      object_metadata: Object metadata for new object.  Must include bucket
                       and object name.
      canned_acl: Optional canned ACL to apply to object. Overrides ACL set
                  in object_metadata.
      size: Optional object size.
      preconditions: Preconditions for the request.
      progress_callback: Optional callback function for progress notifications.
                         Receives calls with arguments
                         (bytes_transferred, total_size).
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Object object for newly created destination object.
    """
    raise NotImplementedError('UploadObject must be overloaded')

  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, preconditions=None,
                            progress_callback=None, provider=None,
                            fields=None):
    332     """Uploads object data and metadata.

    Args:
      upload_stream: Stream of object data. May not be seekable.
      object_metadata: Object metadata for new object.  Must include bucket
                       and object name.
      canned_acl: Optional canned ACL to apply to object. Overrides ACL set
                  in object_metadata.
      preconditions: Preconditions for the request.
      progress_callback: Optional callback function for progress notifications.
                         Receives calls with arguments
                         (bytes_transferred, total_size), but fills in only
                         bytes_transferred.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Object object for newly created destination object.
    """
    raise NotImplementedError('UploadObjectStreaming must be overloaded')

  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None,
      size=None, preconditions=None, serialization_data=None,
      tracker_callback=None, progress_callback=None, provider=None,
      fields=None):
    """Uploads object data and metadata using a resumable upload strategy.

    Args:
      upload_stream: Seekable stream of object data.
      object_metadata: Object metadata for new object.  Must include bucket
                       and object name.
      canned_acl: Optional canned ACL to apply to object. Overrides ACL set
                  in object_metadata.
      size: Total size of the object.
      preconditions: Preconditions for the request.
      serialization_data: Dict of {'url' : UploadURL} allowing for uploads to
                          be resumed.
      tracker_callback: Callback function taking an upload URL string.
                        Guaranteed to be called when the implementation gets an
                        upload URL, allowing the caller to resume the upload
                        across process breaks by saving the upload URL in
                        a tracker file.
      progress_callback: Optional callback function for progress notifications.
                         Receives calls with arguments
                         (bytes_transferred, total_size).
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields when the
              upload is complete.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Object object for newly created destination object.
    """
    raise NotImplementedError('UploadObjectResumable must be overloaded')
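
  # A hedged resumable-upload sketch, assuming 'api' is a concrete
  # implementation and 'object_metadata' already names the destination bucket
  # and object; the tracker file path is illustrative only.
  #
  #   def _tracker_callback(upload_url):
  #     # Persist the upload URL so the upload can resume across process
  #     # breaks.
  #     with open('/tmp/example-upload.tracker', 'w') as tracker_file:
  #       tracker_file.write(upload_url)
  #
  #   with open('/tmp/example-source-file', 'rb') as upload_stream:
  #     api.UploadObjectResumable(
  #         upload_stream, object_metadata, size=1024,
  #         tracker_callback=_tracker_callback)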

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """Copies an object in the cloud.

    Args:
      src_obj_metadata: Object metadata for source object.  Must include
                        bucket name, object name, and etag.
      dst_obj_metadata: Object metadata for new object.  Must include bucket
                        and object name.
      src_generation: Generation of the source object to copy.
      canned_acl: Optional canned ACL to apply to destination object. Overrides
                  ACL set in dst_obj_metadata.
      preconditions: Destination object preconditions for the request.
      progress_callback: Optional callback function for progress notifications.
                         Receives calls with arguments
                         (bytes_transferred, total_size).
      max_bytes_per_call: Integer describing maximum number of bytes
                          to rewrite per service call.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Object object for newly created destination object.
    """
    raise NotImplementedError('CopyObject must be overloaded')
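
  # A hedged copy sketch, assuming src_obj_metadata and dst_obj_metadata are
  # the implementation's Object messages carrying at least bucket, name, and
  # (for the source) etag, as required by the docstring above.
  #
  #   def _progress(bytes_transferred, total_size):
  #     print('%d / %d bytes copied' % (bytes_transferred, total_size))
  #
  #   copied = api.CopyObject(src_obj_metadata, dst_obj_metadata,
  #                           progress_callback=_progress)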

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """Composes an object in the cloud.

    Args:
      src_objs_metadata: List of ComposeRequest.SourceObjectsValueListEntries
                         specifying the objects to compose.
      dst_obj_metadata: Metadata for the destination object including bucket
                        and object name.
      preconditions: Destination object preconditions for the request.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Object metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Composed object metadata.
    """
    raise NotImplementedError('ComposeObject must be overloaded')

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """Deletes an object.

    Args:
      bucket_name: Name of the containing bucket.
      object_name: Name of the object to delete.
      preconditions: Preconditions for the request.
      generation: Generation (or version) of the object to delete; if None,
                  deletes the live object.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      None.
    """
    raise NotImplementedError('DeleteObject must be overloaded')

  def WatchBucket(self, bucket_name, address, channel_id, token=None,
                  provider=None, fields=None):
    """Creates a notification subscription for changes to objects in a bucket.

    Args:
      bucket_name: Bucket containing the objects.
      address: Address to which to send notifications.
      channel_id: Unique ID string for the channel.
      token: If present, token string is delivered with each notification.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.
      fields: If present, return only these Channel metadata fields.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      Channel object describing the notification subscription.
    """
    raise NotImplementedError('WatchBucket must be overloaded')

  def StopChannel(self, channel_id, resource_id, provider=None):
    """Stops a notification channel.

    Args:
      channel_id: Unique ID string for the channel.
      resource_id: Version-agnostic ID string for the channel.
      provider: Cloud storage provider to connect to.  If not present,
                class-wide default is used.

    Raises:
      ArgumentException for errors during input validation.
      ServiceException for errors interacting with cloud storage providers.

    Returns:
      None.
    """
    raise NotImplementedError('StopChannel must be overloaded')


class Preconditions(object):
  """Preconditions class for specifying preconditions to cloud API requests."""

  def __init__(self, gen_match=None, meta_gen_match=None):
    """Instantiates a Preconditions object.

    Args:
      gen_match: Perform request only if generation of target object
                 matches the given integer. Ignored for bucket requests.
      meta_gen_match: Perform request only if metageneration of target
                      object/bucket matches the given integer.
    """
    self.gen_match = gen_match
    self.meta_gen_match = meta_gen_match

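# A hedged sketch of constructing Preconditions; 'api' and the generation
# value are illustrative assumptions. The delete below succeeds only if the
# live object's generation still matches the captured value.
#
#   preconditions = Preconditions(gen_match=1234567890123456)
#   api.DeleteObject('example-bucket', 'example-object',
#                    preconditions=preconditions)
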

class ArgumentException(Exception):
  """Exception raised when arguments to a Cloud API method are invalid.

    This exception is never raised as a result of a failed call to a cloud
    storage provider.
  """

  def __init__(self, reason):
    Exception.__init__(self)
    self.reason = reason

  def __repr__(self):
    return str(self)

  def __str__(self):
    return '%s: %s' % (self.__class__.__name__, self.reason)


class ProjectIdException(ArgumentException):
  """Exception raised when a Project ID argument is required but not present."""


class ServiceException(Exception):
  """Exception raised when a cloud storage provider request fails.

    This exception is raised only as a result of a failed remote call.
  """

  def __init__(self, reason, status=None, body=None):
    Exception.__init__(self)
    self.reason = reason
    self.status = status
    self.body = body

  def __repr__(self):
    return str(self)

  def __str__(self):
    message = '%s:' % self.__class__.__name__
    if self.status:
      message += ' %s' % self.status
    message += ' %s' % self.reason
    if self.body:
      message += '\n%s' % self.body
    return message


class RetryableServiceException(ServiceException):
  """Exception class for retryable exceptions."""


class ResumableDownloadException(RetryableServiceException):
  """Exception raised for resumable downloads that can be retried later."""


class ResumableUploadException(RetryableServiceException):
  """Exception raised for resumable uploads that can be retried with the same
  upload ID.
  """


class ResumableUploadStartOverException(RetryableServiceException):
  """Exception raised for resumable uploads that can be retried with a new
  upload ID.
  """


class ResumableUploadAbortException(ServiceException):
  """Exception raised for resumable uploads that cannot be retried later."""


class AuthenticationException(ServiceException):
  """Exception raised for errors during the authentication process."""


class PreconditionException(ServiceException):
  """Exception raised for precondition failures."""


class NotFoundException(ServiceException):
  """Exception raised when a resource is not found (404)."""


class BucketNotFoundException(NotFoundException):
  """Exception raised when a bucket resource is not found (404)."""

  def __init__(self, reason, bucket_name, status=None, body=None):
    super(BucketNotFoundException, self).__init__(reason, status=status,
                                                  body=body)
    self.bucket_name = bucket_name


class NotEmptyException(ServiceException):
  """Exception raised when attempting to delete a bucket that is not empty."""


class BadRequestException(ServiceException):
  """Exception raised for malformed requests.

    Where it is possible to detect invalid arguments prior to sending them
    to the server, an ArgumentException should be raised instead.
  """


class AccessDeniedException(ServiceException):
  """Exception raised when the authenticated user lacks access rights.

    This is raised when the authentication process succeeded but the
    authenticated user does not have access rights to the requested resource.
  """
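

# A hedged retry sketch showing how a caller might treat the exception
# hierarchy above; 'api', the attempt count, and the backoff are illustrative
# assumptions rather than gsutil's actual retry logic.
#
#   import time
#
#   def _get_bucket_with_retries(api, bucket_name, attempts=3):
#     for attempt in range(attempts):
#       try:
#         return api.GetBucket(bucket_name)
#       except RetryableServiceException:
#         if attempt == attempts - 1:
#           raise
#         time.sleep(2 ** attempt)  # Simple exponential backoff.
#       except ServiceException:
#         raise  # Non-retryable provider error.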