Home | History | Annotate | Download | only in commands
      1 # -*- coding: utf-8 -*-
      2 # Copyright 2013 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #     http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 """Implementation of compose command for Google Cloud Storage."""
     16 
     17 from __future__ import absolute_import
     18 
     19 from gslib.bucket_listing_ref import BucketListingObject
     20 from gslib.command import Command
     21 from gslib.command_argument import CommandArgument
     22 from gslib.cs_api_map import ApiSelector
     23 from gslib.exception import CommandException
     24 from gslib.storage_url import ContainsWildcard
     25 from gslib.storage_url import StorageUrlFromString
     26 from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
     27 from gslib.translation_helper import PreconditionsFromHeaders
     28 
     29 MAX_COMPONENT_COUNT = 1024
     30 MAX_COMPOSE_ARITY = 32
     31 
     32 _SYNOPSIS = """
     33   gsutil compose gs://bucket/obj1 gs://bucket/obj2 ... gs://bucket/composite
     34 """
     35 
     36 _DETAILED_HELP_TEXT = ("""
     37 <B>SYNOPSIS</B>
     38 """ + _SYNOPSIS + """
     39 
     40 
     41 <B>DESCRIPTION</B>
     42   The compose command creates a new object whose content is the concatenation
     43   of a given sequence of component objects under the same bucket. gsutil uses
     44   the content type of the first source object to determine the destination
     45   object's content type. For more information, please see:
     46   https://developers.google.com/storage/docs/composite-objects
     47 
     48   Note also that the gsutil cp command will automatically split uploads for
     49   large files into multiple component objects, upload them in parallel, and
     50   compose them into a final object (which will be subject to the component
     51   count limit). This will still perform all uploads from a single machine. For
     52   extremely large files and/or very low per-machine bandwidth, you may want to
     53   split the file and upload it from multiple machines, and later compose these
     54   parts of the file manually. See the 'PARALLEL COMPOSITE UPLOADS' section under
     55   'gsutil help cp' for details.
     56 
     57   Appending simply entails uploading your new data to a temporary object,
     58   composing it with the growing append-target, and deleting the temporary
     59   object:
     60 
     61     $ echo 'new data' | gsutil cp - gs://bucket/data-to-append
     62     $ gsutil compose gs://bucket/append-target gs://bucket/data-to-append \\
     63         gs://bucket/append-target
     64     $ gsutil rm gs://bucket/data-to-append
     65 
     66   Note that there is a limit (currently %d) to the number of components for a
     67   given composite object. This means you can append to each object at most %d
     68   times.
     69 """ % (MAX_COMPONENT_COUNT, MAX_COMPONENT_COUNT - 1))
     70 
     71 
     72 class ComposeCommand(Command):
     73   """Implementation of gsutil compose command."""
     74 
     75   # Command specification. See base class for documentation.
     76   command_spec = Command.CreateCommandSpec(
     77       'compose',
     78       command_name_aliases=['concat'],
     79       usage_synopsis=_SYNOPSIS,
     80       min_args=2,
     81       max_args=MAX_COMPOSE_ARITY + 1,
     82       supported_sub_args='',
     83       # Not files, just object names without gs:// prefix.
     84       file_url_ok=False,
     85       provider_url_ok=False,
     86       urls_start_arg=1,
     87       gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
     88       gs_default_api=ApiSelector.JSON,
     89       argparse_arguments=[
     90           CommandArgument.MakeZeroOrMoreCloudURLsArgument()
     91       ]
     92   )
     93   # Help specification. See help_provider.py for documentation.
     94   help_spec = Command.HelpSpec(
     95       help_name='compose',
     96       help_name_aliases=['concat'],
     97       help_type='command_help',
     98       help_one_line_summary=(
     99           'Concatenate a sequence of objects into a new composite object.'),
    100       help_text=_DETAILED_HELP_TEXT,
    101       subcommand_help_text={},
    102   )
    103 
    104   def CheckProvider(self, url):
    105     if url.scheme != 'gs':
    106       raise CommandException(
    107           '"compose" called on URL with unsupported provider (%s).' % str(url))
    108 
    109   # Command entry point.
    110   def RunCommand(self):
    111     """Command entry point for the compose command."""
    112     target_url_str = self.args[-1]
    113     self.args = self.args[:-1]
    114     target_url = StorageUrlFromString(target_url_str)
    115     self.CheckProvider(target_url)
    116     if target_url.HasGeneration():
    117       raise CommandException('A version-specific URL (%s) cannot be '
    118                              'the destination for gsutil compose - abort.'
    119                              % target_url)
    120 
    121     dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
    122                                                 bucket=target_url.bucket_name)
    123 
    124     components = []
    125     # Remember the first source object so we can get its content type.
    126     first_src_url = None
    127     for src_url_str in self.args:
    128       if ContainsWildcard(src_url_str):
    129         src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
    130       else:
    131         src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
    132       for blr in src_url_iter:
    133         src_url = blr.storage_url
    134         self.CheckProvider(src_url)
    135 
    136         if src_url.bucket_name != target_url.bucket_name:
    137           raise CommandException(
    138               'GCS does not support inter-bucket composing.')
    139 
    140         if not first_src_url:
    141           first_src_url = src_url
    142         src_obj_metadata = (
    143             apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
    144                 name=src_url.object_name))
    145         if src_url.HasGeneration():
    146           src_obj_metadata.generation = src_url.generation
    147         components.append(src_obj_metadata)
    148         # Avoid expanding too many components, and sanity check each name
    149         # expansion result.
    150         if len(components) > MAX_COMPOSE_ARITY:
    151           raise CommandException('"compose" called with too many component '
    152                                  'objects. Limit is %d.' % MAX_COMPOSE_ARITY)
    153 
    154     if len(components) < 2:
    155       raise CommandException('"compose" requires at least 2 component objects.')
    156 
    157     dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
    158         first_src_url.bucket_name, first_src_url.object_name,
    159         provider=first_src_url.scheme, fields=['contentType']).contentType
    160 
    161     preconditions = PreconditionsFromHeaders(self.headers or {})
    162 
    163     self.logger.info(
    164         'Composing %s from %d component objects.', target_url, len(components))
    165     self.gsutil_api.ComposeObject(components, dst_obj_metadata,
    166                                   preconditions=preconditions,
    167                                   provider=target_url.scheme)
    168