Home | History | Annotate | Download | only in commands
      1 # -*- coding: utf-8 -*-
      2 # Copyright 2012 Google Inc. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License, Version 2.0 (the "License");
      5 # you may not use this file except in compliance with the License.
      6 # You may obtain a copy of the License at
      7 #
      8 #     http://www.apache.org/licenses/LICENSE-2.0
      9 #
     10 # Unless required by applicable law or agreed to in writing, software
     11 # distributed under the License is distributed on an "AS IS" BASIS,
     12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 # See the License for the specific language governing permissions and
     14 # limitations under the License.
     15 """Implementation of setmeta command for setting cloud object metadata."""
     16 
     17 from __future__ import absolute_import
     18 
     19 from gslib.cloud_api import AccessDeniedException
     20 from gslib.cloud_api import PreconditionException
     21 from gslib.cloud_api import Preconditions
     22 from gslib.command import Command
     23 from gslib.command_argument import CommandArgument
     24 from gslib.cs_api_map import ApiSelector
     25 from gslib.exception import CommandException
     26 from gslib.name_expansion import NameExpansionIterator
     27 from gslib.storage_url import StorageUrlFromString
     28 from gslib.translation_helper import CopyObjectMetadata
     29 from gslib.translation_helper import ObjectMetadataFromHeaders
     30 from gslib.translation_helper import PreconditionsFromHeaders
     31 from gslib.util import GetCloudApiInstance
     32 from gslib.util import NO_MAX
     33 from gslib.util import Retry
     34 
     35 
     36 _SYNOPSIS = """
     37   gsutil setmeta -h [header:value|header] ... url...
     38 """
     39 
     40 _DETAILED_HELP_TEXT = ("""
     41 <B>SYNOPSIS</B>
     42 """ + _SYNOPSIS + """
     43 
     44 
     45 <B>DESCRIPTION</B>
     46   The gsutil setmeta command allows you to set or remove the metadata on one
     47   or more objects. It takes one or more header arguments followed by one or
     48   more URLs, where each header argument is in one of two forms:
     49 
     50   - if you specify header:value, it will set the given header on all
     51     named objects.
     52 
     53   - if you specify header (with no value), it will remove the given header
     54     from all named objects.
     55 
     56   For example, the following command would set the Content-Type and
     57   Cache-Control and remove the Content-Disposition on the specified objects:
     58 
     59     gsutil setmeta -h "Content-Type:text/html" \\
     60       -h "Cache-Control:public, max-age=3600" \\
     61       -h "Content-Disposition" gs://bucket/*.html
     62 
     63   If you have a large number of objects to update you might want to use the
     64   gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
     65   update:
     66 
     67     gsutil -m setmeta -h "Content-Type:text/html" \\
     68       -h "Cache-Control:public, max-age=3600" \\
     69       -h "Content-Disposition" gs://bucket/*.html
     70 
     71   You can also use the setmeta command to set custom metadata on an object:
     72 
     73     gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object
     74 
     75   See "gsutil help metadata" for details about how you can set metadata
     76   while uploading objects, what metadata fields can be set and the meaning of
     77   these fields, use of custom metadata, and how to view currently set metadata.
     78 
     79   NOTE: By default, publicly readable objects are served with a Cache-Control
     80   header allowing such objects to be cached for 3600 seconds. For more details
     81   about this default behavior see the CACHE-CONTROL section of
     82   "gsutil help metadata". If you need to ensure that updates become visible
     83   immediately, you should set a Cache-Control header of "Cache-Control:private,
     84   max-age=0, no-transform" on such objects.  You can do this with the command:
     85 
     86     gsutil setmeta -h "Content-Type:text/html" \\
     87       -h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html
     88 
     89   The setmeta command reads each object's current generation and metageneration
     90   and uses those as preconditions unless they are otherwise specified by
     91   top-level arguments. For example:
     92 
    gsutil -h "x-goog-if-metageneration-match:2" setmeta \\
      -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object
     95 
     96   will set the icecreamflavor:vanilla metadata if the current live object has a
     97   metageneration of 2.
     98 
     99 <B>OPTIONS</B>
    100   -h          Specifies a header:value to be added, or header to be removed,
    101               from each named object.
    102 """)
    103 
    104 # Setmeta assumes a header-like model which doesn't line up with the JSON way
    105 # of doing things. This list comes from functionality that was supported by
    106 # gsutil3 at the time gsutil4 was released.
    107 SETTABLE_FIELDS = ['cache-control', 'content-disposition',
    108                    'content-encoding', 'content-language',
    109                    'content-md5', 'content-type']
    110 
    111 
    112 def _SetMetadataExceptionHandler(cls, e):
    113   """Exception handler that maintains state about post-completion status."""
    114   cls.logger.error(e)
    115   cls.everything_set_okay = False
    116 
    117 
    118 def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None):
    119   cls.SetMetadataFunc(name_expansion_result, thread_state=thread_state)
    120 
    121 
    122 class SetMetaCommand(Command):
    123   """Implementation of gsutil setmeta command."""
    124 
    125   # Command specification. See base class for documentation.
    126   command_spec = Command.CreateCommandSpec(
    127       'setmeta',
    128       command_name_aliases=['setheader'],
    129       usage_synopsis=_SYNOPSIS,
    130       min_args=1,
    131       max_args=NO_MAX,
    132       supported_sub_args='h:rR',
    133       file_url_ok=False,
    134       provider_url_ok=False,
    135       urls_start_arg=1,
    136       gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
    137       gs_default_api=ApiSelector.JSON,
    138       argparse_arguments=[
    139           CommandArgument.MakeZeroOrMoreCloudURLsArgument()
    140       ]
    141   )
    142   # Help specification. See help_provider.py for documentation.
    143   help_spec = Command.HelpSpec(
    144       help_name='setmeta',
    145       help_name_aliases=['setheader'],
    146       help_type='command_help',
    147       help_one_line_summary='Set metadata on already uploaded objects',
    148       help_text=_DETAILED_HELP_TEXT,
    149       subcommand_help_text={},
    150   )
    151 
    152   def RunCommand(self):
    153     """Command entry point for the setmeta command."""
    154     headers = []
    155     if self.sub_opts:
    156       for o, a in self.sub_opts:
    157         if o == '-h':
    158           if 'x-goog-acl' in a or 'x-amz-acl' in a:
    159             raise CommandException(
    160                 'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
    161                 'set ... to set canned ACLs.')
    162           headers.append(a)
    163 
    164     (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)
    165 
    166     self.metadata_change = metadata_plus
    167     for header in metadata_minus:
    168       self.metadata_change[header] = ''
    169 
    170     if len(self.args) == 1 and not self.recursion_requested:
    171       url = StorageUrlFromString(self.args[0])
    172       if not (url.IsCloudUrl() and url.IsObject()):
    173         raise CommandException('URL (%s) must name an object' % self.args[0])
    174 
    175     # Used to track if any objects' metadata failed to be set.
    176     self.everything_set_okay = True
    177 
    178     self.preconditions = PreconditionsFromHeaders(self.headers)
    179 
    180     name_expansion_iterator = NameExpansionIterator(
    181         self.command_name, self.debug, self.logger, self.gsutil_api,
    182         self.args, self.recursion_requested, all_versions=self.all_versions,
    183         continue_on_error=self.parallel_operations)
    184 
    185     try:
    186       # Perform requests in parallel (-m) mode, if requested, using
    187       # configured number of parallel processes and threads. Otherwise,
    188       # perform requests with sequential function calls in current process.
    189       self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
    190                  _SetMetadataExceptionHandler, fail_on_error=True)
    191     except AccessDeniedException as e:
    192       if e.status == 403:
    193         self._WarnServiceAccounts()
    194       raise
    195 
    196     if not self.everything_set_okay:
    197       raise CommandException('Metadata for some objects could not be set.')
    198 
    199     return 0
    200 
    201   @Retry(PreconditionException, tries=3, timeout_secs=1)
    202   def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    203     """Sets metadata on an object.
    204 
    205     Args:
    206       name_expansion_result: NameExpansionResult describing target object.
    207       thread_state: gsutil Cloud API instance to use for the operation.
    208     """
    209     gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    210 
    211     exp_src_url = name_expansion_result.expanded_storage_url
    212     self.logger.info('Setting metadata on %s...', exp_src_url)
    213 
    214     fields = ['generation', 'metadata', 'metageneration']
    215     cloud_obj_metadata = gsutil_api.GetObjectMetadata(
    216         exp_src_url.bucket_name, exp_src_url.object_name,
    217         generation=exp_src_url.generation, provider=exp_src_url.scheme,
    218         fields=fields)
    219 
    220     preconditions = Preconditions(
    221         gen_match=self.preconditions.gen_match,
    222         meta_gen_match=self.preconditions.meta_gen_match)
    223     if preconditions.gen_match is None:
    224       preconditions.gen_match = cloud_obj_metadata.generation
    225     if preconditions.meta_gen_match is None:
    226       preconditions.meta_gen_match = cloud_obj_metadata.metageneration
    227 
    228     # Patch handles the patch semantics for most metadata, but we need to
    229     # merge the custom metadata field manually.
    230     patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)
    231 
    232     api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    233     # For XML we only want to patch through custom metadata that has
    234     # changed.  For JSON we need to build the complete set.
    235     if api == ApiSelector.XML:
    236       pass
    237     elif api == ApiSelector.JSON:
    238       CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
    239                          override=True)
    240       patch_obj_metadata = cloud_obj_metadata
    241       # Patch body does not need the object generation and metageneration.
    242       patch_obj_metadata.generation = None
    243       patch_obj_metadata.metageneration = None
    244 
    245     gsutil_api.PatchObjectMetadata(
    246         exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
    247         generation=exp_src_url.generation, preconditions=preconditions,
    248         provider=exp_src_url.scheme)
    249 
    250   def _ParseMetadataHeaders(self, headers):
    251     """Validates and parses metadata changes from the headers argument.
    252 
    253     Args:
    254       headers: Header dict to validate and parse.
    255 
    256     Returns:
    257       (metadata_plus, metadata_minus): Tuple of header sets to add and remove.
    258     """
    259     metadata_minus = set()
    260     cust_metadata_minus = set()
    261     metadata_plus = {}
    262     cust_metadata_plus = {}
    263     # Build a count of the keys encountered from each plus and minus arg so we
    264     # can check for dupe field specs.
    265     num_metadata_plus_elems = 0
    266     num_cust_metadata_plus_elems = 0
    267     num_metadata_minus_elems = 0
    268     num_cust_metadata_minus_elems = 0
    269 
    270     for md_arg in headers:
    271       parts = md_arg.split(':')
    272       if len(parts) not in (1, 2):
    273         raise CommandException(
    274             'Invalid argument: must be either header or header:value (%s)' %
    275             md_arg)
    276       if len(parts) == 2:
    277         (header, value) = parts
    278       else:
    279         (header, value) = (parts[0], None)
    280       _InsistAsciiHeader(header)
    281       # Translate headers to lowercase to match the casing assumed by our
    282       # sanity-checking operations.
    283       header = header.lower()
    284       if value:
    285         if _IsCustomMeta(header):
    286           # Allow non-ASCII data for custom metadata fields.
    287           cust_metadata_plus[header] = value
    288           num_cust_metadata_plus_elems += 1
    289         else:
    290           # Don't unicode encode other fields because that would perturb their
    291           # content (e.g., adding %2F's into the middle of a Cache-Control
    292           # value).
    293           _InsistAsciiHeaderValue(header, value)
    294           value = str(value)
    295           metadata_plus[header] = value
    296           num_metadata_plus_elems += 1
    297       else:
    298         if _IsCustomMeta(header):
    299           cust_metadata_minus.add(header)
    300           num_cust_metadata_minus_elems += 1
    301         else:
    302           metadata_minus.add(header)
    303           num_metadata_minus_elems += 1
    304 
    305     if (num_metadata_plus_elems != len(metadata_plus)
    306         or num_cust_metadata_plus_elems != len(cust_metadata_plus)
    307         or num_metadata_minus_elems != len(metadata_minus)
    308         or num_cust_metadata_minus_elems != len(cust_metadata_minus)
    309         or metadata_minus.intersection(set(metadata_plus.keys()))):
    310       raise CommandException('Each header must appear at most once.')
    311     other_than_base_fields = (set(metadata_plus.keys())
    312                               .difference(SETTABLE_FIELDS))
    313     other_than_base_fields.update(
    314         metadata_minus.difference(SETTABLE_FIELDS))
    315     for f in other_than_base_fields:
    316       # This check is overly simple; it would be stronger to check, for each
    317       # URL argument, whether f.startswith the
    318       # provider metadata_prefix, but here we just parse the spec
    319       # once, before processing any of the URLs. This means we will not
    320       # detect if the user tries to set an x-goog-meta- field on an another
    321       # provider's object, for example.
    322       if not _IsCustomMeta(f):
    323         raise CommandException(
    324             'Invalid or disallowed header (%s).\nOnly these fields (plus '
    325             'x-goog-meta-* fields) can be set or unset:\n%s' % (
    326                 f, sorted(list(SETTABLE_FIELDS))))
    327     metadata_plus.update(cust_metadata_plus)
    328     metadata_minus.update(cust_metadata_minus)
    329     return (metadata_minus, metadata_plus)
    330 
    331 
    332 def _InsistAscii(string, message):
    333   if not all(ord(c) < 128 for c in string):
    334     raise CommandException(message)
    335 
    336 
    337 def _InsistAsciiHeader(header):
    338   _InsistAscii(header, 'Invalid non-ASCII header (%s).' % header)
    339 
    340 
    341 def _InsistAsciiHeaderValue(header, value):
    342   _InsistAscii(
    343       value, ('Invalid non-ASCII value (%s) was provided for header %s.'
    344               % (value, header)))
    345 
    346 
    347 def _IsCustomMeta(header):
    348   return header.startswith('x-goog-meta-') or header.startswith('x-amz-meta-')
    349