# -*- coding: utf-8 -*-
# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of setmeta command for setting cloud object metadata."""

from __future__ import absolute_import

from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import Preconditions
from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.name_expansion import NameExpansionIterator
from gslib.storage_url import StorageUrlFromString
from gslib.translation_helper import CopyObjectMetadata
from gslib.translation_helper import ObjectMetadataFromHeaders
from gslib.translation_helper import PreconditionsFromHeaders
from gslib.util import GetCloudApiInstance
from gslib.util import NO_MAX
from gslib.util import Retry


_SYNOPSIS = """
  gsutil setmeta -h [header:value|header] ... url...
"""

_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The gsutil setmeta command allows you to set or remove the metadata on one
  or more objects. It takes one or more header arguments followed by one or
  more URLs, where each header argument is in one of two forms:

  - if you specify header:value, it will set the given header on all
    named objects.

  - if you specify header (with no value), it will remove the given header
    from all named objects.

  For example, the following command would set the Content-Type and
  Cache-Control and remove the Content-Disposition on the specified objects:

    gsutil setmeta -h "Content-Type:text/html" \\
      -h "Cache-Control:public, max-age=3600" \\
      -h "Content-Disposition" gs://bucket/*.html

  If you have a large number of objects to update you might want to use the
  gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
  update:

    gsutil -m setmeta -h "Content-Type:text/html" \\
      -h "Cache-Control:public, max-age=3600" \\
      -h "Content-Disposition" gs://bucket/*.html

  You can also use the setmeta command to set custom metadata on an object:

    gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object

  See "gsutil help metadata" for details about how you can set metadata
  while uploading objects, what metadata fields can be set and the meaning of
  these fields, use of custom metadata, and how to view currently set metadata.

  NOTE: By default, publicly readable objects are served with a Cache-Control
  header allowing such objects to be cached for 3600 seconds. For more details
  about this default behavior see the CACHE-CONTROL section of
  "gsutil help metadata". If you need to ensure that updates become visible
  immediately, you should set a Cache-Control header of "Cache-Control:private,
  max-age=0, no-transform" on such objects. You can do this with the command:

    gsutil setmeta -h "Content-Type:text/html" \\
      -h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html

  The setmeta command reads each object's current generation and metageneration
  and uses those as preconditions unless they are otherwise specified by
  top-level arguments. For example:

    gsutil -h "x-goog-if-metageneration-match:2" setmeta
      -h "x-goog-meta-icecreamflavor:vanilla"

  will set the icecreamflavor:vanilla metadata if the current live object has a
  metageneration of 2.

<B>OPTIONS</B>
  -h          Specifies a header:value to be added, or header to be removed,
              from each named object.
""")

# Setmeta assumes a header-like model which doesn't line up with the JSON way
# of doing things. This list comes from functionality that was supported by
# gsutil3 at the time gsutil4 was released.
SETTABLE_FIELDS = ['cache-control', 'content-disposition',
                   'content-encoding', 'content-language',
                   'content-md5', 'content-type']


def _SetMetadataExceptionHandler(cls, e):
  """Exception handler that maintains state about post-completion status.

  Args:
    cls: The SetMetaCommand instance whose Apply call hit the exception.
    e: The exception that was raised while setting metadata.
  """
  cls.logger.error(e)
  # RunCommand checks this flag after Apply returns to decide whether to fail.
  cls.everything_set_okay = False


def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level trampoline passed to Apply; calls cls.SetMetadataFunc.

  Args:
    cls: The SetMetaCommand instance.
    name_expansion_result: NameExpansionResult describing the target object.
    thread_state: Forwarded unchanged to SetMetadataFunc.
  """
  cls.SetMetadataFunc(name_expansion_result, thread_state=thread_state)


class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command.

    Returns:
      0 when metadata was set on every matched object.

    Raises:
      CommandException: if a canned ACL header is supplied, the header
          arguments are invalid, a single non-recursive URL does not name an
          object, or metadata could not be set on some object.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # A header to be removed is represented by an empty value in the change
    # dict that is later handed to ObjectMetadataFromHeaders.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    Reads the object's current generation and metageneration and uses them as
    preconditions for the patch unless the user supplied their own.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    fields = ['generation', 'metadata', 'metageneration']
    cloud_obj_metadata = gsutil_api.GetObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name,
        generation=exp_src_url.generation, provider=exp_src_url.scheme,
        fields=fields)

    # User-specified preconditions win; otherwise pin to the values just read.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed. For JSON we need to build the complete set.
    if api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
                         override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme)

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Each entry is either 'header:value' (set the header) or 'header' (remove
    the header). Only the first ':' separates the header name from its value,
    so values may themselves contain colons.

    Args:
      headers: List of header argument strings to validate and parse.

    Returns:
      (metadata_minus, metadata_plus): Tuple of the set of header names to
      remove and the dict mapping header names to the values to set. Header
      names are lower-cased.

    Raises:
      CommandException: if a header name or a non-custom header value is not
          ASCII, a header is specified more than once, or a header is neither
          in SETTABLE_FIELDS nor a custom metadata header.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Build a count of the keys encountered from each plus and minus arg so we
    # can check for dupe field specs.
    num_metadata_plus_elems = 0
    num_cust_metadata_plus_elems = 0
    num_metadata_minus_elems = 0
    num_cust_metadata_minus_elems = 0

    for md_arg in headers:
      # Use partition rather than split: every character after the first ':'
      # is part of the value (e.g. URLs or timestamps contain colons).
      (header, _, value) = md_arg.partition(':')
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if value:
        if _IsCustomMeta(header):
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
          num_cust_metadata_plus_elems += 1
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          _InsistAsciiHeaderValue(header, value)
          value = str(value)
          metadata_plus[header] = value
          num_metadata_plus_elems += 1
      else:
        # No value (or an empty one) means the header is to be removed.
        if _IsCustomMeta(header):
          cust_metadata_minus.add(header)
          num_cust_metadata_minus_elems += 1
        else:
          metadata_minus.add(header)
          num_metadata_minus_elems += 1

    # A count that differs from the container size means a key was repeated;
    # a non-empty intersection means a header was both set and removed.
    if (num_metadata_plus_elems != len(metadata_plus)
        or num_cust_metadata_plus_elems != len(cust_metadata_plus)
        or num_metadata_minus_elems != len(metadata_minus)
        or num_cust_metadata_minus_elems != len(cust_metadata_minus)
        or metadata_minus.intersection(metadata_plus)
        or cust_metadata_minus.intersection(cust_metadata_plus)):
      raise CommandException('Each header must appear at most once.')
    other_than_base_fields = (set(metadata_plus.keys())
                              .difference(SETTABLE_FIELDS))
    other_than_base_fields.update(
        metadata_minus.difference(SETTABLE_FIELDS))
    for f in other_than_base_fields:
      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether f.startswith the
      # provider metadata_prefix, but here we just parse the spec
      # once, before processing any of the URLs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on an another
      # provider's object, for example.
      if not _IsCustomMeta(f):
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' % (
                f, sorted(SETTABLE_FIELDS)))
    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)


def _InsistAscii(string, message):
  """Raises CommandException(message) if string has a non-ASCII character."""
  if not all(ord(c) < 128 for c in string):
    raise CommandException(message)


def _InsistAsciiHeader(header):
  """Raises CommandException if the header name is not pure ASCII."""
  _InsistAscii(header, 'Invalid non-ASCII header (%s).' % header)


def _InsistAsciiHeaderValue(header, value):
  """Raises CommandException if the value given for header is not ASCII."""
  _InsistAscii(
      value, ('Invalid non-ASCII value (%s) was provided for header %s.'
              % (value, header)))


def _IsCustomMeta(header):
  """Returns True if header starts with x-goog-meta- or x-amz-meta-."""
  return header.startswith(('x-goog-meta-', 'x-amz-meta-'))