#!/usr/bin/python

"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""

# System-level imports
import os
import posixpath
import sys
try:
    from apiclient.discovery import build as build_service
except ImportError:
    print ('Missing google-api-python-client. Please install it; directions '
           'can be found at https://developers.google.com/api-client-library/'
           'python/start/installation')
    raise

# Local imports
import url_utils


def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket; any leading
          slashes are ignored
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    # posixpath.join() throws away every earlier component as soon as it meets
    # an absolute one, so a source_path starting with '/' would drop the host
    # and bucket from the URL.  Strip leading slashes to keep it relative.
    relative_path = source_path.lstrip('/')
    source_http_url = posixpath.join(
        'http://storage.googleapis.com', source_bucket, relative_path)
    url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                            create_subdirs_if_needed=create_subdirs_if_needed)


def list_bucket_contents(bucket, subdir=None):
    """Lists a Google Cloud Storage bucket as a (dirs, files) tuple.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      A (dirs, files) tuple of lists.  Every entry is a name relative to
      subdir; directory names are returned without their trailing slash.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    objects = storage.objects()
    # nextPageToken has to be named in 'fields'; without it the response gives
    # no indication that further pages exist and the listing is silently
    # truncated to the first page.
    command = objects.list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken', prefix=subdir)

    dirs = []
    files = []
    while command is not None:
        results = command.execute()

        # Each page carries two subdicts:
        # prefixes: the full path of every directory within subdir, with
        #           trailing '/'
        # items: property dict for each file object within subdir
        #        (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            # Drop the subdir prefix and the trailing slash.
            dirs.append(dir_fullpath[subdir_length:-1])
        for file_properties in results.get('items', []):
            files.append(file_properties['name'][subdir_length:])

        # list_next() returns None once the response has no nextPageToken.
        command = objects.list_next(command, results)
    return (dirs, files)