Home | History | Annotate | Download | only in pyutils
      1 #!/usr/bin/python
      2 
      3 """
      4 Copyright 2014 Google Inc.
      5 
      6 Use of this source code is governed by a BSD-style license that can be
      7 found in the LICENSE file.
      8 
      9 Utilities for accessing Google Cloud Storage.
     10 
     11 TODO(epoger): move this into tools/utils for broader use?
     12 """
     13 
     14 # System-level imports
     15 import os
     16 import posixpath
     17 import sys
     18 try:
     19   from apiclient.discovery import build as build_service
     20 except ImportError:
     21   print ('Missing google-api-python-client.  Please install it; directions '
     22          'can be found at https://developers.google.com/api-client-library/'
     23          'python/start/installation')
     24   raise
     25 
     26 # Local imports
     27 import url_utils
     28 
     29 
     30 def download_file(source_bucket, source_path, dest_path,
     31                   create_subdirs_if_needed=False):
     32   """ Downloads a single file from Google Cloud Storage to local disk.
     33 
     34   Args:
     35     source_bucket: GCS bucket to download the file from
     36     source_path: full path (Posix-style) within that bucket
     37     dest_path: full path (local-OS-style) on local disk to copy the file to
     38     create_subdirs_if_needed: boolean; whether to create subdirectories as
     39         needed to create dest_path
     40   """
     41   source_http_url = posixpath.join(
     42       'http://storage.googleapis.com', source_bucket, source_path)
     43   url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
     44                           create_subdirs_if_needed=create_subdirs_if_needed)
     45 
     46 
     47 def list_bucket_contents(bucket, subdir=None):
     48   """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
     49 
     50   Uses the API documented at
     51   https://developers.google.com/storage/docs/json_api/v1/objects/list
     52 
     53   Args:
     54     bucket: name of the Google Storage bucket
     55     subdir: directory within the bucket to list, or None for root directory
     56   """
     57   # The GCS command relies on the subdir name (if any) ending with a slash.
     58   if subdir and not subdir.endswith('/'):
     59     subdir += '/'
     60   subdir_length = len(subdir) if subdir else 0
     61 
     62   storage = build_service('storage', 'v1')
     63   command = storage.objects().list(
     64       bucket=bucket, delimiter='/', fields='items(name),prefixes',
     65       prefix=subdir)
     66   results = command.execute()
     67 
     68   # The GCS command returned two subdicts:
     69   # prefixes: the full path of every directory within subdir, with trailing '/'
     70   # items: property dict for each file object within subdir
     71   #        (including 'name', which is full path of the object)
     72   dirs = []
     73   for dir_fullpath in results.get('prefixes', []):
     74     dir_basename = dir_fullpath[subdir_length:]
     75     dirs.append(dir_basename[:-1])  # strip trailing slash
     76   files = []
     77   for file_properties in results.get('items', []):
     78     file_fullpath = file_properties['name']
     79     file_basename = file_fullpath[subdir_length:]
     80     files.append(file_basename)
     81   return (dirs, files)
     82