Home | History | Annotate | Download | only in util
      1 # Copyright 2014 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Wrappers for gsutil, for basic interaction with Google Cloud Storage."""
      6 
      7 import contextlib
      8 import cStringIO
      9 import hashlib
     10 import logging
     11 import os
     12 import subprocess
     13 import sys
     14 import tarfile
     15 import urllib2
     16 
     17 from telemetry.core import platform
     18 from telemetry.util import path
     19 
     20 
     21 PUBLIC_BUCKET = 'chromium-telemetry'
     22 PARTNER_BUCKET = 'chrome-partner-telemetry'
     23 INTERNAL_BUCKET = 'chrome-telemetry'
     24 
     25 
     26 BUCKET_ALIASES = {
     27     'public': PUBLIC_BUCKET,
     28     'partner': PARTNER_BUCKET,
     29     'internal': INTERNAL_BUCKET,
     30 }
     31 
     32 
     33 _GSUTIL_URL = 'http://storage.googleapis.com/pub/gsutil.tar.gz'
     34 _DOWNLOAD_PATH = os.path.join(path.GetTelemetryDir(), 'third_party', 'gsutil')
     35 # TODO(tbarzic): A workaround for http://crbug.com/386416 and
     36 #     http://crbug.com/359293. See |_RunCommand|.
     37 _CROS_GSUTIL_HOME_WAR = '/home/chromeos-test/'
     38 
     39 
     40 class CloudStorageError(Exception):
     41   @staticmethod
     42   def _GetConfigInstructions(gsutil_path):
     43     if SupportsProdaccess(gsutil_path) and _FindExecutableInPath('prodaccess'):
     44       return 'Run prodaccess to authenticate.'
     45     else:
     46       if platform.GetHostPlatform().GetOSName() == 'chromeos':
     47         gsutil_path = ('HOME=%s %s' % (_CROS_GSUTIL_HOME_WAR, gsutil_path))
     48       return ('To configure your credentials:\n'
     49               '  1. Run "%s config" and follow its instructions.\n'
     50               '  2. If you have a @google.com account, use that account.\n'
     51               '  3. For the project-id, just enter 0.' % gsutil_path)
     52 
     53 
     54 class PermissionError(CloudStorageError):
     55   def __init__(self, gsutil_path):
     56     super(PermissionError, self).__init__(
     57         'Attempted to access a file from Cloud Storage but you don\'t '
     58         'have permission. ' + self._GetConfigInstructions(gsutil_path))
     59 
     60 
     61 class CredentialsError(CloudStorageError):
     62   def __init__(self, gsutil_path):
     63     super(CredentialsError, self).__init__(
     64         'Attempted to access a file from Cloud Storage but you have no '
     65         'configured credentials. ' + self._GetConfigInstructions(gsutil_path))
     66 
     67 
     68 class NotFoundError(CloudStorageError):
     69   pass
     70 
     71 
     72 # TODO(tonyg/dtu): Can this be replaced with distutils.spawn.find_executable()?
     73 def _FindExecutableInPath(relative_executable_path, *extra_search_paths):
     74   search_paths = list(extra_search_paths) + os.environ['PATH'].split(os.pathsep)
     75   for search_path in search_paths:
     76     executable_path = os.path.join(search_path, relative_executable_path)
     77     if path.IsExecutable(executable_path):
     78       return executable_path
     79   return None
     80 
     81 
     82 def _DownloadGsutil():
     83   logging.info('Downloading gsutil')
     84   with contextlib.closing(urllib2.urlopen(_GSUTIL_URL, timeout=60)) as response:
     85     with tarfile.open(fileobj=cStringIO.StringIO(response.read())) as tar_file:
     86       tar_file.extractall(os.path.dirname(_DOWNLOAD_PATH))
     87   logging.info('Downloaded gsutil to %s' % _DOWNLOAD_PATH)
     88 
     89   return os.path.join(_DOWNLOAD_PATH, 'gsutil')
     90 
     91 
     92 def FindGsutil():
     93   """Return the gsutil executable path. If we can't find it, download it."""
     94   # Look for a depot_tools installation.
     95   gsutil_path = _FindExecutableInPath(
     96       os.path.join('third_party', 'gsutil', 'gsutil'), _DOWNLOAD_PATH)
     97   if gsutil_path:
     98     return gsutil_path
     99 
    100   # Look for a gsutil installation.
    101   gsutil_path = _FindExecutableInPath('gsutil', _DOWNLOAD_PATH)
    102   if gsutil_path:
    103     return gsutil_path
    104 
    105   # Failed to find it. Download it!
    106   return _DownloadGsutil()
    107 
    108 
    109 def SupportsProdaccess(gsutil_path):
    110   with open(gsutil_path, 'r') as gsutil:
    111     return 'prodaccess' in gsutil.read()
    112 
    113 
def _RunCommand(args):
  """Runs gsutil with |args| and returns its stdout.

  Args:
    args: List of gsutil subcommand arguments, e.g. ['ls', 'gs://bucket/'].

  Returns:
    The stdout of the gsutil process as a string.

  Raises:
    CredentialsError: gsutil has no configured credentials, or the server
        rejected them (HTTP 401).
    PermissionError: the credentials lack access to the object (HTTP 403).
    NotFoundError: the bucket/object/URL does not exist.
    CloudStorageError: any other non-zero gsutil exit.
  """
  gsutil_path = FindGsutil()

  # On cros device, as telemetry is running as root, home will be set to /root/,
  # which is not writable. gsutil will attempt to create a download tracker dir
  # in home dir and fail. To avoid this, override HOME dir to something writable
  # when running on cros device.
  #
  # TODO(tbarzic): Figure out a better way to handle gsutil on cros.
  #     http://crbug.com/386416, http://crbug.com/359293.
  gsutil_env = None
  if platform.GetHostPlatform().GetOSName() == 'chromeos':
    gsutil_env = os.environ.copy()
    gsutil_env['HOME'] = _CROS_GSUTIL_HOME_WAR

  # gsutil is a Python script; run it through the current interpreter so it
  # works even if the script file itself isn't marked executable.
  gsutil = subprocess.Popen([sys.executable, gsutil_path] + args,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            env=gsutil_env)
  stdout, stderr = gsutil.communicate()

  if gsutil.returncode:
    # Classify the failure by pattern-matching stderr. Credential problems are
    # checked before not-found patterns because their messages take priority.
    if stderr.startswith((
        'You are attempting to access protected data with no configured',
        'Failure: No handler was ready to authenticate.')):
      raise CredentialsError(gsutil_path)
    if 'status=401' in stderr or 'status 401' in stderr:
      raise CredentialsError(gsutil_path)
    if 'status=403' in stderr or 'status 403' in stderr:
      raise PermissionError(gsutil_path)
    if (stderr.startswith('InvalidUriError') or 'No such object' in stderr or
        'No URLs matched' in stderr):
      raise NotFoundError(stderr)
    raise CloudStorageError(stderr)

  return stdout
    149 
    150 
    151 def List(bucket):
    152   query = 'gs://%s/' % bucket
    153   stdout = _RunCommand(['ls', query])
    154   return [url[len(query):] for url in stdout.splitlines()]
    155 
    156 
    157 def Exists(bucket, remote_path):
    158   try:
    159     _RunCommand(['ls', 'gs://%s/%s' % (bucket, remote_path)])
    160     return True
    161   except NotFoundError:
    162     return False
    163 
    164 
    165 def Move(bucket1, bucket2, remote_path):
    166   url1 = 'gs://%s/%s' % (bucket1, remote_path)
    167   url2 = 'gs://%s/%s' % (bucket2, remote_path)
    168   logging.info('Moving %s to %s' % (url1, url2))
    169   _RunCommand(['mv', url1, url2])
    170 
    171 
    172 def Delete(bucket, remote_path):
    173   url = 'gs://%s/%s' % (bucket, remote_path)
    174   logging.info('Deleting %s' % url)
    175   _RunCommand(['rm', url])
    176 
    177 
    178 def Get(bucket, remote_path, local_path):
    179   url = 'gs://%s/%s' % (bucket, remote_path)
    180   logging.info('Downloading %s to %s' % (url, local_path))
    181   _RunCommand(['cp', url, local_path])
    182 
    183 
    184 def Insert(bucket, remote_path, local_path, publicly_readable=False):
    185   url = 'gs://%s/%s' % (bucket, remote_path)
    186   command_and_args = ['cp']
    187   extra_info = ''
    188   if publicly_readable:
    189     command_and_args += ['-a', 'public-read']
    190     extra_info = ' (publicly readable)'
    191   command_and_args += [local_path, url]
    192   logging.info('Uploading %s to %s%s' % (local_path, url, extra_info))
    193   _RunCommand(command_and_args)
    194 
    195 
    196 def GetIfChanged(file_path, bucket=None):
    197   """Gets the file at file_path if it has a hash file that doesn't match.
    198 
    199   If the file is not in Cloud Storage, log a warning instead of raising an
    200   exception. We assume that the user just hasn't uploaded the file yet.
    201 
    202   Returns:
    203     True if the binary was changed.
    204   """
    205   hash_path = file_path + '.sha1'
    206   if not os.path.exists(hash_path):
    207     logging.warning('Hash file not found: %s' % hash_path)
    208     return False
    209 
    210   expected_hash = ReadHash(hash_path)
    211   if os.path.exists(file_path) and CalculateHash(file_path) == expected_hash:
    212     return False
    213 
    214   if bucket:
    215     buckets = [bucket]
    216   else:
    217     buckets = [PUBLIC_BUCKET, PARTNER_BUCKET, INTERNAL_BUCKET]
    218 
    219   for bucket in buckets:
    220     try:
    221       url = 'gs://%s/%s' % (bucket, expected_hash)
    222       _RunCommand(['cp', url, file_path])
    223       logging.info('Downloaded %s to %s' % (url, file_path))
    224       return True
    225     except NotFoundError:
    226       continue
    227 
    228   logging.warning('Unable to find file in Cloud Storage: %s', file_path)
    229   return False
    230 
    231 
    232 def CalculateHash(file_path):
    233   """Calculates and returns the hash of the file at file_path."""
    234   sha1 = hashlib.sha1()
    235   with open(file_path, 'rb') as f:
    236     while True:
    237       # Read in 1mb chunks, so it doesn't all have to be loaded into memory.
    238       chunk = f.read(1024*1024)
    239       if not chunk:
    240         break
    241       sha1.update(chunk)
    242   return sha1.hexdigest()
    243 
    244 
    245 def ReadHash(hash_path):
    246   with open(hash_path, 'rb') as f:
    247     return f.read(1024).rstrip()
    248