Home | History | Annotate | Download | only in system
      1 # Copyright (c) 2009, Daniel Krech All rights reserved.
      2 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      3 #
      4 # Redistribution and use in source and binary forms, with or without
      5 # modification, are permitted provided that the following conditions are
      6 # met:
      7 #
      8 #  * Redistributions of source code must retain the above copyright
      9 # notice, this list of conditions and the following disclaimer.
     10 #
     11 #  * Redistributions in binary form must reproduce the above copyright
     12 # notice, this list of conditions and the following disclaimer in the
     13 # documentation and/or other materials provided with the distribution.
     14 #
     15 #  * Neither the name of the Daniel Krech nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 """Support for automatically downloading Python packages from an URL."""
     32 
     33 
     34 from __future__ import with_statement
     35 
     36 import codecs
     37 import logging
     38 import new
     39 import os
     40 import shutil
     41 import sys
     42 import tarfile
     43 import tempfile
     44 import urllib
     45 import urlparse
     46 import zipfile
     47 import zipimport
     48 
# Module-level logger used for all autoinstall diagnostics.  No handler or
# level is configured here; that is left to the importing application (the
# __main__ section of this file attaches a console handler for testing).
_log = logging.getLogger(__name__)
     50 
     51 
     52 class AutoInstaller(object):
     53 
     54     """Supports automatically installing Python packages from an URL.
     55 
     56     Supports uncompressed files, .tar.gz, and .zip formats.
     57 
     58     Basic usage:
     59 
     60     installer = AutoInstaller()
     61 
     62     installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
     63                       url_subpath="pep8-0.5.0/pep8.py")
     64     installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
     65                       url_subpath="mechanize")
     66 
     67     """
     68 
     69     def __init__(self, append_to_search_path=False, make_package=True,
     70                  target_dir=None, temp_dir=None):
     71         """Create an AutoInstaller instance, and set up the target directory.
     72 
     73         Args:
     74           append_to_search_path: A boolean value of whether to append the
     75                                  target directory to the sys.path search path.
     76           make_package: A boolean value of whether to make the target
     77                         directory a package.  This adds an __init__.py file
     78                         to the target directory -- allowing packages and
     79                         modules within the target directory to be imported
     80                         explicitly using dotted module names.
     81           target_dir: The directory path to which packages should be installed.
     82                       Defaults to a subdirectory of the folder containing
     83                       this module called "autoinstalled".
     84           temp_dir: The directory path to use for any temporary files
     85                     generated while downloading, unzipping, and extracting
     86                     packages to install.  Defaults to a standard temporary
     87                     location generated by the tempfile module.  This
     88                     parameter should normally be used only for development
     89                     testing.
     90 
     91         """
     92         if target_dir is None:
     93             this_dir = os.path.dirname(__file__)
     94             target_dir = os.path.join(this_dir, "autoinstalled")
     95 
     96         # Ensure that the target directory exists.
     97         self._set_up_target_dir(target_dir, append_to_search_path, make_package)
     98 
     99         self._target_dir = target_dir
    100         self._temp_dir = temp_dir
    101 
    102     def _log_transfer(self, message, source, target, log_method=None):
    103         """Log a debug message that involves a source and target."""
    104         if log_method is None:
    105             log_method = _log.debug
    106 
    107         log_method("%s" % message)
    108         log_method('    From: "%s"' % source)
    109         log_method('      To: "%s"' % target)
    110 
    111     def _create_directory(self, path, name=None):
    112         """Create a directory."""
    113         log = _log.debug
    114 
    115         name = name + " " if name is not None else ""
    116         log('Creating %sdirectory...' % name)
    117         log('    "%s"' % path)
    118 
    119         os.makedirs(path)
    120 
    121     def _write_file(self, path, text, encoding):
    122         """Create a file at the given path with given text.
    123 
    124         This method overwrites any existing file.
    125 
    126         """
    127         _log.debug("Creating file...")
    128         _log.debug('    "%s"' % path)
    129         with codecs.open(path, "w", encoding) as file:
    130             file.write(text)
    131 
    132     def _set_up_target_dir(self, target_dir, append_to_search_path,
    133                            make_package):
    134         """Set up a target directory.
    135 
    136         Args:
    137           target_dir: The path to the target directory to set up.
    138           append_to_search_path: A boolean value of whether to append the
    139                                  target directory to the sys.path search path.
    140           make_package: A boolean value of whether to make the target
    141                         directory a package.  This adds an __init__.py file
    142                         to the target directory -- allowing packages and
    143                         modules within the target directory to be imported
    144                         explicitly using dotted module names.
    145 
    146         """
    147         if not os.path.exists(target_dir):
    148             self._create_directory(target_dir, "autoinstall target")
    149 
    150         if append_to_search_path:
    151             sys.path.append(target_dir)
    152 
    153         if make_package:
    154             init_path = os.path.join(target_dir, "__init__.py")
    155             if not os.path.exists(init_path):
    156                 text = ("# This file is required for Python to search this "
    157                         "directory for modules.\n")
    158                 self._write_file(init_path, text, "ascii")
    159 
    160     def _create_scratch_directory_inner(self, prefix):
    161         """Create a scratch directory without exception handling.
    162 
    163         Creates a scratch directory inside the AutoInstaller temp
    164         directory self._temp_dir, or inside a platform-dependent temp
    165         directory if self._temp_dir is None.  Returns the path to the
    166         created scratch directory.
    167 
    168         Raises:
    169           OSError: [Errno 2] if the containing temp directory self._temp_dir
    170                              is not None and does not exist.
    171 
    172         """
    173         # The tempfile.mkdtemp() method function requires that the
    174         # directory corresponding to the "dir" parameter already exist
    175         # if it is not None.
    176         scratch_dir = tempfile.mkdtemp(prefix=prefix, dir=self._temp_dir)
    177         return scratch_dir
    178 
    179     def _create_scratch_directory(self, target_name):
    180         """Create a temporary scratch directory, and return its path.
    181 
    182         The scratch directory is generated inside the temp directory
    183         of this AutoInstaller instance.  This method also creates the
    184         temp directory if it does not already exist.
    185 
    186         """
    187         prefix = target_name + "_"
    188         try:
    189             scratch_dir = self._create_scratch_directory_inner(prefix)
    190         except OSError:
    191             # Handle case of containing temp directory not existing--
    192             # OSError: [Errno 2] No such file or directory:...
    193             temp_dir = self._temp_dir
    194             if temp_dir is None or os.path.exists(temp_dir):
    195                 raise
    196             # Else try again after creating the temp directory.
    197             self._create_directory(temp_dir, "autoinstall temp")
    198             scratch_dir = self._create_scratch_directory_inner(prefix)
    199 
    200         return scratch_dir
    201 
    202     def _url_downloaded_path(self, target_name):
    203         """Return the path to the file containing the URL downloaded."""
    204         filename = ".%s.url" % target_name
    205         path = os.path.join(self._target_dir, filename)
    206         return path
    207 
    208     def _is_downloaded(self, target_name, url):
    209         """Return whether a package version has been downloaded."""
    210         version_path = self._url_downloaded_path(target_name)
    211 
    212         _log.debug('Checking %s URL downloaded...' % target_name)
    213         _log.debug('    "%s"' % version_path)
    214 
    215         if not os.path.exists(version_path):
    216             # Then no package version has been downloaded.
    217             _log.debug("No URL file found.")
    218             return False
    219 
    220         with codecs.open(version_path, "r", "utf-8") as file:
    221             version = file.read()
    222 
    223         return version.strip() == url.strip()
    224 
    225     def _record_url_downloaded(self, target_name, url):
    226         """Record the URL downloaded to a file."""
    227         version_path = self._url_downloaded_path(target_name)
    228         _log.debug("Recording URL downloaded...")
    229         _log.debug('    URL: "%s"' % url)
    230         _log.debug('     To: "%s"' % version_path)
    231 
    232         self._write_file(version_path, url, "utf-8")
    233 
    234     def _extract_targz(self, path, scratch_dir):
    235         # tarfile.extractall() extracts to a path without the
    236         # trailing ".tar.gz".
    237         target_basename = os.path.basename(path[:-len(".tar.gz")])
    238         target_path = os.path.join(scratch_dir, target_basename)
    239 
    240         self._log_transfer("Starting gunzip/extract...", path, target_path)
    241 
    242         try:
    243             tar_file = tarfile.open(path)
    244         except tarfile.ReadError, err:
    245             # Append existing Error message to new Error.
    246             message = ("Could not open tar file: %s\n"
    247                        " The file probably does not have the correct format.\n"
    248                        " --> Inner message: %s"
    249                        % (path, err))
    250             raise Exception(message)
    251 
    252         try:
    253             # This is helpful for debugging purposes.
    254             _log.debug("Listing tar file contents...")
    255             for name in tar_file.getnames():
    256                 _log.debug('    * "%s"' % name)
    257             _log.debug("Extracting gzipped tar file...")
    258             tar_file.extractall(target_path)
    259         finally:
    260             tar_file.close()
    261 
    262         return target_path
    263 
    264     # This is a replacement for ZipFile.extractall(), which is
    265     # available in Python 2.6 but not in earlier versions.
    266     def _extract_all(self, zip_file, target_dir):
    267         self._log_transfer("Extracting zip file...", zip_file, target_dir)
    268 
    269         # This is helpful for debugging purposes.
    270         _log.debug("Listing zip file contents...")
    271         for name in zip_file.namelist():
    272             _log.debug('    * "%s"' % name)
    273 
    274         for name in zip_file.namelist():
    275             path = os.path.join(target_dir, name)
    276             self._log_transfer("Extracting...", name, path)
    277 
    278             if not os.path.basename(path):
    279                 # Then the path ends in a slash, so it is a directory.
    280                 self._create_directory(path)
    281                 continue
    282             # Otherwise, it is a file.
    283 
    284             try:
    285                 # We open this file w/o encoding, as we're reading/writing
    286                 # the raw byte-stream from the zip file.
    287                 outfile = open(path, 'wb')
    288             except IOError, err:
    289                 # Not all zip files seem to list the directories explicitly,
    290                 # so try again after creating the containing directory.
    291                 _log.debug("Got IOError: retrying after creating directory...")
    292                 dir = os.path.dirname(path)
    293                 self._create_directory(dir)
    294                 outfile = open(path, 'wb')
    295 
    296             try:
    297                 outfile.write(zip_file.read(name))
    298             finally:
    299                 outfile.close()
    300 
    301     def _unzip(self, path, scratch_dir):
    302         # zipfile.extractall() extracts to a path without the
    303         # trailing ".zip".
    304         target_basename = os.path.basename(path[:-len(".zip")])
    305         target_path = os.path.join(scratch_dir, target_basename)
    306 
    307         self._log_transfer("Starting unzip...", path, target_path)
    308 
    309         try:
    310             zip_file = zipfile.ZipFile(path, "r")
    311         except zipfile.BadZipfile, err:
    312             message = ("Could not open zip file: %s\n"
    313                        " --> Inner message: %s"
    314                        % (path, err))
    315             raise Exception(message)
    316 
    317         try:
    318             self._extract_all(zip_file, scratch_dir)
    319         finally:
    320             zip_file.close()
    321 
    322         return target_path
    323 
    324     def _prepare_package(self, path, scratch_dir):
    325         """Prepare a package for use, if necessary, and return the new path.
    326 
    327         For example, this method unzips zipped files and extracts
    328         tar files.
    329 
    330         Args:
    331           path: The path to the downloaded URL contents.
    332           scratch_dir: The scratch directory.  Note that the scratch
    333                        directory contains the file designated by the
    334                        path parameter.
    335 
    336         """
    337         # FIXME: Add other natural extensions.
    338         if path.endswith(".zip"):
    339             new_path = self._unzip(path, scratch_dir)
    340         elif path.endswith(".tar.gz"):
    341             new_path = self._extract_targz(path, scratch_dir)
    342         else:
    343             # No preparation is needed.
    344             new_path = path
    345 
    346         return new_path
    347 
    348     def _download_to_stream(self, url, stream):
    349         """Download an URL to a stream, and return the number of bytes."""
    350         try:
    351             netstream = urllib.urlopen(url)
    352         except IOError, err:
    353             # Append existing Error message to new Error.
    354             message = ('Could not download Python modules from URL "%s".\n'
    355                        " Make sure you are connected to the internet.\n"
    356                        " You must be connected to the internet when "
    357                        "downloading needed modules for the first time.\n"
    358                        " --> Inner message: %s"
    359                        % (url, err))
    360             raise IOError(message)
    361         code = 200
    362         if hasattr(netstream, "getcode"):
    363             code = netstream.getcode()
    364         if not 200 <= code < 300:
    365             raise ValueError("HTTP Error code %s" % code)
    366 
    367         BUFSIZE = 2**13  # 8KB
    368         bytes = 0
    369         while True:
    370             data = netstream.read(BUFSIZE)
    371             if not data:
    372                 break
    373             stream.write(data)
    374             bytes += len(data)
    375         netstream.close()
    376         return bytes
    377 
    378     def _download(self, url, scratch_dir):
    379         """Download URL contents, and return the download path."""
    380         url_path = urlparse.urlsplit(url)[2]
    381         url_path = os.path.normpath(url_path)  # Removes trailing slash.
    382         target_filename = os.path.basename(url_path)
    383         target_path = os.path.join(scratch_dir, target_filename)
    384 
    385         self._log_transfer("Starting download...", url, target_path)
    386 
    387         with open(target_path, "wb") as stream:
    388             bytes = self._download_to_stream(url, stream)
    389 
    390         _log.debug("Downloaded %s bytes." % bytes)
    391 
    392         return target_path
    393 
    394     def _install(self, scratch_dir, package_name, target_path, url,
    395                  url_subpath):
    396         """Install a python package from an URL.
    397 
    398         This internal method overwrites the target path if the target
    399         path already exists.
    400 
    401         """
    402         path = self._download(url=url, scratch_dir=scratch_dir)
    403         path = self._prepare_package(path, scratch_dir)
    404 
    405         if url_subpath is None:
    406             source_path = path
    407         else:
    408             source_path = os.path.join(path, url_subpath)
    409 
    410         if os.path.exists(target_path):
    411             _log.debug('Refreshing install: deleting "%s".' % target_path)
    412             if os.path.isdir(target_path):
    413                 shutil.rmtree(target_path)
    414             else:
    415                 os.remove(target_path)
    416 
    417         self._log_transfer("Moving files into place...", source_path, target_path)
    418 
    419         # The shutil.move() command creates intermediate directories if they
    420         # do not exist, but we do not rely on this behavior since we
    421         # need to create the __init__.py file anyway.
    422         shutil.move(source_path, target_path)
    423 
    424         self._record_url_downloaded(package_name, url)
    425 
    426     def install(self, url, should_refresh=False, target_name=None,
    427                 url_subpath=None):
    428         """Install a python package from an URL.
    429 
    430         Args:
    431           url: The URL from which to download the package.
    432 
    433         Optional Args:
    434           should_refresh: A boolean value of whether the package should be
    435                           downloaded again if the package is already present.
    436           target_name: The name of the folder or file in the autoinstaller
    437                        target directory at which the package should be
    438                        installed.  Defaults to the base name of the
    439                        URL sub-path.  This parameter must be provided if
    440                        the URL sub-path is not specified.
    441           url_subpath: The relative path of the URL directory that should
    442                        be installed.  Defaults to the full directory, or
    443                        the entire URL contents.
    444 
    445         """
    446         if target_name is None:
    447             if not url_subpath:
    448                 raise ValueError('The "target_name" parameter must be '
    449                                  'provided if the "url_subpath" parameter '
    450                                  "is not provided.")
    451             # Remove any trailing slashes.
    452             url_subpath = os.path.normpath(url_subpath)
    453             target_name = os.path.basename(url_subpath)
    454 
    455         target_path = os.path.join(self._target_dir, target_name)
    456         if not should_refresh and self._is_downloaded(target_name, url):
    457             _log.debug('URL for %s already downloaded.  Skipping...'
    458                        % target_name)
    459             _log.debug('    "%s"' % url)
    460             return
    461 
    462         self._log_transfer("Auto-installing package: %s" % target_name,
    463                             url, target_path, log_method=_log.info)
    464 
    465         # The scratch directory is where we will download and prepare
    466         # files specific to this install until they are ready to move
    467         # into place.
    468         scratch_dir = self._create_scratch_directory(target_name)
    469 
    470         try:
    471             self._install(package_name=target_name,
    472                           target_path=target_path,
    473                           scratch_dir=scratch_dir,
    474                           url=url,
    475                           url_subpath=url_subpath)
    476         except Exception, err:
    477             # Append existing Error message to new Error.
    478             message = ("Error auto-installing the %s package to:\n"
    479                        ' "%s"\n'
    480                        " --> Inner message: %s"
    481                        % (target_name, target_path, err))
    482             raise Exception(message)
    483         finally:
    484             _log.debug('Cleaning up: deleting "%s".' % scratch_dir)
    485             shutil.rmtree(scratch_dir)
    486         _log.debug('Auto-installed %s to:' % target_name)
    487         _log.debug('    "%s"' % target_path)
    488 
    489 
    490 if __name__=="__main__":
    491 
    492     # Configure the autoinstall logger to log DEBUG messages for
    493     # development testing purposes.
    494     console = logging.StreamHandler()
    495 
    496     formatter = logging.Formatter('%(name)s: %(levelname)-8s %(message)s')
    497     console.setFormatter(formatter)
    498     _log.addHandler(console)
    499     _log.setLevel(logging.DEBUG)
    500 
    501     # Use a more visible temp directory for debug purposes.
    502     this_dir = os.path.dirname(__file__)
    503     target_dir = os.path.join(this_dir, "autoinstalled")
    504     temp_dir = os.path.join(target_dir, "Temp")
    505 
    506     installer = AutoInstaller(target_dir=target_dir,
    507                               temp_dir=temp_dir)
    508 
    509     installer.install(should_refresh=False,
    510                       target_name="pep8.py",
    511                       url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
    512                       url_subpath="pep8-0.5.0/pep8.py")
    513     installer.install(should_refresh=False,
    514                       target_name="mechanize",
    515                       url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
    516                       url_subpath="mechanize")
    517 
    518