# Copyright (c) 2009, Daniel Krech All rights reserved.
# Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
#  * Neither the name of the Daniel Krech nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Support for automatically downloading Python packages from an URL."""


from __future__ import with_statement

import codecs
import logging
import os
import shutil
import sys
import tarfile
import tempfile
import urllib
import zipfile
import zipimport

try:
    # Python 2 only.  Unused by this module, but still imported so that the
    # module namespace is unchanged for existing importers.
    import new
except ImportError:
    new = None

try:
    # Python 2 layout.
    import urlparse
    _urlopen = urllib.urlopen
except ImportError:
    # Python 3 layout: the same functionality lives in urllib.parse and
    # urllib.request.
    import urllib.parse as urlparse
    import urllib.request
    _urlopen = urllib.request.urlopen

_log = logging.getLogger(__name__)


class AutoInstaller(object):

    """Supports automatically installing Python packages from an URL.

    Supports uncompressed files, .tar.gz, and .zip formats.

    Basic usage:

    installer = AutoInstaller()

    installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
                      url_subpath="pep8-0.5.0/pep8.py")
    installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
                      url_subpath="mechanize")

    Note that the two archive formats are unpacked differently (see
    _unzip() and _extract_targz()): a zip file is extracted directly into
    the scratch directory, while a tar file is extracted into a
    sub-directory named after the archive.  This is why the tar example
    above repeats the "pep8-0.5.0" component in its url_subpath.

    """

    def __init__(self, append_to_search_path=False, make_package=True,
                 target_dir=None, temp_dir=None):
        """Create an AutoInstaller instance, and set up the target directory.

        Args:
          append_to_search_path: A boolean value of whether to append the
                                 target directory to the sys.path search path.
          make_package: A boolean value of whether to make the target
                        directory a package.  This adds an __init__.py file
                        to the target directory -- allowing packages and
                        modules within the target directory to be imported
                        explicitly using dotted module names.
          target_dir: The directory path to which packages should be installed.
                      Defaults to a subdirectory of the folder containing
                      this module called "autoinstalled".
          temp_dir: The directory path to use for any temporary files
                    generated while downloading, unzipping, and extracting
                    packages to install.  Defaults to a standard temporary
                    location generated by the tempfile module.  This
                    parameter should normally be used only for development
                    testing.

        """
        if target_dir is None:
            this_dir = os.path.dirname(__file__)
            target_dir = os.path.join(this_dir, "autoinstalled")

        # Ensure that the target directory exists.
        self._set_up_target_dir(target_dir, append_to_search_path, make_package)

        self._target_dir = target_dir
        self._temp_dir = temp_dir

    def _log_transfer(self, message, source, target, log_method=None):
        """Log a message that involves a source and target.

        Args:
          message: The headline to log.
          source: The value logged on the "From:" line.
          target: The value logged on the "To:" line.
          log_method: The callable used to emit each line.  Defaults to
                      the module logger's debug() method.

        """
        if log_method is None:
            log_method = _log.debug

        log_method("%s" % message)
        log_method(' From: "%s"' % source)
        log_method(' To: "%s"' % target)

    def _create_directory(self, path, name=None):
        """Create a directory, including any missing parent directories.

        Args:
          path: The directory path to create.
          name: An optional descriptive name used only in the log message.

        Raises:
          OSError: if the directory already exists or cannot be created
                   (this is the behavior of os.makedirs()).

        """
        log = _log.debug

        name = name + " " if name is not None else ""
        log('Creating %sdirectory...' % name)
        log(' "%s"' % path)

        os.makedirs(path)

    def _write_file(self, path, text, encoding):
        """Create a file at the given path with given text.

        This method overwrites any existing file.

        """
        _log.debug("Creating file...")
        _log.debug(' "%s"' % path)
        with codecs.open(path, "w", encoding) as stream:
            stream.write(text)

    def _set_up_target_dir(self, target_dir, append_to_search_path,
                           make_package):
        """Set up a target directory.

        Args:
          target_dir: The path to the target directory to set up.
          append_to_search_path: A boolean value of whether to append the
                                 target directory to the sys.path search path.
          make_package: A boolean value of whether to make the target
                        directory a package.  This adds an __init__.py file
                        to the target directory -- allowing packages and
                        modules within the target directory to be imported
                        explicitly using dotted module names.

        """
        if not os.path.exists(target_dir):
            self._create_directory(target_dir, "autoinstall target")

        # Creating several installers for the same directory must not pile
        # up duplicate sys.path entries.
        if append_to_search_path and target_dir not in sys.path:
            sys.path.append(target_dir)

        if make_package:
            init_path = os.path.join(target_dir, "__init__.py")
            if not os.path.exists(init_path):
                text = ("# This file is required for Python to search this "
                        "directory for modules.\n")
                self._write_file(init_path, text, "ascii")

    def _create_scratch_directory_inner(self, prefix):
        """Create a scratch directory without exception handling.

        Creates a scratch directory inside the AutoInstaller temp
        directory self._temp_dir, or inside a platform-dependent temp
        directory if self._temp_dir is None.  Returns the path to the
        created scratch directory.

        Raises:
          OSError: [Errno 2] if the containing temp directory self._temp_dir
                   is not None and does not exist.

        """
        # The tempfile.mkdtemp() method function requires that the
        # directory corresponding to the "dir" parameter already exist
        # if it is not None.
        scratch_dir = tempfile.mkdtemp(prefix=prefix, dir=self._temp_dir)
        return scratch_dir

    def _create_scratch_directory(self, target_name):
        """Create a temporary scratch directory, and return its path.

        The scratch directory is generated inside the temp directory
        of this AutoInstaller instance.  This method also creates the
        temp directory if it does not already exist.

        """
        prefix = target_name + "_"
        try:
            scratch_dir = self._create_scratch_directory_inner(prefix)
        except OSError:
            # Handle case of containing temp directory not existing--
            # OSError: [Errno 2] No such file or directory:...
            temp_dir = self._temp_dir
            if temp_dir is None or os.path.exists(temp_dir):
                raise
            # Else try again after creating the temp directory.
            self._create_directory(temp_dir, "autoinstall temp")
            scratch_dir = self._create_scratch_directory_inner(prefix)

        return scratch_dir

    def _url_downloaded_path(self, target_name):
        """Return the path to the file containing the URL downloaded."""
        filename = ".%s.url" % target_name
        path = os.path.join(self._target_dir, filename)
        return path

    def _is_downloaded(self, target_name, url):
        """Return whether a package version has been downloaded.

        A package counts as downloaded only if the recorded URL matches
        the given URL AND the installed file or directory is still present
        in the target directory.

        """
        version_path = self._url_downloaded_path(target_name)

        _log.debug('Checking %s URL downloaded...' % target_name)
        _log.debug(' "%s"' % version_path)

        if not os.path.exists(version_path):
            # Then no package version has been downloaded.
            _log.debug("No URL file found.")
            return False

        # A stale URL record must not prevent re-installing a package that
        # has since been removed from the target directory.
        target_path = os.path.join(self._target_dir, target_name)
        if not os.path.exists(target_path):
            _log.debug("URL file found, but the installed package is missing.")
            return False

        with codecs.open(version_path, "r", "utf-8") as stream:
            version = stream.read()

        return version.strip() == url.strip()

    def _record_url_downloaded(self, target_name, url):
        """Record the URL downloaded to a file."""
        version_path = self._url_downloaded_path(target_name)
        _log.debug("Recording URL downloaded...")
        _log.debug(' URL: "%s"' % url)
        _log.debug(' To: "%s"' % version_path)

        self._write_file(version_path, url, "utf-8")

    def _check_inside_directory(self, directory, path):
        """Raise ValueError unless path resolves to a location in directory.

        Archives come from the network, so entry names such as
        "../../x" or "/etc/x" must not be allowed to write outside of the
        extraction directory.

        """
        root = os.path.realpath(directory)
        resolved = os.path.realpath(path)
        # os.path.join(root, "") appends exactly one trailing separator.
        if resolved != root and not resolved.startswith(os.path.join(root, "")):
            raise ValueError('Archive entry would be extracted outside of '
                             '"%s": "%s"' % (directory, path))

    def _extract_targz(self, path, scratch_dir):
        """Extract a .tar.gz file, and return the extraction directory.

        The archive is extracted into a sub-directory of the scratch
        directory named after the archive without its ".tar.gz" suffix.

        Raises:
          Exception: if the file cannot be opened as a tar file.
          ValueError: if an entry name points outside of the extraction
                      directory.

        """
        target_basename = os.path.basename(path[:-len(".tar.gz")])
        target_path = os.path.join(scratch_dir, target_basename)

        self._log_transfer("Starting gunzip/extract...", path, target_path)

        try:
            tar_file = tarfile.open(path)
        except tarfile.ReadError as err:
            # Append existing Error message to new Error.
            message = ("Could not open tar file: %s\n"
                       " The file probably does not have the correct format.\n"
                       " --> Inner message: %s"
                       % (path, err))
            raise Exception(message)

        try:
            # This is helpful for debugging purposes.
            _log.debug("Listing tar file contents...")
            for name in tar_file.getnames():
                _log.debug(' * "%s"' % name)
                # Only entry names are checked here; link targets are
                # covered by the "data" filter below where it exists.
                self._check_inside_directory(target_path,
                                             os.path.join(target_path, name))
            _log.debug("Extracting gzipped tar file...")
            if hasattr(tarfile, "data_filter"):
                # Newer Pythons provide extraction filters that reject
                # unsafe members (outside links, device files, ...).
                tar_file.extractall(target_path, filter="data")
            else:
                tar_file.extractall(target_path)
        finally:
            tar_file.close()

        return target_path

    # This is used instead of ZipFile.extractall() because it logs each
    # entry and refuses entries that would land outside of the target.
    def _extract_all(self, zip_file, target_dir):
        """Extract every entry of an open ZipFile into target_dir.

        Raises:
          ValueError: if an entry name points outside of target_dir.

        """
        self._log_transfer("Extracting zip file...", zip_file, target_dir)

        names = zip_file.namelist()

        # This is helpful for debugging purposes.
        _log.debug("Listing zip file contents...")
        for name in names:
            _log.debug(' * "%s"' % name)

        for name in names:
            path = os.path.join(target_dir, name)
            self._check_inside_directory(target_dir, path)
            self._log_transfer("Extracting...", name, path)

            if not os.path.basename(path):
                # Then the path ends in a slash, so it is a directory.
                # It may already exist if one of its files came first.
                if not os.path.isdir(path):
                    self._create_directory(path)
                continue
            # Otherwise, it is a file.

            # Not all zip files seem to list the directories explicitly,
            # so make sure the containing directory exists.
            parent_dir = os.path.dirname(path)
            if parent_dir and not os.path.isdir(parent_dir):
                self._create_directory(parent_dir)

            # We open this file w/o encoding, as we're reading/writing
            # the raw byte-stream from the zip file.
            with open(path, 'wb') as outfile:
                outfile.write(zip_file.read(name))

    def _unzip(self, path, scratch_dir):
        """Unzip a .zip file, and return the expected top-level directory.

        The archive is extracted directly into the scratch directory.  The
        returned path is the scratch directory joined with the archive
        name without its ".zip" suffix, i.e. the directory the archive is
        expected to contain at its top level.

        Raises:
          Exception: if the file cannot be opened as a zip file.

        """
        target_basename = os.path.basename(path[:-len(".zip")])
        target_path = os.path.join(scratch_dir, target_basename)

        self._log_transfer("Starting unzip...", path, target_path)

        try:
            zip_file = zipfile.ZipFile(path, "r")
        except zipfile.BadZipfile as err:
            message = ("Could not open zip file: %s\n"
                       " --> Inner message: %s"
                       % (path, err))
            raise Exception(message)

        try:
            self._extract_all(zip_file, scratch_dir)
        finally:
            zip_file.close()

        return target_path

    def _prepare_package(self, path, scratch_dir):
        """Prepare a package for use, if necessary, and return the new path.

        For example, this method unzips zipped files and extracts
        tar files.

        Args:
          path: The path to the downloaded URL contents.
          scratch_dir: The scratch directory.  Note that the scratch
                       directory contains the file designated by the
                       path parameter.

        """
        # FIXME: Add other natural extensions.
        if path.endswith(".zip"):
            new_path = self._unzip(path, scratch_dir)
        elif path.endswith(".tar.gz"):
            new_path = self._extract_targz(path, scratch_dir)
        else:
            # No preparation is needed.
            new_path = path

        return new_path

    def _download_to_stream(self, url, stream):
        """Download an URL to a stream, and return the number of bytes.

        Raises:
          IOError: if the URL cannot be opened.
          ValueError: if the server reports a non-2xx HTTP status code.

        """
        try:
            netstream = _urlopen(url)
        except IOError as err:
            # Append existing Error message to new Error.
            message = ('Could not download Python modules from URL "%s".\n'
                       " Make sure you are connected to the internet.\n"
                       " You must be connected to the internet when "
                       "downloading needed modules for the first time.\n"
                       " --> Inner message: %s"
                       % (url, err))
            raise IOError(message)

        # Close the connection even if the status check or a write fails.
        try:
            code = 200
            if hasattr(netstream, "getcode"):
                code = netstream.getcode()
            # getcode() returns None for non-HTTP schemes (e.g. file://,
            # ftp://), in which case there is no status to validate.
            if code is not None and not 200 <= code < 300:
                raise ValueError("HTTP Error code %s" % code)

            BUFSIZE = 2**13  # 8KB
            byte_count = 0
            while True:
                data = netstream.read(BUFSIZE)
                if not data:
                    break
                stream.write(data)
                byte_count += len(data)
        finally:
            netstream.close()

        return byte_count

    def _download(self, url, scratch_dir):
        """Download URL contents, and return the download path."""
        url_path = urlparse.urlsplit(url)[2]
        url_path = os.path.normpath(url_path)  # Removes trailing slash.
        target_filename = os.path.basename(url_path)
        if target_filename in ("", "."):
            # The URL has no path component to name the file after.
            target_filename = "download"
        target_path = os.path.join(scratch_dir, target_filename)

        self._log_transfer("Starting download...", url, target_path)

        with open(target_path, "wb") as stream:
            byte_count = self._download_to_stream(url, stream)

        _log.debug("Downloaded %s bytes." % byte_count)

        return target_path

    def _install(self, scratch_dir, package_name, target_path, url,
                 url_subpath):
        """Install a python package from an URL.

        This internal method overwrites the target path if the target
        path already exists.

        Raises:
          IOError: if url_subpath does not exist in the downloaded
                   package.  Any existing install is left untouched in
                   that case.

        """
        path = self._download(url=url, scratch_dir=scratch_dir)
        path = self._prepare_package(path, scratch_dir)

        if url_subpath is None:
            source_path = path
        else:
            source_path = os.path.join(path, url_subpath)

        # Check this before deleting anything, so that a wrong url_subpath
        # cannot destroy a working install.
        if not os.path.exists(source_path):
            raise IOError('Path not found in downloaded package: "%s"'
                          % source_path)

        if os.path.exists(target_path):
            _log.debug('Refreshing install: deleting "%s".' % target_path)
            if os.path.isdir(target_path):
                shutil.rmtree(target_path)
            else:
                os.remove(target_path)

        self._log_transfer("Moving files into place...", source_path, target_path)

        # The shutil.move() command creates intermediate directories if they
        # do not exist, but we do not rely on this behavior since we
        # need to create the __init__.py file anyway.
        shutil.move(source_path, target_path)

        self._record_url_downloaded(package_name, url)

    def install(self, url, should_refresh=False, target_name=None,
                url_subpath=None):
        """Install a python package from an URL.

        Args:
          url: The URL from which to download the package.

        Optional Args:
          should_refresh: A boolean value of whether the package should be
                          downloaded again if the package is already present.
          target_name: The name of the folder or file in the autoinstaller
                       target directory at which the package should be
                       installed.  Defaults to the base name of the
                       URL sub-path.  This parameter must be provided if
                       the URL sub-path is not specified.
          url_subpath: The relative path of the URL directory that should
                       be installed.  Defaults to the full directory, or
                       the entire URL contents.

        Raises:
          ValueError: if neither target_name nor url_subpath is provided.
          Exception: if the download, extraction or installation fails.
                     The message includes the underlying error.

        """
        if target_name is None:
            if not url_subpath:
                raise ValueError('The "target_name" parameter must be '
                                 'provided if the "url_subpath" parameter '
                                 "is not provided.")
            # Remove any trailing slashes.
            url_subpath = os.path.normpath(url_subpath)
            target_name = os.path.basename(url_subpath)

        target_path = os.path.join(self._target_dir, target_name)
        if not should_refresh and self._is_downloaded(target_name, url):
            _log.debug('URL for %s already downloaded. Skipping...'
                       % target_name)
            _log.debug(' "%s"' % url)
            return

        self._log_transfer("Auto-installing package: %s" % target_name,
                           url, target_path, log_method=_log.info)

        # The scratch directory is where we will download and prepare
        # files specific to this install until they are ready to move
        # into place.
        scratch_dir = self._create_scratch_directory(target_name)

        try:
            self._install(package_name=target_name,
                          target_path=target_path,
                          scratch_dir=scratch_dir,
                          url=url,
                          url_subpath=url_subpath)
        except Exception as err:
            # Append existing Error message to new Error.
            message = ("Error auto-installing the %s package to:\n"
                       ' "%s"\n'
                       " --> Inner message: %s"
                       % (target_name, target_path, err))
            raise Exception(message)
        finally:
            _log.debug('Cleaning up: deleting "%s".' % scratch_dir)
            shutil.rmtree(scratch_dir)
        _log.debug('Auto-installed %s to:' % target_name)
        _log.debug(' "%s"' % target_path)


def _main():
    """Install two sample packages with DEBUG logging, for manual testing."""
    # Configure the autoinstall logger to log DEBUG messages for
    # development testing purposes.
    console = logging.StreamHandler()

    formatter = logging.Formatter('%(name)s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    _log.addHandler(console)
    _log.setLevel(logging.DEBUG)

    # Use a more visible temp directory for debug purposes.
    this_dir = os.path.dirname(__file__)
    target_dir = os.path.join(this_dir, "autoinstalled")
    temp_dir = os.path.join(target_dir, "Temp")

    installer = AutoInstaller(target_dir=target_dir,
                              temp_dir=temp_dir)

    installer.install(should_refresh=False,
                      target_name="pep8.py",
                      url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
                      url_subpath="pep8-0.5.0/pep8.py")
    installer.install(should_refresh=False,
                      target_name="mechanize",
                      url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
                      url_subpath="mechanize")


if __name__ == "__main__":
    _main()