Home | History | Annotate | Download | only in Lib
      1 """Utility functions for copying and archiving files and directory trees.
      2 
      3 XXX The functions here don't copy the resource fork or other metadata on Mac.
      4 
      5 """
      6 
      7 import os
      8 import sys
      9 import stat
     10 import fnmatch
     11 import collections
     12 import errno
     13 
     14 try:
     15     import zlib
     16     del zlib
     17     _ZLIB_SUPPORTED = True
     18 except ImportError:
     19     _ZLIB_SUPPORTED = False
     20 
     21 try:
     22     import bz2
     23     del bz2
     24     _BZ2_SUPPORTED = True
     25 except ImportError:
     26     _BZ2_SUPPORTED = False
     27 
     28 try:
     29     import lzma
     30     del lzma
     31     _LZMA_SUPPORTED = True
     32 except ImportError:
     33     _LZMA_SUPPORTED = False
     34 
     35 try:
     36     from pwd import getpwnam
     37 except ImportError:
     38     getpwnam = None
     39 
     40 try:
     41     from grp import getgrnam
     42 except ImportError:
     43     getgrnam = None
     44 
     45 __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
     46            "copytree", "move", "rmtree", "Error", "SpecialFileError",
     47            "ExecError", "make_archive", "get_archive_formats",
     48            "register_archive_format", "unregister_archive_format",
     49            "get_unpack_formats", "register_unpack_format",
     50            "unregister_unpack_format", "unpack_archive",
     51            "ignore_patterns", "chown", "which", "get_terminal_size",
     52            "SameFileError"]
     53            # disk_usage is added later, if available on the platform
     54 
class Error(OSError):
    """Generic error raised by functions in this module.

    copytree() raises it with a list of (srcname, dstname, reason) tuples
    collected while copying; move() raises it with a plain message.
    """
    pass
     57 
class SameFileError(Error):
    """Raised when source and destination are the same file.

    copyfile() raises it before touching either path.
    """
     60 
class SpecialFileError(OSError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe).

    copyfile() raises it when either path is a FIFO.
    """
     64 
class ExecError(OSError):
    """Raised when a command could not be executed."""
     67 
class ReadError(OSError):
    """Raised when an archive cannot be read."""
     70 
class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails."""
     74 
     75 
     76 def copyfileobj(fsrc, fdst, length=16*1024):
     77     """copy data from file-like object fsrc to file-like object fdst"""
     78     while 1:
     79         buf = fsrc.read(length)
     80         if not buf:
     81             break
     82         fdst.write(buf)
     83 
     84 def _samefile(src, dst):
     85     # Macintosh, Unix.
     86     if hasattr(os.path, 'samefile'):
     87         try:
     88             return os.path.samefile(src, dst)
     89         except OSError:
     90             return False
     91 
     92     # All other platforms: check for same pathname.
     93     return (os.path.normcase(os.path.abspath(src)) ==
     94             os.path.normcase(os.path.abspath(dst)))
     95 
     96 def copyfile(src, dst, *, follow_symlinks=True):
     97     """Copy data from src to dst.
     98 
     99     If follow_symlinks is not set and src is a symbolic link, a new
    100     symlink will be created instead of copying the file it points to.
    101 
    102     """
    103     if _samefile(src, dst):
    104         raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
    105 
    106     for fn in [src, dst]:
    107         try:
    108             st = os.stat(fn)
    109         except OSError:
    110             # File most likely does not exist
    111             pass
    112         else:
    113             # XXX What about other special files? (sockets, devices...)
    114             if stat.S_ISFIFO(st.st_mode):
    115                 raise SpecialFileError("`%s` is a named pipe" % fn)
    116 
    117     if not follow_symlinks and os.path.islink(src):
    118         os.symlink(os.readlink(src), dst)
    119     else:
    120         with open(src, 'rb') as fsrc:
    121             with open(dst, 'wb') as fdst:
    122                 copyfileobj(fsrc, fdst)
    123     return dst
    124 
    125 def copymode(src, dst, *, follow_symlinks=True):
    126     """Copy mode bits from src to dst.
    127 
    128     If follow_symlinks is not set, symlinks aren't followed if and only
    129     if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    130     (e.g. Linux) this method does nothing.
    131 
    132     """
    133     if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
    134         if hasattr(os, 'lchmod'):
    135             stat_func, chmod_func = os.lstat, os.lchmod
    136         else:
    137             return
    138     elif hasattr(os, 'chmod'):
    139         stat_func, chmod_func = os.stat, os.chmod
    140     else:
    141         return
    142 
    143     st = stat_func(src)
    144     chmod_func(dst, stat.S_IMODE(st.st_mode))
    145 
    146 if hasattr(os, 'listxattr'):
    147     def _copyxattr(src, dst, *, follow_symlinks=True):
    148         """Copy extended filesystem attributes from `src` to `dst`.
    149 
    150         Overwrite existing attributes.
    151 
    152         If `follow_symlinks` is false, symlinks won't be followed.
    153 
    154         """
    155 
    156         try:
    157             names = os.listxattr(src, follow_symlinks=follow_symlinks)
    158         except OSError as e:
    159             if e.errno not in (errno.ENOTSUP, errno.ENODATA):
    160                 raise
    161             return
    162         for name in names:
    163             try:
    164                 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
    165                 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
    166             except OSError as e:
    167                 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
    168                     raise
    169 else:
    170     def _copyxattr(*args, **kwargs):
    171         pass
    172 
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well, through _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.

    """
    # Stand-in for an os function that is missing or cannot honour
    # follow_symlinks=False: accepts the same call shape and does nothing.
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
    if follow:
        # use the real function if it exists
        def lookup(name):
            return getattr(os, name, _nop)
    else:
        # use the real function only if it exists
        # *and* it supports follow_symlinks
        def lookup(name):
            fn = getattr(os, name, _nop)
            if fn in os.supports_follow_symlinks:
                return fn
            return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    # Timestamps are copied with nanosecond resolution.
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
        follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod() raised NotImplementedError for follow_symlinks=False:
        # the platform cannot change the mode of the symlink itself.
        # (The previous wording of this comment spoke of chown()/lchown()/
        # fchownat(), but the call being guarded is chmod().)
        # We're out of options--we simply cannot chmod the symlink.
        # Give up, suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Ignore "operation not supported" errors (whichever of the two
            # errno names exists on this platform); re-raise anything else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
    226 
    227 def copy(src, dst, *, follow_symlinks=True):
    228     """Copy data and mode bits ("cp src dst"). Return the file's destination.
    229 
    230     The destination may be a directory.
    231 
    232     If follow_symlinks is false, symlinks won't be followed. This
    233     resembles GNU's "cp -P src dst".
    234 
    235     If source and destination are the same file, a SameFileError will be
    236     raised.
    237 
    238     """
    239     if os.path.isdir(dst):
    240         dst = os.path.join(dst, os.path.basename(src))
    241     copyfile(src, dst, follow_symlinks=follow_symlinks)
    242     copymode(src, dst, follow_symlinks=follow_symlinks)
    243     return dst
    244 
    245 def copy2(src, dst, *, follow_symlinks=True):
    246     """Copy data and all stat info ("cp -p src dst"). Return the file's
    247     destination."
    248 
    249     The destination may be a directory.
    250 
    251     If follow_symlinks is false, symlinks won't be followed. This
    252     resembles GNU's "cp -P src dst".
    253 
    254     """
    255     if os.path.isdir(dst):
    256         dst = os.path.join(dst, os.path.basename(src))
    257     copyfile(src, dst, follow_symlinks=follow_symlinks)
    258     copystat(src, dst, follow_symlinks=follow_symlinks)
    259     return dst
    260 
    261 def ignore_patterns(*patterns):
    262     """Function that can be used as copytree() ignore parameter.
    263 
    264     Patterns is a sequence of glob-style patterns
    265     that are used to exclude files"""
    266     def _ignore_patterns(path, names):
    267         ignored_names = []
    268         for pattern in patterns:
    269             ignored_names.extend(fnmatch.filter(names, pattern))
    270         return set(ignored_names)
    271     return _ignore_patterns
    272 
    273 def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
    274              ignore_dangling_symlinks=False):
    275     """Recursively copy a directory tree.
    276 
    277     The destination directory must not already exist.
    278     If exception(s) occur, an Error is raised with a list of reasons.
    279 
    280     If the optional symlinks flag is true, symbolic links in the
    281     source tree result in symbolic links in the destination tree; if
    282     it is false, the contents of the files pointed to by symbolic
    283     links are copied. If the file pointed by the symlink doesn't
    284     exist, an exception will be added in the list of errors raised in
    285     an Error exception at the end of the copy process.
    286 
    287     You can set the optional ignore_dangling_symlinks flag to true if you
    288     want to silence this exception. Notice that this has no effect on
    289     platforms that don't support os.symlink.
    290 
    291     The optional ignore argument is a callable. If given, it
    292     is called with the `src` parameter, which is the directory
    293     being visited by copytree(), and `names` which is the list of
    294     `src` contents, as returned by os.listdir():
    295 
    296         callable(src, names) -> ignored_names
    297 
    298     Since copytree() is called recursively, the callable will be
    299     called once for each directory that is copied. It returns a
    300     list of names relative to the `src` directory that should
    301     not be copied.
    302 
    303     The optional copy_function argument is a callable that will be used
    304     to copy each file. It will be called with the source path and the
    305     destination path as arguments. By default, copy2() is used, but any
    306     function that supports the same signature (like copy()) can be used.
    307 
    308     """
    309     names = os.listdir(src)
    310     if ignore is not None:
    311         ignored_names = ignore(src, names)
    312     else:
    313         ignored_names = set()
    314 
    315     os.makedirs(dst)
    316     errors = []
    317     for name in names:
    318         if name in ignored_names:
    319             continue
    320         srcname = os.path.join(src, name)
    321         dstname = os.path.join(dst, name)
    322         try:
    323             if os.path.islink(srcname):
    324                 linkto = os.readlink(srcname)
    325                 if symlinks:
    326                     # We can't just leave it to `copy_function` because legacy
    327                     # code with a custom `copy_function` may rely on copytree
    328                     # doing the right thing.
    329                     os.symlink(linkto, dstname)
    330                     copystat(srcname, dstname, follow_symlinks=not symlinks)
    331                 else:
    332                     # ignore dangling symlink if the flag is on
    333                     if not os.path.exists(linkto) and ignore_dangling_symlinks:
    334                         continue
    335                     # otherwise let the copy occurs. copy2 will raise an error
    336                     if os.path.isdir(srcname):
    337                         copytree(srcname, dstname, symlinks, ignore,
    338                                  copy_function)
    339                     else:
    340                         copy_function(srcname, dstname)
    341             elif os.path.isdir(srcname):
    342                 copytree(srcname, dstname, symlinks, ignore, copy_function)
    343             else:
    344                 # Will raise a SpecialFileError for unsupported file types
    345                 copy_function(srcname, dstname)
    346         # catch the Error from the recursive copytree so that we can
    347         # continue with other files
    348         except Error as err:
    349             errors.extend(err.args[0])
    350         except OSError as why:
    351             errors.append((srcname, dstname, str(why)))
    352     try:
    353         copystat(src, dst)
    354     except OSError as why:
    355         # Copying file access times may fail on Windows
    356         if getattr(why, 'winerror', None) is None:
    357             errors.append((src, dst, str(why)))
    358     if errors:
    359         raise Error(errors)
    360     return dst
    361 
    362 # version vulnerable to race conditions
    363 def _rmtree_unsafe(path, onerror):
    364     try:
    365         if os.path.islink(path):
    366             # symlinks to directories are forbidden, see bug #1669
    367             raise OSError("Cannot call rmtree on a symbolic link")
    368     except OSError:
    369         onerror(os.path.islink, path, sys.exc_info())
    370         # can't continue even if onerror hook returns
    371         return
    372     names = []
    373     try:
    374         names = os.listdir(path)
    375     except OSError:
    376         onerror(os.listdir, path, sys.exc_info())
    377     for name in names:
    378         fullname = os.path.join(path, name)
    379         try:
    380             mode = os.lstat(fullname).st_mode
    381         except OSError:
    382             mode = 0
    383         if stat.S_ISDIR(mode):
    384             _rmtree_unsafe(fullname, onerror)
    385         else:
    386             try:
    387                 os.unlink(fullname)
    388             except OSError:
    389                 onerror(os.unlink, fullname, sys.exc_info())
    390     try:
    391         os.rmdir(path)
    392     except OSError:
    393         onerror(os.rmdir, path, sys.exc_info())
    394 
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as descriptor `topfd`.

    `path` is only used to build the names reported to `onerror`; all
    filesystem calls are made relative to `topfd` (dir_fd=...).  The
    directory itself is not removed here -- the caller does that.
    Failures are reported as onerror(func, fullname, exc_info).
    """
    names = []
    try:
        names = os.listdir(topfd)
    except OSError as err:
        # listdir() was given an fd; put the path on the exception instead.
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            # lstat-like call: do not follow a symlink named `name`.
            orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
            mode = orig_st.st_mode
        except OSError:
            # Could not stat: fall through to the unlink() branch below.
            mode = 0
        if stat.S_ISDIR(mode):
            try:
                dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Only descend if what we opened is the same object we
                    # stat'ed above.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    # Always release the descriptor, even if onerror raised.
                    os.close(dirfd)
        else:
            try:
                os.unlink(name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
    439 
# True when the platform offers everything _rmtree_safe_fd() relies on:
# dir_fd support in os.open/os.stat/os.unlink/os.rmdir, os.listdir() on a
# file descriptor, and follow_symlinks support in os.stat.
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                     os.supports_dir_fd and
                     os.listdir in os.supports_fd and
                     os.stat in os.supports_follow_symlinks)
    444 
    445 def rmtree(path, ignore_errors=False, onerror=None):
    446     """Recursively delete a directory tree.
    447 
    448     If ignore_errors is set, errors are ignored; otherwise, if onerror
    449     is set, it is called to handle the error with arguments (func,
    450     path, exc_info) where func is platform and implementation dependent;
    451     path is the argument to that function that caused it to fail; and
    452     exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    453     is false and onerror is None, an exception is raised.
    454 
    455     """
    456     if ignore_errors:
    457         def onerror(*args):
    458             pass
    459     elif onerror is None:
    460         def onerror(*args):
    461             raise
    462     if _use_fd_functions:
    463         # While the unsafe rmtree works fine on bytes, the fd based does not.
    464         if isinstance(path, bytes):
    465             path = os.fsdecode(path)
    466         # Note: To guard against symlink races, we use the standard
    467         # lstat()/open()/fstat() trick.
    468         try:
    469             orig_st = os.lstat(path)
    470         except Exception:
    471             onerror(os.lstat, path, sys.exc_info())
    472             return
    473         try:
    474             fd = os.open(path, os.O_RDONLY)
    475         except Exception:
    476             onerror(os.lstat, path, sys.exc_info())
    477             return
    478         try:
    479             if os.path.samestat(orig_st, os.fstat(fd)):
    480                 _rmtree_safe_fd(fd, path, onerror)
    481                 try:
    482                     os.rmdir(path)
    483                 except OSError:
    484                     onerror(os.rmdir, path, sys.exc_info())
    485             else:
    486                 try:
    487                     # symlinks to directories are forbidden, see bug #1669
    488                     raise OSError("Cannot call rmtree on a symbolic link")
    489                 except OSError:
    490                     onerror(os.path.islink, path, sys.exc_info())
    491         finally:
    492             os.close(fd)
    493     else:
    494         return _rmtree_unsafe(path, onerror)
    495 
# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform: the attribute is true
# exactly when rmtree() takes its fd-based code path.
rmtree.avoids_symlink_attacks = _use_fd_functions
    499 
    500 def _basename(path):
    501     # A basename() variant which first strips the trailing slash, if present.
    502     # Thus we always get the last component of the path, even for directories.
    503     sep = os.path.sep + (os.path.altsep or '')
    504     return os.path.basename(path.rstrip(sep))
    505 
    506 def move(src, dst, copy_function=copy2):
    507     """Recursively move a file or directory to another location. This is
    508     similar to the Unix "mv" command. Return the file or directory's
    509     destination.
    510 
    511     If the destination is a directory or a symlink to a directory, the source
    512     is moved inside the directory. The destination path must not already
    513     exist.
    514 
    515     If the destination already exists but is not a directory, it may be
    516     overwritten depending on os.rename() semantics.
    517 
    518     If the destination is on our current filesystem, then rename() is used.
    519     Otherwise, src is copied to the destination and then removed. Symlinks are
    520     recreated under the new name if os.rename() fails because of cross
    521     filesystem renames.
    522 
    523     The optional `copy_function` argument is a callable that will be used
    524     to copy the source or it will be delegated to `copytree`.
    525     By default, copy2() is used, but any function that supports the same
    526     signature (like copy()) can be used.
    527 
    528     A lot more could be done here...  A look at a mv.c shows a lot of
    529     the issues this implementation glosses over.
    530 
    531     """
    532     real_dst = dst
    533     if os.path.isdir(dst):
    534         if _samefile(src, dst):
    535             # We might be on a case insensitive filesystem,
    536             # perform the rename anyway.
    537             os.rename(src, dst)
    538             return
    539 
    540         real_dst = os.path.join(dst, _basename(src))
    541         if os.path.exists(real_dst):
    542             raise Error("Destination path '%s' already exists" % real_dst)
    543     try:
    544         os.rename(src, real_dst)
    545     except OSError:
    546         if os.path.islink(src):
    547             linkto = os.readlink(src)
    548             os.symlink(linkto, real_dst)
    549             os.unlink(src)
    550         elif os.path.isdir(src):
    551             if _destinsrc(src, dst):
    552                 raise Error("Cannot move a directory '%s' into itself"
    553                             " '%s'." % (src, dst))
    554             copytree(src, real_dst, copy_function=copy_function,
    555                      symlinks=True)
    556             rmtree(src)
    557         else:
    558             copy_function(src, real_dst)
    559             os.unlink(src)
    560     return real_dst
    561 
    562 def _destinsrc(src, dst):
    563     src = os.path.abspath(src)
    564     dst = os.path.abspath(dst)
    565     if not src.endswith(os.path.sep):
    566         src += os.path.sep
    567     if not dst.endswith(os.path.sep):
    568         dst += os.path.sep
    569     return dst.startswith(src)
    570 
    571 def _get_gid(name):
    572     """Returns a gid, given a group name."""
    573     if getgrnam is None or name is None:
    574         return None
    575     try:
    576         result = getgrnam(name)
    577     except KeyError:
    578         result = None
    579     if result is not None:
    580         return result[2]
    581     return None
    582 
    583 def _get_uid(name):
    584     """Returns an uid, given a user name."""
    585     if getpwnam is None or name is None:
    586         return None
    587     try:
    588         result = getpwnam(name)
    589     except KeyError:
    590         result = None
    591     if result is not None:
    592         return result[2]
    593     return None
    594 
    595 def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
    596                   owner=None, group=None, logger=None):
    597     """Create a (possibly compressed) tar file from all the files under
    598     'base_dir'.
    599 
    600     'compress' must be "gzip" (the default), "bzip2", "xz", or None.
    601 
    602     'owner' and 'group' can be used to define an owner and a group for the
    603     archive that is being built. If not provided, the current owner and group
    604     will be used.
    605 
    606     The output tar file will be named 'base_name' +  ".tar", possibly plus
    607     the appropriate compression extension (".gz", ".bz2", or ".xz").
    608 
    609     Returns the output filename.
    610     """
    611     if compress is None:
    612         tar_compression = ''
    613     elif _ZLIB_SUPPORTED and compress == 'gzip':
    614         tar_compression = 'gz'
    615     elif _BZ2_SUPPORTED and compress == 'bzip2':
    616         tar_compression = 'bz2'
    617     elif _LZMA_SUPPORTED and compress == 'xz':
    618         tar_compression = 'xz'
    619     else:
    620         raise ValueError("bad value for 'compress', or compression format not "
    621                          "supported : {0}".format(compress))
    622 
    623     import tarfile  # late import for breaking circular dependency
    624 
    625     compress_ext = '.' + tar_compression if compress else ''
    626     archive_name = base_name + '.tar' + compress_ext
    627     archive_dir = os.path.dirname(archive_name)
    628 
    629     if archive_dir and not os.path.exists(archive_dir):
    630         if logger is not None:
    631             logger.info("creating %s", archive_dir)
    632         if not dry_run:
    633             os.makedirs(archive_dir)
    634 
    635     # creating the tarball
    636     if logger is not None:
    637         logger.info('Creating tar archive')
    638 
    639     uid = _get_uid(owner)
    640     gid = _get_gid(group)
    641 
    642     def _set_uid_gid(tarinfo):
    643         if gid is not None:
    644             tarinfo.gid = gid
    645             tarinfo.gname = group
    646         if uid is not None:
    647             tarinfo.uid = uid
    648             tarinfo.uname = owner
    649         return tarinfo
    650 
    651     if not dry_run:
    652         tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    653         try:
    654             tar.add(base_dir, filter=_set_uid_gid)
    655         finally:
    656             tar.close()
    657 
    658     return archive_name
    659 
    660 def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    661     """Create a zip file from all the files under 'base_dir'.
    662 
    663     The output zip file will be named 'base_name' + ".zip".  Returns the
    664     name of the output zip file.
    665     """
    666     import zipfile  # late import for breaking circular dependency
    667 
    668     zip_filename = base_name + ".zip"
    669     archive_dir = os.path.dirname(base_name)
    670 
    671     if archive_dir and not os.path.exists(archive_dir):
    672         if logger is not None:
    673             logger.info("creating %s", archive_dir)
    674         if not dry_run:
    675             os.makedirs(archive_dir)
    676 
    677     if logger is not None:
    678         logger.info("creating '%s' and adding '%s' to it",
    679                     zip_filename, base_dir)
    680 
    681     if not dry_run:
    682         with zipfile.ZipFile(zip_filename, "w",
    683                              compression=zipfile.ZIP_DEFLATED) as zf:
    684             path = os.path.normpath(base_dir)
    685             if path != os.curdir:
    686                 zf.write(path, path)
    687                 if logger is not None:
    688                     logger.info("adding '%s'", path)
    689             for dirpath, dirnames, filenames in os.walk(base_dir):
    690                 for name in sorted(dirnames):
    691                     path = os.path.normpath(os.path.join(dirpath, name))
    692                     zf.write(path, path)
    693                     if logger is not None:
    694                         logger.info("adding '%s'", path)
    695                 for name in filenames:
    696                     path = os.path.normpath(os.path.join(dirpath, name))
    697                     if os.path.isfile(path):
    698                         zf.write(path, path)
    699                         if logger is not None:
    700                             logger.info("adding '%s'", path)
    701 
    702     return zip_filename
    703 
# Registry of archive formats: name -> (function, extra_args, description),
# where extra_args is a list of (keyword, value) pairs.
# Formats that need an optional compression module are only registered when
# that module could be imported.
_ARCHIVE_FORMATS = {
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# zlib backs both gzip'ed tarballs and (deflated) ZIP files.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS['gztar'] = (_make_tarball, [('compress', 'gzip')],
                                "gzip'ed tar-file")
    _ARCHIVE_FORMATS['zip'] = (_make_zipfile, [], "ZIP file")

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS['xztar'] = (_make_tarball, [('compress', 'xz')],
                                "xz'ed tar-file")
    720 
    721 def get_archive_formats():
    722     """Returns a list of supported formats for archiving and unarchiving.
    723 
    724     Each element of the returned sequence is a tuple (name, description)
    725     """
    726     formats = [(name, registry[2]) for name, registry in
    727                _ARCHIVE_FORMATS.items()]
    728     formats.sort()
    729     return formats
    730 
    731 def register_archive_format(name, function, extra_args=None, description=''):
    732     """Registers an archive format.
    733 
    734     name is the name of the format. function is the callable that will be
    735     used to create archives. If provided, extra_args is a sequence of
    736     (name, value) tuples that will be passed as arguments to the callable.
    737     description can be provided to describe the format, and will be returned
    738     by the get_archive_formats() function.
    739     """
    740     if extra_args is None:
    741         extra_args = []
    742     if not callable(function):
    743         raise TypeError('The %s object is not callable' % function)
    744     if not isinstance(extra_args, (tuple, list)):
    745         raise TypeError('extra_args needs to be a sequence')
    746     for element in extra_args:
    747         if not isinstance(element, (tuple, list)) or len(element) !=2:
    748             raise TypeError('extra_args elements are : (arg_name, value)')
    749 
    750     _ARCHIVE_FORMATS[name] = (function, extra_args, description)
    751 
    752 def unregister_archive_format(name):
    753     del _ARCHIVE_FORMATS[name]
    754 
    755 def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
    756                  dry_run=0, owner=None, group=None, logger=None):
    757     """Create an archive file (eg. zip or tar).
    758 
    759     'base_name' is the name of the file to create, minus any format-specific
    760     extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    761     "bztar", or "xztar".  Or any other registered format.
    762 
    763     'root_dir' is a directory that will be the root directory of the
    764     archive; ie. we typically chdir into 'root_dir' before creating the
    765     archive.  'base_dir' is the directory where we start archiving from;
    766     ie. 'base_dir' will be the common prefix of all files and
    767     directories in the archive.  'root_dir' and 'base_dir' both default
    768     to the current directory.  Returns the name of the archive file.
    769 
    770     'owner' and 'group' are used when creating a tar archive. By default,
    771     uses the current owner and group.
    772     """
    773     save_cwd = os.getcwd()
    774     if root_dir is not None:
    775         if logger is not None:
    776             logger.debug("changing into '%s'", root_dir)
    777         base_name = os.path.abspath(base_name)
    778         if not dry_run:
    779             os.chdir(root_dir)
    780 
    781     if base_dir is None:
    782         base_dir = os.curdir
    783 
    784     kwargs = {'dry_run': dry_run, 'logger': logger}
    785 
    786     try:
    787         format_info = _ARCHIVE_FORMATS[format]
    788     except KeyError:
    789         raise ValueError("unknown archive format '%s'" % format)
    790 
    791     func = format_info[0]
    792     for arg, val in format_info[1]:
    793         kwargs[arg] = val
    794 
    795     if format != 'zip':
    796         kwargs['owner'] = owner
    797         kwargs['group'] = group
    798 
    799     try:
    800         filename = func(base_name, base_dir, **kwargs)
    801     finally:
    802         if root_dir is not None:
    803             if logger is not None:
    804                 logger.debug("changing back to '%s'", save_cwd)
    805             os.chdir(save_cwd)
    806 
    807     return filename
    808 
    809 
    810 def get_unpack_formats():
    811     """Returns a list of supported formats for unpacking.
    812 
    813     Each element of the returned sequence is a tuple
    814     (name, extensions, description)
    815     """
    816     formats = [(name, info[0], info[3]) for name, info in
    817                _UNPACK_FORMATS.items()]
    818     formats.sort()
    819     return formats
    820 
    821 def _check_unpack_options(extensions, function, extra_args):
    822     """Checks what gets registered as an unpacker."""
    823     # first make sure no other unpacker is registered for this extension
    824     existing_extensions = {}
    825     for name, info in _UNPACK_FORMATS.items():
    826         for ext in info[0]:
    827             existing_extensions[ext] = name
    828 
    829     for extension in extensions:
    830         if extension in existing_extensions:
    831             msg = '%s is already registered for "%s"'
    832             raise RegistryError(msg % (extension,
    833                                        existing_extensions[extension]))
    834 
    835     if not callable(function):
    836         raise TypeError('The registered function must be a callable')
    837 
    838 
    839 def register_unpack_format(name, extensions, function, extra_args=None,
    840                            description=''):
    841     """Registers an unpack format.
    842 
    843     `name` is the name of the format. `extensions` is a list of extensions
    844     corresponding to the format.
    845 
    846     `function` is the callable that will be
    847     used to unpack archives. The callable will receive archives to unpack.
    848     If it's unable to handle an archive, it needs to raise a ReadError
    849     exception.
    850 
    851     If provided, `extra_args` is a sequence of
    852     (name, value) tuples that will be passed as arguments to the callable.
    853     description can be provided to describe the format, and will be returned
    854     by the get_unpack_formats() function.
    855     """
    856     if extra_args is None:
    857         extra_args = []
    858     _check_unpack_options(extensions, function, extra_args)
    859     _UNPACK_FORMATS[name] = extensions, function, extra_args, description
    860 
    861 def unregister_unpack_format(name):
    862     """Removes the pack format from the registry."""
    863     del _UNPACK_FORMATS[name]
    864 
    865 def _ensure_directory(path):
    866     """Ensure that the parent directory of `path` exists"""
    867     dirname = os.path.dirname(path)
    868     if not os.path.isdir(dirname):
    869         os.makedirs(dirname)
    870 
    871 def _unpack_zipfile(filename, extract_dir):
    872     """Unpack zip `filename` to `extract_dir`
    873     """
    874     import zipfile  # late import for breaking circular dependency
    875 
    876     if not zipfile.is_zipfile(filename):
    877         raise ReadError("%s is not a zip file" % filename)
    878 
    879     zip = zipfile.ZipFile(filename)
    880     try:
    881         for info in zip.infolist():
    882             name = info.filename
    883 
    884             # don't extract absolute paths or ones with .. in them
    885             if name.startswith('/') or '..' in name:
    886                 continue
    887 
    888             target = os.path.join(extract_dir, *name.split('/'))
    889             if not target:
    890                 continue
    891 
    892             _ensure_directory(target)
    893             if not name.endswith('/'):
    894                 # file
    895                 data = zip.read(info.filename)
    896                 f = open(target, 'wb')
    897                 try:
    898                     f.write(data)
    899                 finally:
    900                     f.close()
    901                     del data
    902     finally:
    903         zip.close()
    904 
    905 def _unpack_tarfile(filename, extract_dir):
    906     """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
    907     """
    908     import tarfile  # late import for breaking circular dependency
    909     try:
    910         tarobj = tarfile.open(filename)
    911     except tarfile.TarError:
    912         raise ReadError(
    913             "%s is not a compressed or uncompressed tar file" % filename)
    914     try:
    915         tarobj.extractall(extract_dir)
    916     finally:
    917         tarobj.close()
    918 
    919 _UNPACK_FORMATS = {
    920     'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    921     'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    922 }
    923 
    924 if _ZLIB_SUPPORTED:
    925     _UNPACK_FORMATS['gztar'] = (['.tar.gz', '.tgz'], _unpack_tarfile, [],
    926                                 "gzip'ed tar-file")
    927 
    928 if _BZ2_SUPPORTED:
    929     _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
    930                                 "bzip2'ed tar-file")
    931 
    932 if _LZMA_SUPPORTED:
    933     _UNPACK_FORMATS['xztar'] = (['.tar.xz', '.txz'], _unpack_tarfile, [],
    934                                 "xz'ed tar-file")
    935 
    936 def _find_unpack_format(filename):
    937     for name, info in _UNPACK_FORMATS.items():
    938         for extension in info[0]:
    939             if filename.endswith(extension):
    940                 return name
    941     return None
    942 
    943 def unpack_archive(filename, extract_dir=None, format=None):
    944     """Unpack an archive.
    945 
    946     `filename` is the name of the archive.
    947 
    948     `extract_dir` is the name of the target directory, where the archive
    949     is unpacked. If not provided, the current working directory is used.
    950 
    951     `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    952     or "xztar".  Or any other registered format.  If not provided,
    953     unpack_archive will use the filename extension and see if an unpacker
    954     was registered for that extension.
    955 
    956     In case none is found, a ValueError is raised.
    957     """
    958     if extract_dir is None:
    959         extract_dir = os.getcwd()
    960 
    961     if format is not None:
    962         try:
    963             format_info = _UNPACK_FORMATS[format]
    964         except KeyError:
    965             raise ValueError("Unknown unpack format '{0}'".format(format))
    966 
    967         func = format_info[1]
    968         func(filename, extract_dir, **dict(format_info[2]))
    969     else:
    970         # we need to look at the registered unpackers supported extensions
    971         format = _find_unpack_format(filename)
    972         if format is None:
    973             raise ReadError("Unknown archive format '{0}'".format(filename))
    974 
    975         func = _UNPACK_FORMATS[format][1]
    976         kwargs = dict(_UNPACK_FORMATS[format][2])
    977         func(filename, extract_dir, **kwargs)
    978 
    979 
    980 if hasattr(os, 'statvfs'):
    981 
    982     __all__.append('disk_usage')
    983     _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    984     _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    985     _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    986     _ntuple_diskusage.free.__doc__ = 'Free space in bytes'
    987 
    988     def disk_usage(path):
    989         """Return disk usage statistics about the given path.
    990 
    991         Returned value is a named tuple with attributes 'total', 'used' and
    992         'free', which are the amount of total, used and free space, in bytes.
    993         """
    994         st = os.statvfs(path)
    995         free = st.f_bavail * st.f_frsize
    996         total = st.f_blocks * st.f_frsize
    997         used = (st.f_blocks - st.f_bfree) * st.f_frsize
    998         return _ntuple_diskusage(total, used, free)
    999 
   1000 elif os.name == 'nt':
   1001 
   1002     import nt
   1003     __all__.append('disk_usage')
   1004     _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
   1005 
   1006     def disk_usage(path):
   1007         """Return disk usage statistics about the given path.
   1008 
   1009         Returned values is a named tuple with attributes 'total', 'used' and
   1010         'free', which are the amount of total, used and free space, in bytes.
   1011         """
   1012         total, free = nt._getdiskusage(path)
   1013         used = total - free
   1014         return _ntuple_diskusage(total, used, free)
   1015 
   1016 
   1017 def chown(path, user=None, group=None):
   1018     """Change owner user and group of the given path.
   1019 
   1020     user and group can be the uid/gid or the user/group names, and in that case,
   1021     they are converted to their respective uid/gid.
   1022     """
   1023 
   1024     if user is None and group is None:
   1025         raise ValueError("user and/or group must be set")
   1026 
   1027     _user = user
   1028     _group = group
   1029 
   1030     # -1 means don't change it
   1031     if user is None:
   1032         _user = -1
   1033     # user can either be an int (the uid) or a string (the system username)
   1034     elif isinstance(user, str):
   1035         _user = _get_uid(user)
   1036         if _user is None:
   1037             raise LookupError("no such user: {!r}".format(user))
   1038 
   1039     if group is None:
   1040         _group = -1
   1041     elif not isinstance(group, int):
   1042         _group = _get_gid(group)
   1043         if _group is None:
   1044             raise LookupError("no such group: {!r}".format(group))
   1045 
   1046     os.chown(path, _user, _group)
   1047 
   1048 def get_terminal_size(fallback=(80, 24)):
   1049     """Get the size of the terminal window.
   1050 
   1051     For each of the two dimensions, the environment variable, COLUMNS
   1052     and LINES respectively, is checked. If the variable is defined and
   1053     the value is a positive integer, it is used.
   1054 
   1055     When COLUMNS or LINES is not defined, which is the common case,
   1056     the terminal connected to sys.__stdout__ is queried
   1057     by invoking os.get_terminal_size.
   1058 
   1059     If the terminal size cannot be successfully queried, either because
   1060     the system doesn't support querying, or because we are not
   1061     connected to a terminal, the value given in fallback parameter
   1062     is used. Fallback defaults to (80, 24) which is the default
   1063     size used by many terminal emulators.
   1064 
   1065     The value returned is a named tuple of type os.terminal_size.
   1066     """
   1067     # columns, lines are the working values
   1068     try:
   1069         columns = int(os.environ['COLUMNS'])
   1070     except (KeyError, ValueError):
   1071         columns = 0
   1072 
   1073     try:
   1074         lines = int(os.environ['LINES'])
   1075     except (KeyError, ValueError):
   1076         lines = 0
   1077 
   1078     # only query if necessary
   1079     if columns <= 0 or lines <= 0:
   1080         try:
   1081             size = os.get_terminal_size(sys.__stdout__.fileno())
   1082         except (AttributeError, ValueError, OSError):
   1083             # stdout is None, closed, detached, or not a terminal, or
   1084             # os.get_terminal_size() is unsupported
   1085             size = os.terminal_size(fallback)
   1086         if columns <= 0:
   1087             columns = size.columns
   1088         if lines <= 0:
   1089             lines = size.lines
   1090 
   1091     return os.terminal_size((columns, lines))
   1092 
   1093 def which(cmd, mode=os.F_OK | os.X_OK, path=None):
   1094     """Given a command, mode, and a PATH string, return the path which
   1095     conforms to the given mode on the PATH, or None if there is no such
   1096     file.
   1097 
   1098     `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
   1099     of os.environ.get("PATH"), or can be overridden with a custom search
   1100     path.
   1101 
   1102     """
   1103     # Check that a given file can be accessed with the correct mode.
   1104     # Additionally check that `file` is not a directory, as on Windows
   1105     # directories pass the os.access check.
   1106     def _access_check(fn, mode):
   1107         return (os.path.exists(fn) and os.access(fn, mode)
   1108                 and not os.path.isdir(fn))
   1109 
   1110     # If we're given a path with a directory part, look it up directly rather
   1111     # than referring to PATH directories. This includes checking relative to the
   1112     # current directory, e.g. ./script
   1113     if os.path.dirname(cmd):
   1114         if _access_check(cmd, mode):
   1115             return cmd
   1116         return None
   1117 
   1118     if path is None:
   1119         path = os.environ.get("PATH", os.defpath)
   1120     if not path:
   1121         return None
   1122     path = path.split(os.pathsep)
   1123 
   1124     if sys.platform == "win32":
   1125         # The current directory takes precedence on Windows.
   1126         if not os.curdir in path:
   1127             path.insert(0, os.curdir)
   1128 
   1129         # PATHEXT is necessary to check on Windows.
   1130         pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
   1131         # See if the given file matches any of the expected path extensions.
   1132         # This will allow us to short circuit when given "python.exe".
   1133         # If it does match, only test that one, otherwise we have to try
   1134         # others.
   1135         if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
   1136             files = [cmd]
   1137         else:
   1138             files = [cmd + ext for ext in pathext]
   1139     else:
   1140         # On other platforms you don't have things like PATHEXT to tell you
   1141         # what file suffixes are executable, so just pass on cmd as-is.
   1142         files = [cmd]
   1143 
   1144     seen = set()
   1145     for dir in path:
   1146         normdir = os.path.normcase(dir)
   1147         if not normdir in seen:
   1148             seen.add(normdir)
   1149             for thefile in files:
   1150                 name = os.path.join(dir, thefile)
   1151                 if _access_check(name, mode):
   1152                     return name
   1153     return None
   1154