Home | History | Annotate | Download | only in Lib
      1 """Utility functions for copying and archiving files and directory trees.
      2 
      3 XXX The functions here don't copy the resource fork or other metadata on Mac.
      4 
      5 """
      6 
      7 import os
      8 import sys
      9 import stat
     10 from os.path import abspath
     11 import fnmatch
     12 import collections
     13 import errno
     14 
     15 try:
     16     from pwd import getpwnam
     17 except ImportError:
     18     getpwnam = None
     19 
     20 try:
     21     from grp import getgrnam
     22 except ImportError:
     23     getgrnam = None
     24 
     25 __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
     26            "copytree", "move", "rmtree", "Error", "SpecialFileError",
     27            "ExecError", "make_archive", "get_archive_formats",
     28            "register_archive_format", "unregister_archive_format"]
     29 
     30 class Error(EnvironmentError):
     31     pass
     32 
     33 class SpecialFileError(EnvironmentError):
     34     """Raised when trying to do a kind of operation (e.g. copying) which is
     35     not supported on a special file (e.g. a named pipe)"""
     36 
     37 class ExecError(EnvironmentError):
     38     """Raised when a command could not be executed"""
     39 
     40 try:
     41     WindowsError
     42 except NameError:
     43     WindowsError = None
     44 
     45 def copyfileobj(fsrc, fdst, length=16*1024):
     46     """copy data from file-like object fsrc to file-like object fdst"""
     47     while 1:
     48         buf = fsrc.read(length)
     49         if not buf:
     50             break
     51         fdst.write(buf)
     52 
     53 def _samefile(src, dst):
     54     # Macintosh, Unix.

     55     if hasattr(os.path, 'samefile'):
     56         try:
     57             return os.path.samefile(src, dst)
     58         except OSError:
     59             return False
     60 
     61     # All other platforms: check for same pathname.

     62     return (os.path.normcase(os.path.abspath(src)) ==
     63             os.path.normcase(os.path.abspath(dst)))
     64 
     65 def copyfile(src, dst):
     66     """Copy data from src to dst"""
     67     if _samefile(src, dst):
     68         raise Error("`%s` and `%s` are the same file" % (src, dst))
     69 
     70     for fn in [src, dst]:
     71         try:
     72             st = os.stat(fn)
     73         except OSError:
     74             # File most likely does not exist

     75             pass
     76         else:
     77             # XXX What about other special files? (sockets, devices...)

     78             if stat.S_ISFIFO(st.st_mode):
     79                 raise SpecialFileError("`%s` is a named pipe" % fn)
     80 
     81     with open(src, 'rb') as fsrc:
     82         with open(dst, 'wb') as fdst:
     83             copyfileobj(fsrc, fdst)
     84 
     85 def copymode(src, dst):
     86     """Copy mode bits from src to dst"""
     87     if hasattr(os, 'chmod'):
     88         st = os.stat(src)
     89         mode = stat.S_IMODE(st.st_mode)
     90         os.chmod(dst, mode)
     91 
     92 def copystat(src, dst):
     93     """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
     94     st = os.stat(src)
     95     mode = stat.S_IMODE(st.st_mode)
     96     if hasattr(os, 'utime'):
     97         os.utime(dst, (st.st_atime, st.st_mtime))
     98     if hasattr(os, 'chmod'):
     99         os.chmod(dst, mode)
    100     if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
    101         try:
    102             os.chflags(dst, st.st_flags)
    103         except OSError, why:
    104             if (not hasattr(errno, 'EOPNOTSUPP') or
    105                 why.errno != errno.EOPNOTSUPP):
    106                 raise
    107 
    108 def copy(src, dst):
    109     """Copy data and mode bits ("cp src dst").
    110 
    111     The destination may be a directory.
    112 
    113     """
    114     if os.path.isdir(dst):
    115         dst = os.path.join(dst, os.path.basename(src))
    116     copyfile(src, dst)
    117     copymode(src, dst)
    118 
    119 def copy2(src, dst):
    120     """Copy data and all stat info ("cp -p src dst").
    121 
    122     The destination may be a directory.
    123 
    124     """
    125     if os.path.isdir(dst):
    126         dst = os.path.join(dst, os.path.basename(src))
    127     copyfile(src, dst)
    128     copystat(src, dst)
    129 
    130 def ignore_patterns(*patterns):
    131     """Function that can be used as copytree() ignore parameter.
    132 
    133     Patterns is a sequence of glob-style patterns
    134     that are used to exclude files"""
    135     def _ignore_patterns(path, names):
    136         ignored_names = []
    137         for pattern in patterns:
    138             ignored_names.extend(fnmatch.filter(names, pattern))
    139         return set(ignored_names)
    140     return _ignore_patterns
    141 
    142 def copytree(src, dst, symlinks=False, ignore=None):
    143     """Recursively copy a directory tree using copy2().
    144 
    145     The destination directory must not already exist.
    146     If exception(s) occur, an Error is raised with a list of reasons.
    147 
    148     If the optional symlinks flag is true, symbolic links in the
    149     source tree result in symbolic links in the destination tree; if
    150     it is false, the contents of the files pointed to by symbolic
    151     links are copied.
    152 
    153     The optional ignore argument is a callable. If given, it
    154     is called with the `src` parameter, which is the directory
    155     being visited by copytree(), and `names` which is the list of
    156     `src` contents, as returned by os.listdir():
    157 
    158         callable(src, names) -> ignored_names
    159 
    160     Since copytree() is called recursively, the callable will be
    161     called once for each directory that is copied. It returns a
    162     list of names relative to the `src` directory that should
    163     not be copied.
    164 
    165     XXX Consider this example code rather than the ultimate tool.
    166 
    167     """
    168     names = os.listdir(src)
    169     if ignore is not None:
    170         ignored_names = ignore(src, names)
    171     else:
    172         ignored_names = set()
    173 
    174     os.makedirs(dst)
    175     errors = []
    176     for name in names:
    177         if name in ignored_names:
    178             continue
    179         srcname = os.path.join(src, name)
    180         dstname = os.path.join(dst, name)
    181         try:
    182             if symlinks and os.path.islink(srcname):
    183                 linkto = os.readlink(srcname)
    184                 os.symlink(linkto, dstname)
    185             elif os.path.isdir(srcname):
    186                 copytree(srcname, dstname, symlinks, ignore)
    187             else:
    188                 # Will raise a SpecialFileError for unsupported file types

    189                 copy2(srcname, dstname)
    190         # catch the Error from the recursive copytree so that we can

    191         # continue with other files

    192         except Error, err:
    193             errors.extend(err.args[0])
    194         except EnvironmentError, why:
    195             errors.append((srcname, dstname, str(why)))
    196     try:
    197         copystat(src, dst)
    198     except OSError, why:
    199         if WindowsError is not None and isinstance(why, WindowsError):
    200             # Copying file access times may fail on Windows

    201             pass
    202         else:
    203             errors.extend((src, dst, str(why)))
    204     if errors:
    205         raise Error, errors
    206 
    207 def rmtree(path, ignore_errors=False, onerror=None):
    208     """Recursively delete a directory tree.
    209 
    210     If ignore_errors is set, errors are ignored; otherwise, if onerror
    211     is set, it is called to handle the error with arguments (func,
    212     path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    213     path is the argument to that function that caused it to fail; and
    214     exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    215     is false and onerror is None, an exception is raised.
    216 
    217     """
    218     if ignore_errors:
    219         def onerror(*args):
    220             pass
    221     elif onerror is None:
    222         def onerror(*args):
    223             raise
    224     try:
    225         if os.path.islink(path):
    226             # symlinks to directories are forbidden, see bug #1669

    227             raise OSError("Cannot call rmtree on a symbolic link")
    228     except OSError:
    229         onerror(os.path.islink, path, sys.exc_info())
    230         # can't continue even if onerror hook returns

    231         return
    232     names = []
    233     try:
    234         names = os.listdir(path)
    235     except os.error, err:
    236         onerror(os.listdir, path, sys.exc_info())
    237     for name in names:
    238         fullname = os.path.join(path, name)
    239         try:
    240             mode = os.lstat(fullname).st_mode
    241         except os.error:
    242             mode = 0
    243         if stat.S_ISDIR(mode):
    244             rmtree(fullname, ignore_errors, onerror)
    245         else:
    246             try:
    247                 os.remove(fullname)
    248             except os.error, err:
    249                 onerror(os.remove, fullname, sys.exc_info())
    250     try:
    251         os.rmdir(path)
    252     except os.error:
    253         onerror(os.rmdir, path, sys.exc_info())
    254 
    255 
    256 def _basename(path):
    257     # A basename() variant which first strips the trailing slash, if present.

    258     # Thus we always get the last component of the path, even for directories.

    259     return os.path.basename(path.rstrip(os.path.sep))
    260 
    261 def move(src, dst):
    262     """Recursively move a file or directory to another location. This is
    263     similar to the Unix "mv" command.
    264 
    265     If the destination is a directory or a symlink to a directory, the source
    266     is moved inside the directory. The destination path must not already
    267     exist.
    268 
    269     If the destination already exists but is not a directory, it may be
    270     overwritten depending on os.rename() semantics.
    271 
    272     If the destination is on our current filesystem, then rename() is used.
    273     Otherwise, src is copied to the destination and then removed.
    274     A lot more could be done here...  A look at a mv.c shows a lot of
    275     the issues this implementation glosses over.
    276 
    277     """
    278     real_dst = dst
    279     if os.path.isdir(dst):
    280         if _samefile(src, dst):
    281             # We might be on a case insensitive filesystem,

    282             # perform the rename anyway.

    283             os.rename(src, dst)
    284             return
    285 
    286         real_dst = os.path.join(dst, _basename(src))
    287         if os.path.exists(real_dst):
    288             raise Error, "Destination path '%s' already exists" % real_dst
    289     try:
    290         os.rename(src, real_dst)
    291     except OSError:
    292         if os.path.isdir(src):
    293             if _destinsrc(src, dst):
    294                 raise Error, "Cannot move a directory '%s' into itself '%s'." % (src, dst)
    295             copytree(src, real_dst, symlinks=True)
    296             rmtree(src)
    297         else:
    298             copy2(src, real_dst)
    299             os.unlink(src)
    300 
    301 def _destinsrc(src, dst):
    302     src = abspath(src)
    303     dst = abspath(dst)
    304     if not src.endswith(os.path.sep):
    305         src += os.path.sep
    306     if not dst.endswith(os.path.sep):
    307         dst += os.path.sep
    308     return dst.startswith(src)
    309 
    310 def _get_gid(name):
    311     """Returns a gid, given a group name."""
    312     if getgrnam is None or name is None:
    313         return None
    314     try:
    315         result = getgrnam(name)
    316     except KeyError:
    317         result = None
    318     if result is not None:
    319         return result[2]
    320     return None
    321 
    322 def _get_uid(name):
    323     """Returns an uid, given a user name."""
    324     if getpwnam is None or name is None:
    325         return None
    326     try:
    327         result = getpwnam(name)
    328     except KeyError:
    329         result = None
    330     if result is not None:
    331         return result[2]
    332     return None
    333 
    334 def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
    335                   owner=None, group=None, logger=None):
    336     """Create a (possibly compressed) tar file from all the files under
    337     'base_dir'.
    338 
    339     'compress' must be "gzip" (the default), "bzip2", or None.
    340 
    341     'owner' and 'group' can be used to define an owner and a group for the
    342     archive that is being built. If not provided, the current owner and group
    343     will be used.
    344 
    345     The output tar file will be named 'base_name' +  ".tar", possibly plus
    346     the appropriate compression extension (".gz", or ".bz2").
    347 
    348     Returns the output filename.
    349     """
    350     tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    351     compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
    352 
    353     # flags for compression program, each element of list will be an argument

    354     if compress is not None and compress not in compress_ext.keys():
    355         raise ValueError, \
    356               ("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
    357 
    358     archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    359     archive_dir = os.path.dirname(archive_name)
    360 
    361     if not os.path.exists(archive_dir):
    362         logger.info("creating %s" % archive_dir)
    363         if not dry_run:
    364             os.makedirs(archive_dir)
    365 
    366 
    367     # creating the tarball

    368     import tarfile  # late import so Python build itself doesn't break

    369 
    370     if logger is not None:
    371         logger.info('Creating tar archive')
    372 
    373     uid = _get_uid(owner)
    374     gid = _get_gid(group)
    375 
    376     def _set_uid_gid(tarinfo):
    377         if gid is not None:
    378             tarinfo.gid = gid
    379             tarinfo.gname = group
    380         if uid is not None:
    381             tarinfo.uid = uid
    382             tarinfo.uname = owner
    383         return tarinfo
    384 
    385     if not dry_run:
    386         tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    387         try:
    388             tar.add(base_dir, filter=_set_uid_gid)
    389         finally:
    390             tar.close()
    391 
    392     return archive_name
    393 
    394 def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    395     # XXX see if we want to keep an external call here

    396     if verbose:
    397         zipoptions = "-r"
    398     else:
    399         zipoptions = "-rq"
    400     from distutils.errors import DistutilsExecError
    401     from distutils.spawn import spawn
    402     try:
    403         spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    404     except DistutilsExecError:
    405         # XXX really should distinguish between "couldn't find

    406         # external 'zip' command" and "zip failed".

    407         raise ExecError, \
    408             ("unable to create zip file '%s': "
    409             "could neither import the 'zipfile' module nor "
    410             "find a standalone zip utility") % zip_filename
    411 
    412 def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    413     """Create a zip file from all the files under 'base_dir'.
    414 
    415     The output zip file will be named 'base_name' + ".zip".  Uses either the
    416     "zipfile" Python module (if available) or the InfoZIP "zip" utility
    417     (if installed and found on the default search path).  If neither tool is
    418     available, raises ExecError.  Returns the name of the output zip
    419     file.
    420     """
    421     zip_filename = base_name + ".zip"
    422     archive_dir = os.path.dirname(base_name)
    423 
    424     if not os.path.exists(archive_dir):
    425         if logger is not None:
    426             logger.info("creating %s", archive_dir)
    427         if not dry_run:
    428             os.makedirs(archive_dir)
    429 
    430     # If zipfile module is not available, try spawning an external 'zip'

    431     # command.

    432     try:
    433         import zipfile
    434     except ImportError:
    435         zipfile = None
    436 
    437     if zipfile is None:
    438         _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    439     else:
    440         if logger is not None:
    441             logger.info("creating '%s' and adding '%s' to it",
    442                         zip_filename, base_dir)
    443 
    444         if not dry_run:
    445             zip = zipfile.ZipFile(zip_filename, "w",
    446                                   compression=zipfile.ZIP_DEFLATED)
    447 
    448             for dirpath, dirnames, filenames in os.walk(base_dir):
    449                 for name in filenames:
    450                     path = os.path.normpath(os.path.join(dirpath, name))
    451                     if os.path.isfile(path):
    452                         zip.write(path, path)
    453                         if logger is not None:
    454                             logger.info("adding '%s'", path)
    455             zip.close()
    456 
    457     return zip_filename
    458 
    459 _ARCHIVE_FORMATS = {
    460     'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    461     'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    462     'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    463     'zip':   (_make_zipfile, [],"ZIP file")
    464     }
    465 
    466 def get_archive_formats():
    467     """Returns a list of supported formats for archiving and unarchiving.
    468 
    469     Each element of the returned sequence is a tuple (name, description)
    470     """
    471     formats = [(name, registry[2]) for name, registry in
    472                _ARCHIVE_FORMATS.items()]
    473     formats.sort()
    474     return formats
    475 
    476 def register_archive_format(name, function, extra_args=None, description=''):
    477     """Registers an archive format.
    478 
    479     name is the name of the format. function is the callable that will be
    480     used to create archives. If provided, extra_args is a sequence of
    481     (name, value) tuples that will be passed as arguments to the callable.
    482     description can be provided to describe the format, and will be returned
    483     by the get_archive_formats() function.
    484     """
    485     if extra_args is None:
    486         extra_args = []
    487     if not isinstance(function, collections.Callable):
    488         raise TypeError('The %s object is not callable' % function)
    489     if not isinstance(extra_args, (tuple, list)):
    490         raise TypeError('extra_args needs to be a sequence')
    491     for element in extra_args:
    492         if not isinstance(element, (tuple, list)) or len(element) !=2 :
    493             raise TypeError('extra_args elements are : (arg_name, value)')
    494 
    495     _ARCHIVE_FORMATS[name] = (function, extra_args, description)
    496 
    497 def unregister_archive_format(name):
    498     del _ARCHIVE_FORMATS[name]
    499 
    500 def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
    501                  dry_run=0, owner=None, group=None, logger=None):
    502     """Create an archive file (eg. zip or tar).
    503 
    504     'base_name' is the name of the file to create, minus any format-specific
    505     extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    506     or "gztar".
    507 
    508     'root_dir' is a directory that will be the root directory of the
    509     archive; ie. we typically chdir into 'root_dir' before creating the
    510     archive.  'base_dir' is the directory where we start archiving from;
    511     ie. 'base_dir' will be the common prefix of all files and
    512     directories in the archive.  'root_dir' and 'base_dir' both default
    513     to the current directory.  Returns the name of the archive file.
    514 
    515     'owner' and 'group' are used when creating a tar archive. By default,
    516     uses the current owner and group.
    517     """
    518     save_cwd = os.getcwd()
    519     if root_dir is not None:
    520         if logger is not None:
    521             logger.debug("changing into '%s'", root_dir)
    522         base_name = os.path.abspath(base_name)
    523         if not dry_run:
    524             os.chdir(root_dir)
    525 
    526     if base_dir is None:
    527         base_dir = os.curdir
    528 
    529     kwargs = {'dry_run': dry_run, 'logger': logger}
    530 
    531     try:
    532         format_info = _ARCHIVE_FORMATS[format]
    533     except KeyError:
    534         raise ValueError, "unknown archive format '%s'" % format
    535 
    536     func = format_info[0]
    537     for arg, val in format_info[1]:
    538         kwargs[arg] = val
    539 
    540     if format != 'zip':
    541         kwargs['owner'] = owner
    542         kwargs['group'] = group
    543 
    544     try:
    545         filename = func(base_name, base_dir, **kwargs)
    546     finally:
    547         if root_dir is not None:
    548             if logger is not None:
    549                 logger.debug("changing back to '%s'", save_cwd)
    550             os.chdir(save_cwd)
    551 
    552     return filename
    553