# Home | History | Annotate | Download | only in python2.7  (source-browser navigation header; not part of the module)
      1 """Utility functions for copying and archiving files and directory trees.
      2 
      3 XXX The functions here don't copy the resource fork or other metadata on Mac.
      4 
      5 """
      6 
      7 import os
      8 import sys
      9 import stat
     10 from os.path import abspath
     11 import fnmatch
     12 import collections
     13 import errno
     14 
     15 try:
     16     from pwd import getpwnam
     17 except ImportError:
     18     getpwnam = None
     19 
     20 try:
     21     from grp import getgrnam
     22 except ImportError:
     23     getgrnam = None
     24 
     25 __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
     26            "copytree", "move", "rmtree", "Error", "SpecialFileError",
     27            "ExecError", "make_archive", "get_archive_formats",
     28            "register_archive_format", "unregister_archive_format",
     29            "ignore_patterns"]
     30 
     31 class Error(EnvironmentError):
     32     pass
     33 
     34 class SpecialFileError(EnvironmentError):
     35     """Raised when trying to do a kind of operation (e.g. copying) which is
     36     not supported on a special file (e.g. a named pipe)"""
     37 
     38 class ExecError(EnvironmentError):
     39     """Raised when a command could not be executed"""
     40 
# Probe for the WindowsError name.  Where it is not defined the lookup raises
# NameError and the name is bound to None, so code in this module can guard
# isinstance() checks with "WindowsError is not None".
try:
    WindowsError
except NameError:
    WindowsError = None
     45 
     46 def copyfileobj(fsrc, fdst, length=16*1024):
     47     """copy data from file-like object fsrc to file-like object fdst"""
     48     while 1:
     49         buf = fsrc.read(length)
     50         if not buf:
     51             break
     52         fdst.write(buf)
     53 
     54 def _samefile(src, dst):
     55     # Macintosh, Unix.
     56     if hasattr(os.path, 'samefile'):
     57         try:
     58             return os.path.samefile(src, dst)
     59         except OSError:
     60             return False
     61 
     62     # All other platforms: check for same pathname.
     63     return (os.path.normcase(os.path.abspath(src)) ==
     64             os.path.normcase(os.path.abspath(dst)))
     65 
     66 def copyfile(src, dst):
     67     """Copy data from src to dst"""
     68     if _samefile(src, dst):
     69         raise Error("`%s` and `%s` are the same file" % (src, dst))
     70 
     71     for fn in [src, dst]:
     72         try:
     73             st = os.stat(fn)
     74         except OSError:
     75             # File most likely does not exist
     76             pass
     77         else:
     78             # XXX What about other special files? (sockets, devices...)
     79             if stat.S_ISFIFO(st.st_mode):
     80                 raise SpecialFileError("`%s` is a named pipe" % fn)
     81 
     82     with open(src, 'rb') as fsrc:
     83         with open(dst, 'wb') as fdst:
     84             copyfileobj(fsrc, fdst)
     85 
     86 def copymode(src, dst):
     87     """Copy mode bits from src to dst"""
     88     if hasattr(os, 'chmod'):
     89         st = os.stat(src)
     90         mode = stat.S_IMODE(st.st_mode)
     91         os.chmod(dst, mode)
     92 
     93 def copystat(src, dst):
     94     """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
     95     st = os.stat(src)
     96     mode = stat.S_IMODE(st.st_mode)
     97     if hasattr(os, 'utime'):
     98         os.utime(dst, (st.st_atime, st.st_mtime))
     99     if hasattr(os, 'chmod'):
    100         os.chmod(dst, mode)
    101     if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
    102         try:
    103             os.chflags(dst, st.st_flags)
    104         except OSError, why:
    105             for err in 'EOPNOTSUPP', 'ENOTSUP':
    106                 if hasattr(errno, err) and why.errno == getattr(errno, err):
    107                     break
    108             else:
    109                 raise
    110 
    111 def copy(src, dst):
    112     """Copy data and mode bits ("cp src dst").
    113 
    114     The destination may be a directory.
    115 
    116     """
    117     if os.path.isdir(dst):
    118         dst = os.path.join(dst, os.path.basename(src))
    119     copyfile(src, dst)
    120     copymode(src, dst)
    121 
    122 def copy2(src, dst):
    123     """Copy data and all stat info ("cp -p src dst").
    124 
    125     The destination may be a directory.
    126 
    127     """
    128     if os.path.isdir(dst):
    129         dst = os.path.join(dst, os.path.basename(src))
    130     copyfile(src, dst)
    131     copystat(src, dst)
    132 
    133 def ignore_patterns(*patterns):
    134     """Function that can be used as copytree() ignore parameter.
    135 
    136     Patterns is a sequence of glob-style patterns
    137     that are used to exclude files"""
    138     def _ignore_patterns(path, names):
    139         ignored_names = []
    140         for pattern in patterns:
    141             ignored_names.extend(fnmatch.filter(names, pattern))
    142         return set(ignored_names)
    143     return _ignore_patterns
    144 
    145 def copytree(src, dst, symlinks=False, ignore=None):
    146     """Recursively copy a directory tree using copy2().
    147 
    148     The destination directory must not already exist.
    149     If exception(s) occur, an Error is raised with a list of reasons.
    150 
    151     If the optional symlinks flag is true, symbolic links in the
    152     source tree result in symbolic links in the destination tree; if
    153     it is false, the contents of the files pointed to by symbolic
    154     links are copied.
    155 
    156     The optional ignore argument is a callable. If given, it
    157     is called with the `src` parameter, which is the directory
    158     being visited by copytree(), and `names` which is the list of
    159     `src` contents, as returned by os.listdir():
    160 
    161         callable(src, names) -> ignored_names
    162 
    163     Since copytree() is called recursively, the callable will be
    164     called once for each directory that is copied. It returns a
    165     list of names relative to the `src` directory that should
    166     not be copied.
    167 
    168     XXX Consider this example code rather than the ultimate tool.
    169 
    170     """
    171     names = os.listdir(src)
    172     if ignore is not None:
    173         ignored_names = ignore(src, names)
    174     else:
    175         ignored_names = set()
    176 
    177     os.makedirs(dst)
    178     errors = []
    179     for name in names:
    180         if name in ignored_names:
    181             continue
    182         srcname = os.path.join(src, name)
    183         dstname = os.path.join(dst, name)
    184         try:
    185             if symlinks and os.path.islink(srcname):
    186                 linkto = os.readlink(srcname)
    187                 os.symlink(linkto, dstname)
    188             elif os.path.isdir(srcname):
    189                 copytree(srcname, dstname, symlinks, ignore)
    190             else:
    191                 # Will raise a SpecialFileError for unsupported file types
    192                 copy2(srcname, dstname)
    193         # catch the Error from the recursive copytree so that we can
    194         # continue with other files
    195         except Error, err:
    196             errors.extend(err.args[0])
    197         except EnvironmentError, why:
    198             errors.append((srcname, dstname, str(why)))
    199     try:
    200         copystat(src, dst)
    201     except OSError, why:
    202         if WindowsError is not None and isinstance(why, WindowsError):
    203             # Copying file access times may fail on Windows
    204             pass
    205         else:
    206             errors.append((src, dst, str(why)))
    207     if errors:
    208         raise Error, errors
    209 
    210 def rmtree(path, ignore_errors=False, onerror=None):
    211     """Recursively delete a directory tree.
    212 
    213     If ignore_errors is set, errors are ignored; otherwise, if onerror
    214     is set, it is called to handle the error with arguments (func,
    215     path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    216     path is the argument to that function that caused it to fail; and
    217     exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    218     is false and onerror is None, an exception is raised.
    219 
    220     """
    221     if ignore_errors:
    222         def onerror(*args):
    223             pass
    224     elif onerror is None:
    225         def onerror(*args):
    226             raise
    227     try:
    228         if os.path.islink(path):
    229             # symlinks to directories are forbidden, see bug #1669
    230             raise OSError("Cannot call rmtree on a symbolic link")
    231     except OSError:
    232         onerror(os.path.islink, path, sys.exc_info())
    233         # can't continue even if onerror hook returns
    234         return
    235     names = []
    236     try:
    237         names = os.listdir(path)
    238     except os.error, err:
    239         onerror(os.listdir, path, sys.exc_info())
    240     for name in names:
    241         fullname = os.path.join(path, name)
    242         try:
    243             mode = os.lstat(fullname).st_mode
    244         except os.error:
    245             mode = 0
    246         if stat.S_ISDIR(mode):
    247             rmtree(fullname, ignore_errors, onerror)
    248         else:
    249             try:
    250                 os.remove(fullname)
    251             except os.error, err:
    252                 onerror(os.remove, fullname, sys.exc_info())
    253     try:
    254         os.rmdir(path)
    255     except os.error:
    256         onerror(os.rmdir, path, sys.exc_info())
    257 
    258 
    259 def _basename(path):
    260     # A basename() variant which first strips the trailing slash, if present.
    261     # Thus we always get the last component of the path, even for directories.
    262     return os.path.basename(path.rstrip(os.path.sep))
    263 
    264 def move(src, dst):
    265     """Recursively move a file or directory to another location. This is
    266     similar to the Unix "mv" command.
    267 
    268     If the destination is a directory or a symlink to a directory, the source
    269     is moved inside the directory. The destination path must not already
    270     exist.
    271 
    272     If the destination already exists but is not a directory, it may be
    273     overwritten depending on os.rename() semantics.
    274 
    275     If the destination is on our current filesystem, then rename() is used.
    276     Otherwise, src is copied to the destination and then removed.
    277     A lot more could be done here...  A look at a mv.c shows a lot of
    278     the issues this implementation glosses over.
    279 
    280     """
    281     real_dst = dst
    282     if os.path.isdir(dst):
    283         if _samefile(src, dst):
    284             # We might be on a case insensitive filesystem,
    285             # perform the rename anyway.
    286             os.rename(src, dst)
    287             return
    288 
    289         real_dst = os.path.join(dst, _basename(src))
    290         if os.path.exists(real_dst):
    291             raise Error, "Destination path '%s' already exists" % real_dst
    292     try:
    293         os.rename(src, real_dst)
    294     except OSError:
    295         if os.path.isdir(src):
    296             if _destinsrc(src, dst):
    297                 raise Error, "Cannot move a directory '%s' into itself '%s'." % (src, dst)
    298             copytree(src, real_dst, symlinks=True)
    299             rmtree(src)
    300         else:
    301             copy2(src, real_dst)
    302             os.unlink(src)
    303 
    304 def _destinsrc(src, dst):
    305     src = abspath(src)
    306     dst = abspath(dst)
    307     if not src.endswith(os.path.sep):
    308         src += os.path.sep
    309     if not dst.endswith(os.path.sep):
    310         dst += os.path.sep
    311     return dst.startswith(src)
    312 
    313 def _get_gid(name):
    314     """Returns a gid, given a group name."""
    315     if getgrnam is None or name is None:
    316         return None
    317     try:
    318         result = getgrnam(name)
    319     except KeyError:
    320         result = None
    321     if result is not None:
    322         return result[2]
    323     return None
    324 
    325 def _get_uid(name):
    326     """Returns an uid, given a user name."""
    327     if getpwnam is None or name is None:
    328         return None
    329     try:
    330         result = getpwnam(name)
    331     except KeyError:
    332         result = None
    333     if result is not None:
    334         return result[2]
    335     return None
    336 
    337 def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
    338                   owner=None, group=None, logger=None):
    339     """Create a (possibly compressed) tar file from all the files under
    340     'base_dir'.
    341 
    342     'compress' must be "gzip" (the default), "bzip2", or None.
    343 
    344     'owner' and 'group' can be used to define an owner and a group for the
    345     archive that is being built. If not provided, the current owner and group
    346     will be used.
    347 
    348     The output tar file will be named 'base_name' +  ".tar", possibly plus
    349     the appropriate compression extension (".gz", or ".bz2").
    350 
    351     Returns the output filename.
    352     """
    353     tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    354     compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
    355 
    356     # flags for compression program, each element of list will be an argument
    357     if compress is not None and compress not in compress_ext.keys():
    358         raise ValueError, \
    359               ("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
    360 
    361     archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    362     archive_dir = os.path.dirname(archive_name)
    363 
    364     if not os.path.exists(archive_dir):
    365         if logger is not None:
    366             logger.info("creating %s", archive_dir)
    367         if not dry_run:
    368             os.makedirs(archive_dir)
    369 
    370 
    371     # creating the tarball
    372     import tarfile  # late import so Python build itself doesn't break
    373 
    374     if logger is not None:
    375         logger.info('Creating tar archive')
    376 
    377     uid = _get_uid(owner)
    378     gid = _get_gid(group)
    379 
    380     def _set_uid_gid(tarinfo):
    381         if gid is not None:
    382             tarinfo.gid = gid
    383             tarinfo.gname = group
    384         if uid is not None:
    385             tarinfo.uid = uid
    386             tarinfo.uname = owner
    387         return tarinfo
    388 
    389     if not dry_run:
    390         tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    391         try:
    392             tar.add(base_dir, filter=_set_uid_gid)
    393         finally:
    394             tar.close()
    395 
    396     return archive_name
    397 
    398 def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    399     # XXX see if we want to keep an external call here
    400     if verbose:
    401         zipoptions = "-r"
    402     else:
    403         zipoptions = "-rq"
    404     from distutils.errors import DistutilsExecError
    405     from distutils.spawn import spawn
    406     try:
    407         spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    408     except DistutilsExecError:
    409         # XXX really should distinguish between "couldn't find
    410         # external 'zip' command" and "zip failed".
    411         raise ExecError, \
    412             ("unable to create zip file '%s': "
    413             "could neither import the 'zipfile' module nor "
    414             "find a standalone zip utility") % zip_filename
    415 
    416 def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    417     """Create a zip file from all the files under 'base_dir'.
    418 
    419     The output zip file will be named 'base_name' + ".zip".  Uses either the
    420     "zipfile" Python module (if available) or the InfoZIP "zip" utility
    421     (if installed and found on the default search path).  If neither tool is
    422     available, raises ExecError.  Returns the name of the output zip
    423     file.
    424     """
    425     zip_filename = base_name + ".zip"
    426     archive_dir = os.path.dirname(base_name)
    427 
    428     if not os.path.exists(archive_dir):
    429         if logger is not None:
    430             logger.info("creating %s", archive_dir)
    431         if not dry_run:
    432             os.makedirs(archive_dir)
    433 
    434     # If zipfile module is not available, try spawning an external 'zip'
    435     # command.
    436     try:
    437         import zipfile
    438     except ImportError:
    439         zipfile = None
    440 
    441     if zipfile is None:
    442         _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    443     else:
    444         if logger is not None:
    445             logger.info("creating '%s' and adding '%s' to it",
    446                         zip_filename, base_dir)
    447 
    448         if not dry_run:
    449             zip = zipfile.ZipFile(zip_filename, "w",
    450                                   compression=zipfile.ZIP_DEFLATED)
    451 
    452             for dirpath, dirnames, filenames in os.walk(base_dir):
    453                 for name in filenames:
    454                     path = os.path.normpath(os.path.join(dirpath, name))
    455                     if os.path.isfile(path):
    456                         zip.write(path, path)
    457                         if logger is not None:
    458                             logger.info("adding '%s'", path)
    459             zip.close()
    460 
    461     return zip_filename
    462 
    463 _ARCHIVE_FORMATS = {
    464     'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    465     'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    466     'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    467     'zip':   (_make_zipfile, [],"ZIP file")
    468     }
    469 
    470 def get_archive_formats():
    471     """Returns a list of supported formats for archiving and unarchiving.
    472 
    473     Each element of the returned sequence is a tuple (name, description)
    474     """
    475     formats = [(name, registry[2]) for name, registry in
    476                _ARCHIVE_FORMATS.items()]
    477     formats.sort()
    478     return formats
    479 
    480 def register_archive_format(name, function, extra_args=None, description=''):
    481     """Registers an archive format.
    482 
    483     name is the name of the format. function is the callable that will be
    484     used to create archives. If provided, extra_args is a sequence of
    485     (name, value) tuples that will be passed as arguments to the callable.
    486     description can be provided to describe the format, and will be returned
    487     by the get_archive_formats() function.
    488     """
    489     if extra_args is None:
    490         extra_args = []
    491     if not isinstance(function, collections.Callable):
    492         raise TypeError('The %s object is not callable' % function)
    493     if not isinstance(extra_args, (tuple, list)):
    494         raise TypeError('extra_args needs to be a sequence')
    495     for element in extra_args:
    496         if not isinstance(element, (tuple, list)) or len(element) !=2 :
    497             raise TypeError('extra_args elements are : (arg_name, value)')
    498 
    499     _ARCHIVE_FORMATS[name] = (function, extra_args, description)
    500 
    501 def unregister_archive_format(name):
    502     del _ARCHIVE_FORMATS[name]
    503 
    504 def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
    505                  dry_run=0, owner=None, group=None, logger=None):
    506     """Create an archive file (eg. zip or tar).
    507 
    508     'base_name' is the name of the file to create, minus any format-specific
    509     extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    510     or "gztar".
    511 
    512     'root_dir' is a directory that will be the root directory of the
    513     archive; ie. we typically chdir into 'root_dir' before creating the
    514     archive.  'base_dir' is the directory where we start archiving from;
    515     ie. 'base_dir' will be the common prefix of all files and
    516     directories in the archive.  'root_dir' and 'base_dir' both default
    517     to the current directory.  Returns the name of the archive file.
    518 
    519     'owner' and 'group' are used when creating a tar archive. By default,
    520     uses the current owner and group.
    521     """
    522     save_cwd = os.getcwd()
    523     if root_dir is not None:
    524         if logger is not None:
    525             logger.debug("changing into '%s'", root_dir)
    526         base_name = os.path.abspath(base_name)
    527         if not dry_run:
    528             os.chdir(root_dir)
    529 
    530     if base_dir is None:
    531         base_dir = os.curdir
    532 
    533     kwargs = {'dry_run': dry_run, 'logger': logger}
    534 
    535     try:
    536         format_info = _ARCHIVE_FORMATS[format]
    537     except KeyError:
    538         raise ValueError, "unknown archive format '%s'" % format
    539 
    540     func = format_info[0]
    541     for arg, val in format_info[1]:
    542         kwargs[arg] = val
    543 
    544     if format != 'zip':
    545         kwargs['owner'] = owner
    546         kwargs['group'] = group
    547 
    548     try:
    549         filename = func(base_name, base_dir, **kwargs)
    550     finally:
    551         if root_dir is not None:
    552             if logger is not None:
    553                 logger.debug("changing back to '%s'", save_cwd)
    554             os.chdir(save_cwd)
    555 
    556     return filename
    557