Home | History | Annotate | Download | only in Lib
      1 """Utility functions for copying and archiving files and directory trees.
      2 
      3 XXX The functions here don't copy the resource fork or other metadata on Mac.
      4 
      5 """
      6 
      7 import os
      8 import sys
      9 import stat
     10 from os.path import abspath
     11 import fnmatch
     12 import collections
     13 import errno
     14 
     15 try:
     16     from pwd import getpwnam
     17 except ImportError:
     18     getpwnam = None
     19 
     20 try:
     21     from grp import getgrnam
     22 except ImportError:
     23     getgrnam = None
     24 
# Public API of this module: the names made available by a star-import.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]
     30 
     31 class Error(EnvironmentError):
     32     pass
     33 
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe).

    copyfile() raises it when the source or the destination is a FIFO.
    """
     37 
class ExecError(EnvironmentError):
    """Raised when a command could not be executed.

    _call_external_zip() raises it when spawning the external ``zip``
    program fails.
    """
     40 
# WindowsError is not defined on every platform.  Bind the name to None where
# it is missing so that copytree() can test "WindowsError is not None" before
# using it in an isinstance() check.
try:
    WindowsError
except NameError:
    WindowsError = None
     45 
     46 def copyfileobj(fsrc, fdst, length=16*1024):
     47     """copy data from file-like object fsrc to file-like object fdst"""
     48     while 1:
     49         buf = fsrc.read(length)
     50         if not buf:
     51             break
     52         fdst.write(buf)
     53 
     54 def _samefile(src, dst):
     55     # Macintosh, Unix.
     56     if hasattr(os.path, 'samefile'):
     57         try:
     58             return os.path.samefile(src, dst)
     59         except OSError:
     60             return False
     61 
     62     # All other platforms: check for same pathname.
     63     return (os.path.normcase(os.path.abspath(src)) ==
     64             os.path.normcase(os.path.abspath(dst)))
     65 
     66 def copyfile(src, dst):
     67     """Copy data from src to dst"""
     68     if _samefile(src, dst):
     69         raise Error("`%s` and `%s` are the same file" % (src, dst))
     70 
     71     for fn in [src, dst]:
     72         try:
     73             st = os.stat(fn)
     74         except OSError:
     75             # File most likely does not exist
     76             pass
     77         else:
     78             # XXX What about other special files? (sockets, devices...)
     79             if stat.S_ISFIFO(st.st_mode):
     80                 raise SpecialFileError("`%s` is a named pipe" % fn)
     81 
     82     with open(src, 'rb') as fsrc:
     83         with open(dst, 'wb') as fdst:
     84             copyfileobj(fsrc, fdst)
     85 
     86 def copymode(src, dst):
     87     """Copy mode bits from src to dst"""
     88     if hasattr(os, 'chmod'):
     89         st = os.stat(src)
     90         mode = stat.S_IMODE(st.st_mode)
     91         os.chmod(dst, mode)
     92 
     93 def copystat(src, dst):
     94     """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
     95     st = os.stat(src)
     96     mode = stat.S_IMODE(st.st_mode)
     97     if hasattr(os, 'utime'):
     98         os.utime(dst, (st.st_atime, st.st_mtime))
     99     if hasattr(os, 'chmod'):
    100         os.chmod(dst, mode)
    101     if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
    102         try:
    103             os.chflags(dst, st.st_flags)
    104         except OSError, why:
    105             for err in 'EOPNOTSUPP', 'ENOTSUP':
    106                 if hasattr(errno, err) and why.errno == getattr(errno, err):
    107                     break
    108             else:
    109                 raise
    110 
    111 def copy(src, dst):
    112     """Copy data and mode bits ("cp src dst").
    113 
    114     The destination may be a directory.
    115 
    116     """
    117     if os.path.isdir(dst):
    118         dst = os.path.join(dst, os.path.basename(src))
    119     copyfile(src, dst)
    120     copymode(src, dst)
    121 
    122 def copy2(src, dst):
    123     """Copy data and all stat info ("cp -p src dst").
    124 
    125     The destination may be a directory.
    126 
    127     """
    128     if os.path.isdir(dst):
    129         dst = os.path.join(dst, os.path.basename(src))
    130     copyfile(src, dst)
    131     copystat(src, dst)
    132 
    133 def ignore_patterns(*patterns):
    134     """Function that can be used as copytree() ignore parameter.
    135 
    136     Patterns is a sequence of glob-style patterns
    137     that are used to exclude files"""
    138     def _ignore_patterns(path, names):
    139         ignored_names = []
    140         for pattern in patterns:
    141             ignored_names.extend(fnmatch.filter(names, pattern))
    142         return set(ignored_names)
    143     return _ignore_patterns
    144 
    145 def copytree(src, dst, symlinks=False, ignore=None):
    146     """Recursively copy a directory tree using copy2().
    147 
    148     The destination directory must not already exist.
    149     If exception(s) occur, an Error is raised with a list of reasons.
    150 
    151     If the optional symlinks flag is true, symbolic links in the
    152     source tree result in symbolic links in the destination tree; if
    153     it is false, the contents of the files pointed to by symbolic
    154     links are copied.
    155 
    156     The optional ignore argument is a callable. If given, it
    157     is called with the `src` parameter, which is the directory
    158     being visited by copytree(), and `names` which is the list of
    159     `src` contents, as returned by os.listdir():
    160 
    161         callable(src, names) -> ignored_names
    162 
    163     Since copytree() is called recursively, the callable will be
    164     called once for each directory that is copied. It returns a
    165     list of names relative to the `src` directory that should
    166     not be copied.
    167 
    168     XXX Consider this example code rather than the ultimate tool.
    169 
    170     """
    171     names = os.listdir(src)
    172     if ignore is not None:
    173         ignored_names = ignore(src, names)
    174     else:
    175         ignored_names = set()
    176 
    177     os.makedirs(dst)
    178     errors = []
    179     for name in names:
    180         if name in ignored_names:
    181             continue
    182         srcname = os.path.join(src, name)
    183         dstname = os.path.join(dst, name)
    184         try:
    185             if symlinks and os.path.islink(srcname):
    186                 linkto = os.readlink(srcname)
    187                 os.symlink(linkto, dstname)
    188             elif os.path.isdir(srcname):
    189                 copytree(srcname, dstname, symlinks, ignore)
    190             else:
    191                 # Will raise a SpecialFileError for unsupported file types
    192                 copy2(srcname, dstname)
    193         # catch the Error from the recursive copytree so that we can
    194         # continue with other files
    195         except Error, err:
    196             errors.extend(err.args[0])
    197         except EnvironmentError, why:
    198             errors.append((srcname, dstname, str(why)))
    199     try:
    200         copystat(src, dst)
    201     except OSError, why:
    202         if WindowsError is not None and isinstance(why, WindowsError):
    203             # Copying file access times may fail on Windows
    204             pass
    205         else:
    206             errors.append((src, dst, str(why)))
    207     if errors:
    208         raise Error, errors
    209 
    210 def rmtree(path, ignore_errors=False, onerror=None):
    211     """Recursively delete a directory tree.
    212 
    213     If ignore_errors is set, errors are ignored; otherwise, if onerror
    214     is set, it is called to handle the error with arguments (func,
    215     path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    216     path is the argument to that function that caused it to fail; and
    217     exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    218     is false and onerror is None, an exception is raised.
    219 
    220     """
    221     if ignore_errors:
    222         def onerror(*args):
    223             pass
    224     elif onerror is None:
    225         def onerror(*args):
    226             raise
    227     try:
    228         if os.path.islink(path):
    229             # symlinks to directories are forbidden, see bug #1669
    230             raise OSError("Cannot call rmtree on a symbolic link")
    231     except OSError:
    232         onerror(os.path.islink, path, sys.exc_info())
    233         # can't continue even if onerror hook returns
    234         return
    235     names = []
    236     try:
    237         names = os.listdir(path)
    238     except os.error, err:
    239         onerror(os.listdir, path, sys.exc_info())
    240     for name in names:
    241         fullname = os.path.join(path, name)
    242         try:
    243             mode = os.lstat(fullname).st_mode
    244         except os.error:
    245             mode = 0
    246         if stat.S_ISDIR(mode):
    247             rmtree(fullname, ignore_errors, onerror)
    248         else:
    249             try:
    250                 os.remove(fullname)
    251             except os.error, err:
    252                 onerror(os.remove, fullname, sys.exc_info())
    253     try:
    254         os.rmdir(path)
    255     except os.error:
    256         onerror(os.rmdir, path, sys.exc_info())
    257 
    258 
    259 def _basename(path):
    260     # A basename() variant which first strips the trailing slash, if present.
    261     # Thus we always get the last component of the path, even for directories.
    262     sep = os.path.sep + (os.path.altsep or '')
    263     return os.path.basename(path.rstrip(sep))
    264 
    265 def move(src, dst):
    266     """Recursively move a file or directory to another location. This is
    267     similar to the Unix "mv" command.
    268 
    269     If the destination is a directory or a symlink to a directory, the source
    270     is moved inside the directory. The destination path must not already
    271     exist.
    272 
    273     If the destination already exists but is not a directory, it may be
    274     overwritten depending on os.rename() semantics.
    275 
    276     If the destination is on our current filesystem, then rename() is used.
    277     Otherwise, src is copied to the destination and then removed.
    278     A lot more could be done here...  A look at a mv.c shows a lot of
    279     the issues this implementation glosses over.
    280 
    281     """
    282     real_dst = dst
    283     if os.path.isdir(dst):
    284         if _samefile(src, dst):
    285             # We might be on a case insensitive filesystem,
    286             # perform the rename anyway.
    287             os.rename(src, dst)
    288             return
    289 
    290         real_dst = os.path.join(dst, _basename(src))
    291         if os.path.exists(real_dst):
    292             raise Error, "Destination path '%s' already exists" % real_dst
    293     try:
    294         os.rename(src, real_dst)
    295     except OSError:
    296         if os.path.isdir(src):
    297             if _destinsrc(src, dst):
    298                 raise Error, "Cannot move a directory '%s' into itself '%s'." % (src, dst)
    299             copytree(src, real_dst, symlinks=True)
    300             rmtree(src)
    301         else:
    302             copy2(src, real_dst)
    303             os.unlink(src)
    304 
    305 def _destinsrc(src, dst):
    306     src = abspath(src)
    307     dst = abspath(dst)
    308     if not src.endswith(os.path.sep):
    309         src += os.path.sep
    310     if not dst.endswith(os.path.sep):
    311         dst += os.path.sep
    312     return dst.startswith(src)
    313 
    314 def _get_gid(name):
    315     """Returns a gid, given a group name."""
    316     if getgrnam is None or name is None:
    317         return None
    318     try:
    319         result = getgrnam(name)
    320     except KeyError:
    321         result = None
    322     if result is not None:
    323         return result[2]
    324     return None
    325 
    326 def _get_uid(name):
    327     """Returns an uid, given a user name."""
    328     if getpwnam is None or name is None:
    329         return None
    330     try:
    331         result = getpwnam(name)
    332     except KeyError:
    333         result = None
    334     if result is not None:
    335         return result[2]
    336     return None
    337 
    338 def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
    339                   owner=None, group=None, logger=None):
    340     """Create a (possibly compressed) tar file from all the files under
    341     'base_dir'.
    342 
    343     'compress' must be "gzip" (the default), "bzip2", or None.
    344 
    345     'owner' and 'group' can be used to define an owner and a group for the
    346     archive that is being built. If not provided, the current owner and group
    347     will be used.
    348 
    349     The output tar file will be named 'base_name' +  ".tar", possibly plus
    350     the appropriate compression extension (".gz", or ".bz2").
    351 
    352     Returns the output filename.
    353     """
    354     tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    355     compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
    356 
    357     # flags for compression program, each element of list will be an argument
    358     if compress is not None and compress not in compress_ext.keys():
    359         raise ValueError, \
    360               ("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
    361 
    362     archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    363     archive_dir = os.path.dirname(archive_name)
    364 
    365     if archive_dir and not os.path.exists(archive_dir):
    366         if logger is not None:
    367             logger.info("creating %s", archive_dir)
    368         if not dry_run:
    369             os.makedirs(archive_dir)
    370 
    371 
    372     # creating the tarball
    373     import tarfile  # late import so Python build itself doesn't break
    374 
    375     if logger is not None:
    376         logger.info('Creating tar archive')
    377 
    378     uid = _get_uid(owner)
    379     gid = _get_gid(group)
    380 
    381     def _set_uid_gid(tarinfo):
    382         if gid is not None:
    383             tarinfo.gid = gid
    384             tarinfo.gname = group
    385         if uid is not None:
    386             tarinfo.uid = uid
    387             tarinfo.uname = owner
    388         return tarinfo
    389 
    390     if not dry_run:
    391         tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    392         try:
    393             tar.add(base_dir, filter=_set_uid_gid)
    394         finally:
    395             tar.close()
    396 
    397     return archive_name
    398 
    399 def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    400     # XXX see if we want to keep an external call here
    401     if verbose:
    402         zipoptions = "-r"
    403     else:
    404         zipoptions = "-rq"
    405     from distutils.errors import DistutilsExecError
    406     from distutils.spawn import spawn
    407     try:
    408         spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    409     except DistutilsExecError:
    410         # XXX really should distinguish between "couldn't find
    411         # external 'zip' command" and "zip failed".
    412         raise ExecError, \
    413             ("unable to create zip file '%s': "
    414             "could neither import the 'zipfile' module nor "
    415             "find a standalone zip utility") % zip_filename
    416 
    417 def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    418     """Create a zip file from all the files under 'base_dir'.
    419 
    420     The output zip file will be named 'base_name' + ".zip".  Uses either the
    421     "zipfile" Python module (if available) or the InfoZIP "zip" utility
    422     (if installed and found on the default search path).  If neither tool is
    423     available, raises ExecError.  Returns the name of the output zip
    424     file.
    425     """
    426     zip_filename = base_name + ".zip"
    427     archive_dir = os.path.dirname(base_name)
    428 
    429     if archive_dir and not os.path.exists(archive_dir):
    430         if logger is not None:
    431             logger.info("creating %s", archive_dir)
    432         if not dry_run:
    433             os.makedirs(archive_dir)
    434 
    435     # If zipfile module is not available, try spawning an external 'zip'
    436     # command.
    437     try:
    438         import zipfile
    439     except ImportError:
    440         zipfile = None
    441 
    442     if zipfile is None:
    443         _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    444     else:
    445         if logger is not None:
    446             logger.info("creating '%s' and adding '%s' to it",
    447                         zip_filename, base_dir)
    448 
    449         if not dry_run:
    450             with zipfile.ZipFile(zip_filename, "w",
    451                                  compression=zipfile.ZIP_DEFLATED) as zf:
    452                 path = os.path.normpath(base_dir)
    453                 if path != os.curdir:
    454                     zf.write(path, path)
    455                     if logger is not None:
    456                         logger.info("adding '%s'", path)
    457                 for dirpath, dirnames, filenames in os.walk(base_dir):
    458                     for name in sorted(dirnames):
    459                         path = os.path.normpath(os.path.join(dirpath, name))
    460                         zf.write(path, path)
    461                         if logger is not None:
    462                             logger.info("adding '%s'", path)
    463                     for name in filenames:
    464                         path = os.path.normpath(os.path.join(dirpath, name))
    465                         if os.path.isfile(path):
    466                             zf.write(path, path)
    467                             if logger is not None:
    468                                 logger.info("adding '%s'", path)
    469 
    470     return zip_filename
    471 
# Registry of archive formats, keyed by format name.  Each value is a tuple
# (function, extra_args, description): make_archive() calls `function` and
# passes every (keyword, value) pair of `extra_args` to it as a keyword
# argument; get_archive_formats() reports `description`.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [],"ZIP file")
    }
    478 
    479 def get_archive_formats():
    480     """Returns a list of supported formats for archiving and unarchiving.
    481 
    482     Each element of the returned sequence is a tuple (name, description)
    483     """
    484     formats = [(name, registry[2]) for name, registry in
    485                _ARCHIVE_FORMATS.items()]
    486     formats.sort()
    487     return formats
    488 
    489 def register_archive_format(name, function, extra_args=None, description=''):
    490     """Registers an archive format.
    491 
    492     name is the name of the format. function is the callable that will be
    493     used to create archives. If provided, extra_args is a sequence of
    494     (name, value) tuples that will be passed as arguments to the callable.
    495     description can be provided to describe the format, and will be returned
    496     by the get_archive_formats() function.
    497     """
    498     if extra_args is None:
    499         extra_args = []
    500     if not isinstance(function, collections.Callable):
    501         raise TypeError('The %s object is not callable' % function)
    502     if not isinstance(extra_args, (tuple, list)):
    503         raise TypeError('extra_args needs to be a sequence')
    504     for element in extra_args:
    505         if not isinstance(element, (tuple, list)) or len(element) !=2 :
    506             raise TypeError('extra_args elements are : (arg_name, value)')
    507 
    508     _ARCHIVE_FORMATS[name] = (function, extra_args, description)
    509 
    510 def unregister_archive_format(name):
    511     del _ARCHIVE_FORMATS[name]
    512 
    513 def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
    514                  dry_run=0, owner=None, group=None, logger=None):
    515     """Create an archive file (eg. zip or tar).
    516 
    517     'base_name' is the name of the file to create, minus any format-specific
    518     extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    519     or "gztar".
    520 
    521     'root_dir' is a directory that will be the root directory of the
    522     archive; ie. we typically chdir into 'root_dir' before creating the
    523     archive.  'base_dir' is the directory where we start archiving from;
    524     ie. 'base_dir' will be the common prefix of all files and
    525     directories in the archive.  'root_dir' and 'base_dir' both default
    526     to the current directory.  Returns the name of the archive file.
    527 
    528     'owner' and 'group' are used when creating a tar archive. By default,
    529     uses the current owner and group.
    530     """
    531     save_cwd = os.getcwd()
    532     if root_dir is not None:
    533         if logger is not None:
    534             logger.debug("changing into '%s'", root_dir)
    535         base_name = os.path.abspath(base_name)
    536         if not dry_run:
    537             os.chdir(root_dir)
    538 
    539     if base_dir is None:
    540         base_dir = os.curdir
    541 
    542     kwargs = {'dry_run': dry_run, 'logger': logger}
    543 
    544     try:
    545         format_info = _ARCHIVE_FORMATS[format]
    546     except KeyError:
    547         raise ValueError, "unknown archive format '%s'" % format
    548 
    549     func = format_info[0]
    550     for arg, val in format_info[1]:
    551         kwargs[arg] = val
    552 
    553     if format != 'zip':
    554         kwargs['owner'] = owner
    555         kwargs['group'] = group
    556 
    557     try:
    558         filename = func(base_name, base_dir, **kwargs)
    559     finally:
    560         if root_dir is not None:
    561             if logger is not None:
    562                 logger.debug("changing back to '%s'", save_cwd)
    563             os.chdir(save_cwd)
    564 
    565     return filename
    566