"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""

import os
import sys
import stat
from os.path import abspath
import fnmatch
import collections
import errno

try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]


class Error(EnvironmentError):
    pass


class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""


class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""


# WindowsError only exists on Windows; elsewhere fall back to None so that
# later isinstance() checks can be skipped.
try:
    WindowsError
except NameError:
    WindowsError = None


def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is read in chunks of at most 'length' bytes until fsrc.read()
    returns an empty result.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)


def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where available (returning False if it raises
    OSError); otherwise compares the normalized absolute pathnames.
    """
    # Macintosh, Unix.
    if hasattr(os.path, 'samefile'):
        try:
            return os.path.samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    return (os.path.normcase(os.path.abspath(src)) ==
            os.path.normcase(os.path.abspath(dst)))


def copyfile(src, dst):
    """Copy data from src to dst.

    Raises Error if src and dst are the same file, and SpecialFileError if
    either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)


def copymode(src, dst):
    """Copy mode bits from src to dst (a no-op if os.chmod is missing)."""
    if hasattr(os, 'chmod'):
        st = os.stat(src)
        mode = stat.S_IMODE(st.st_mode)
        os.chmod(dst, mode)


def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
    st = os.stat(src)
    mode = stat.S_IMODE(st.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (st.st_atime, st.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, mode)
    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
        try:
            os.chflags(dst, st.st_flags)
        except OSError as why:
            # Ignore "operation not supported" errors; re-raise anything else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise


def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copymode(src, dst)


def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst)
    copystat(src, dst)


def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns


def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            # Treat an entry that cannot be lstat'ed as a non-directory.
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())


def _basename(path):
    """Return the last component of path, ignoring trailing separators."""
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    return os.path.basename(path.rstrip(os.path.sep))


def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copy-then-delete.
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'."
                            % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)


def _destinsrc(src, dst):
    """Return True if the absolute path of dst starts with that of src.

    Both paths are terminated with a separator before the comparison so
    that only whole path components can match.
    """
    src = abspath(src)
    dst = abspath(dst)
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)


def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _get_uid(name):
    """Returns an uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None


def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
    compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}

    if compress is not None and compress not in compress_ext:
        raise ValueError(
            "bad value for 'compress': must be None, 'gzip' or 'bzip2'")

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory component; there is
    # nothing to create in that case (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    import tarfile  # late import so Python build itself doesn't break

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership on each member when an owner/group was resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name


def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning an external 'zip'.

    Raises ExecError if the command could not be run successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError(
            ("unable to create zip file '%s': "
             "could neither import the 'zipfile' module nor "
             "find a standalone zip utility") % zip_filename)


def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # archive_dir is '' when base_name has no directory component; there is
    # nothing to create in that case (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            zf = zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED)
            # Close the archive even if adding a member fails.
            try:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)
            finally:
                zf.close()

    return zip_filename


# Maps format name -> (archiving function, extra (name, value) args,
# description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
}


def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats


def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable or extra_args is malformed.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)


def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]


def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must not be interpreted
        # relative to root_dir after the chdir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename