Home | History | Annotate | Download | only in python2.7
      1 """Common operations on Posix pathnames.
      2 
      3 Instead of importing this module directly, import os and refer to
      4 this module as os.path.  The "os.path" name is an alias for this
      5 module on Posix systems; on other systems (e.g. Mac, Windows),
      6 os.path provides the same operations in a manner specific to that
      7 platform, and is an alias to another module (e.g. macpath, ntpath).
      8 
      9 Some of this can actually be useful on non-Posix systems too, e.g.
     10 for manipulation of the pathname component of URLs.
     11 """
     12 
     13 import os
     14 import sys
     15 import stat
     16 import genericpath
     17 import warnings
     18 from genericpath import *
     19 
try:
    # Bind the built-in unicode type to a private name so the isinstance()
    # checks elsewhere in this module have a single type to test against.
    _unicode = unicode
except NameError:
    # If Python is built without Unicode support, the unicode type
    # will not exist. Fake one.
    # The placeholder is only ever used as an isinstance() target.
    class _unicode(object):
        pass
     27 
# Public API of this module: the names bound by a star-import of it.
# Several entries (e.g. "commonprefix", "exists", "isdir", "getsize") have no
# definition in this file; presumably they are supplied by the
# ``from genericpath import *`` statement among the imports.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath"]
     35 
     36 # strings representing various path-related bits and pieces
curdir = '.'                # component naming the current directory
pardir = '..'               # component naming the parent directory
extsep = '.'                # separates a base filename from its extension
sep = '/'                   # separates pathname components
pathsep = ':'               # separates entries in a search path (see defpath)
defpath = ':/bin:/usr/bin'  # default search path, entries split by pathsep
altsep = None               # no alternative component separator is defined
devnull = '/dev/null'       # path of the null device
     45 
     46 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
     47 # On MS-DOS this may also turn slashes into backslashes; however, other
     48 # normalizations (such as optimizing '../' away) are not allowed
     49 # (another function should be defined to do that).
     50 
     51 def normcase(s):
     52     """Normalize case of pathname.  Has no effect under Posix"""
     53     return s
     54 
     55 
     56 # Return whether a path is absolute.
     57 # Trivial in Posix, harder on the Mac or MS-DOS.
     58 
     59 def isabs(s):
     60     """Test whether a path is absolute"""
     61     return s.startswith('/')
     62 
     63 
     64 # Join pathnames.
     65 # Ignore the previous parts if a part is absolute.
     66 # Insert a '/' unless the first part is empty or already ends in '/'.
     67 
     68 def join(a, *p):
     69     """Join two or more pathname components, inserting '/' as needed.
     70     If any component is an absolute path, all previous path components
     71     will be discarded.  An empty last part will result in a path that
     72     ends with a separator."""
     73     path = a
     74     for b in p:
     75         if b.startswith('/'):
     76             path = b
     77         elif path == '' or path.endswith('/'):
     78             path +=  b
     79         else:
     80             path += '/' + b
     81     return path
     82 
     83 
     84 # Split a path in head (everything up to the last '/') and tail (the
     85 # rest).  If the path ends in '/', tail will be empty.  If there is no
     86 # '/' in the path, head  will be empty.
     87 # Trailing '/'es are stripped from head unless it is the root.
     88 
     89 def split(p):
     90     """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
     91     everything after the final slash.  Either part may be empty."""
     92     i = p.rfind('/') + 1
     93     head, tail = p[:i], p[i:]
     94     if head and head != '/'*len(head):
     95         head = head.rstrip('/')
     96     return head, tail
     97 
     98 
     99 # Split a path in root and extension.
    100 # The extension is everything starting at the last dot in the last
    101 # pathname component; the root is everything before that.
    102 # It is always true that root + ext == p.
    103 
    104 def splitext(p):
    105     return genericpath._splitext(p, sep, altsep, extsep)
    106 splitext.__doc__ = genericpath._splitext.__doc__
    107 
    108 # Split a pathname into a drive specification and the rest of the
    109 # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
    110 
    111 def splitdrive(p):
    112     """Split a pathname into drive and path. On Posix, drive is always
    113     empty."""
    114     return '', p
    115 
    116 
    117 # Return the tail (basename) part of a path, same as split(path)[1].
    118 
    119 def basename(p):
    120     """Returns the final component of a pathname"""
    121     i = p.rfind('/') + 1
    122     return p[i:]
    123 
    124 
    125 # Return the head (dirname) part of a path, same as split(path)[0].
    126 
    127 def dirname(p):
    128     """Returns the directory component of a pathname"""
    129     i = p.rfind('/') + 1
    130     head = p[:i]
    131     if head and head != '/'*len(head):
    132         head = head.rstrip('/')
    133     return head
    134 
    135 
    136 # Is a path a symbolic link?
    137 # This will always return false on systems where os.lstat doesn't exist.
    138 
    139 def islink(path):
    140     """Test whether a path is a symbolic link"""
    141     try:
    142         st = os.lstat(path)
    143     except (os.error, AttributeError):
    144         return False
    145     return stat.S_ISLNK(st.st_mode)
    146 
    147 # Being true for dangling symbolic links is also useful.
    148 
    149 def lexists(path):
    150     """Test whether a path exists.  Returns True for broken symbolic links"""
    151     try:
    152         os.lstat(path)
    153     except os.error:
    154         return False
    155     return True
    156 
    157 
    158 # Are two filenames really pointing to the same file?
    159 
    160 def samefile(f1, f2):
    161     """Test whether two pathnames reference the same actual file"""
    162     s1 = os.stat(f1)
    163     s2 = os.stat(f2)
    164     return samestat(s1, s2)
    165 
    166 
    167 # Are two open files really referencing the same file?
    168 # (Not necessarily the same file descriptor!)
    169 
    170 def sameopenfile(fp1, fp2):
    171     """Test whether two open file objects reference the same file"""
    172     s1 = os.fstat(fp1)
    173     s2 = os.fstat(fp2)
    174     return samestat(s1, s2)
    175 
    176 
    177 # Are two stat buffers (obtained from stat, fstat or lstat)
    178 # describing the same file?
    179 
    180 def samestat(s1, s2):
    181     """Test whether two stat buffers reference the same file"""
    182     return s1.st_ino == s2.st_ino and \
    183            s1.st_dev == s2.st_dev
    184 
    185 
    186 # Is a path a mount point?
    187 # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
    188 
    189 def ismount(path):
    190     """Test whether a path is a mount point"""
    191     if islink(path):
    192         # A symlink can never be a mount point
    193         return False
    194     try:
    195         s1 = os.lstat(path)
    196         s2 = os.lstat(join(path, '..'))
    197     except os.error:
    198         return False # It doesn't exist -- so not a mount point :-)
    199     dev1 = s1.st_dev
    200     dev2 = s2.st_dev
    201     if dev1 != dev2:
    202         return True     # path/.. on a different device as path
    203     ino1 = s1.st_ino
    204     ino2 = s2.st_ino
    205     if ino1 == ino2:
    206         return True     # path/.. is the same i-node as path
    207     return False
    208 
    209 
    210 # Directory tree walk.
    211 # For each directory under top (including top itself, but excluding
    212 # '.' and '..'), func(arg, dirname, filenames) is called, where
    213 # dirname is the name of the directory and filenames is the list
    214 # of files (and subdirectories etc.) in the directory.
    215 # The func may modify the filenames list, to implement a filter,
    216 # or to impose a different order of visiting.
    217 
    218 def walk(top, func, arg):
    219     """Directory tree walk with callback function.
    220 
    221     For each directory in the directory tree rooted at top (including top
    222     itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
    223     dirname is the name of the directory, and fnames a list of the names of
    224     the files and subdirectories in dirname (excluding '.' and '..').  func
    225     may modify the fnames list in-place (e.g. via del or slice assignment),
    226     and walk will only recurse into the subdirectories whose names remain in
    227     fnames; this can be used to implement a filter, or to impose a specific
    228     order of visiting.  No semantics are defined for, or required of, arg,
    229     beyond that arg is always passed to func.  It can be used, e.g., to pass
    230     a filename pattern, or a mutable object designed to accumulate
    231     statistics.  Passing None for arg is common."""
    232     warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
    233                       stacklevel=2)
    234     try:
    235         names = os.listdir(top)
    236     except os.error:
    237         return
    238     func(arg, top, names)
    239     for name in names:
    240         name = join(top, name)
    241         try:
    242             st = os.lstat(name)
    243         except os.error:
    244             continue
    245         if stat.S_ISDIR(st.st_mode):
    246             walk(name, func, arg)
    247 
    248 
    249 # Expand paths beginning with '~' or '~user'.
    250 # '~' means $HOME; '~user' means that user's home directory.
    251 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
    252 # the path is returned unchanged (leaving error reporting to whatever
    253 # function is called with the expanded path as argument).
    254 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
    255 # (A function should also be defined to do full *sh-style environment
    256 # variable expansion.)
    257 
    258 def expanduser(path):
    259     """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    260     do nothing."""
    261     if not path.startswith('~'):
    262         return path
    263     i = path.find('/', 1)
    264     if i < 0:
    265         i = len(path)
    266     if i == 1:
    267         if 'HOME' not in os.environ:
    268             import pwd
    269             userhome = pwd.getpwuid(os.getuid()).pw_dir
    270         else:
    271             userhome = os.environ['HOME']
    272     else:
    273         import pwd
    274         try:
    275             pwent = pwd.getpwnam(path[1:i])
    276         except KeyError:
    277             return path
    278         userhome = pwent.pw_dir
    279     userhome = userhome.rstrip('/')
    280     return (userhome + path[i:]) or '/'
    281 
    282 
    283 # Expand paths containing shell variable substitutions.
    284 # This expands the forms $variable and ${variable} only.
    285 # Non-existent variables are left unchanged.
    286 
    287 _varprog = None
    288 
    289 def expandvars(path):
    290     """Expand shell variables of form $var and ${var}.  Unknown variables
    291     are left unchanged."""
    292     global _varprog
    293     if '$' not in path:
    294         return path
    295     if not _varprog:
    296         import re
    297         _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    298     i = 0
    299     while True:
    300         m = _varprog.search(path, i)
    301         if not m:
    302             break
    303         i, j = m.span(0)
    304         name = m.group(1)
    305         if name.startswith('{') and name.endswith('}'):
    306             name = name[1:-1]
    307         if name in os.environ:
    308             tail = path[j:]
    309             path = path[:i] + os.environ[name]
    310             i = len(path)
    311             path += tail
    312         else:
    313             i = j
    314     return path
    315 
    316 
    317 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
    318 # It should be understood that this may change the meaning of the path
    319 # if it contains symbolic links!
    320 
    321 def normpath(path):
    322     """Normalize path, eliminating double slashes, etc."""
    323     # Preserve unicode (if path is unicode)
    324     slash, dot = (u'/', u'.') if isinstance(path, _unicode) else ('/', '.')
    325     if path == '':
    326         return dot
    327     initial_slashes = path.startswith('/')
    328     # POSIX allows one or two initial slashes, but treats three or more
    329     # as single slash.
    330     if (initial_slashes and
    331         path.startswith('//') and not path.startswith('///')):
    332         initial_slashes = 2
    333     comps = path.split('/')
    334     new_comps = []
    335     for comp in comps:
    336         if comp in ('', '.'):
    337             continue
    338         if (comp != '..' or (not initial_slashes and not new_comps) or
    339              (new_comps and new_comps[-1] == '..')):
    340             new_comps.append(comp)
    341         elif new_comps:
    342             new_comps.pop()
    343     comps = new_comps
    344     path = slash.join(comps)
    345     if initial_slashes:
    346         path = slash*initial_slashes + path
    347     return path or dot
    348 
    349 
    350 def abspath(path):
    351     """Return an absolute path."""
    352     if not isabs(path):
    353         if isinstance(path, _unicode):
    354             cwd = os.getcwdu()
    355         else:
    356             cwd = os.getcwd()
    357         path = join(cwd, path)
    358     return normpath(path)
    359 
    360 
    361 # Return a canonical path (i.e. the absolute location of a file on the
    362 # filesystem).
    363 
    364 def realpath(filename):
    365     """Return the canonical path of the specified filename, eliminating any
    366 symbolic links encountered in the path."""
    367     path, ok = _joinrealpath('', filename, {})
    368     return abspath(path)
    369 
# Join two paths, normalizing and eliminating any symbolic links
    371 # encountered in the second path.
def _joinrealpath(path, rest, seen):
    """Resolve the components of rest on top of path, following symlinks.

    path is the prefix resolved so far ('' when the caller starts from a
    relative name).  rest is the part of the pathname still to be
    processed.  seen maps each symlink path met so far to its resolved
    target, or to None while that link is still being resolved.

    Returns a tuple (resolved_path, ok).  ok is False when a symlink loop
    was detected; the returned path is then the part handled so far joined
    with the unprocessed remainder.
    """
    if isabs(rest):
        # An absolute remainder restarts resolution from the root.
        rest = rest[1:]
        path = sep

    while rest:
        # Peel off the next component; rest keeps whatever follows it.
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # The prefix already ended in '..', so it cannot be
                    # shortened: put it back and add one more '..'.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Resolve the link target relative to the directory holding the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            # Propagate the loop found while resolving the link target.
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
    412 
    413 
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
    415 
    416 def relpath(path, start=curdir):
    417     """Return a relative version of a path"""
    418 
    419     if not path:
    420         raise ValueError("no path specified")
    421 
    422     start_list = [x for x in abspath(start).split(sep) if x]
    423     path_list = [x for x in abspath(path).split(sep) if x]
    424 
    425     # Work out how much of the filepath is shared by start and path.
    426     i = len(commonprefix([start_list, path_list]))
    427 
    428     rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    429     if not rel_list:
    430         return curdir
    431     return join(*rel_list)
    432