Home | History | Annotate | Download | only in Lib
      1 """Common operations on Posix pathnames.
      2 
      3 Instead of importing this module directly, import os and refer to
      4 this module as os.path.  The "os.path" name is an alias for this
      5 module on Posix systems; on other systems (e.g. Mac, Windows),
      6 os.path provides the same operations in a manner specific to that
      7 platform, and is an alias to another module (e.g. macpath, ntpath).
      8 
      9 Some of this can actually be useful on non-Posix systems too, e.g.
     10 for manipulation of the pathname component of URLs.
     11 """
     12 
     13 import os
     14 import sys
     15 import stat
     16 import genericpath
     17 import warnings
     18 from genericpath import *
     19 from genericpath import _unicode
     20 
# Public names of this module (what "from posixpath import *" exports).
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath"]

# strings representing various path-related bits and pieces
# name of the current directory
curdir = '.'
# name of the parent directory
pardir = '..'
# separator between a file name's root and its extension (see splitext)
extsep = '.'
# separator between pathname components
sep = '/'
# presumably the separator between entries of a search-path list such as
# $PATH -- it is not used inside this module
pathsep = ':'
# presumably a default search path; not used inside this module
defpath = ':/bin:/usr/bin'
# alternative component separator; None here (passed through to _splitext)
altsep = None
# pathname of the null device
devnull = '/dev/null'
     38 
     39 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
     40 # On MS-DOS this may also turn slashes into backslashes; however, other
     41 # normalizations (such as optimizing '../' away) are not allowed
     42 # (another function should be defined to do that).
     43 
     44 def normcase(s):
     45     """Normalize case of pathname.  Has no effect under Posix"""
     46     return s
     47 
     48 
     49 # Return whether a path is absolute.
     50 # Trivial in Posix, harder on the Mac or MS-DOS.
     51 
     52 def isabs(s):
     53     """Test whether a path is absolute"""
     54     return s.startswith('/')
     55 
     56 
     57 # Join pathnames.
     58 # Ignore the previous parts if a part is absolute.
     59 # Insert a '/' unless the first part is empty or already ends in '/'.
     60 
     61 def join(a, *p):
     62     """Join two or more pathname components, inserting '/' as needed.
     63     If any component is an absolute path, all previous path components
     64     will be discarded.  An empty last part will result in a path that
     65     ends with a separator."""
     66     path = a
     67     for b in p:
     68         if b.startswith('/'):
     69             path = b
     70         elif path == '' or path.endswith('/'):
     71             path +=  b
     72         else:
     73             path += '/' + b
     74     return path
     75 
     76 
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
     81 
     82 def split(p):
     83     """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
     84     everything after the final slash.  Either part may be empty."""
     85     i = p.rfind('/') + 1
     86     head, tail = p[:i], p[i:]
     87     if head and head != '/'*len(head):
     88         head = head.rstrip('/')
     89     return head, tail
     90 
     91 
# Split a path into root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.
     96 
     97 def splitext(p):
     98     return genericpath._splitext(p, sep, altsep, extsep)
     99 splitext.__doc__ = genericpath._splitext.__doc__
    100 
    101 # Split a pathname into a drive specification and the rest of the
    102 # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
    103 
    104 def splitdrive(p):
    105     """Split a pathname into drive and path. On Posix, drive is always
    106     empty."""
    107     return '', p
    108 
    109 
    110 # Return the tail (basename) part of a path, same as split(path)[1].
    111 
    112 def basename(p):
    113     """Returns the final component of a pathname"""
    114     i = p.rfind('/') + 1
    115     return p[i:]
    116 
    117 
    118 # Return the head (dirname) part of a path, same as split(path)[0].
    119 
    120 def dirname(p):
    121     """Returns the directory component of a pathname"""
    122     i = p.rfind('/') + 1
    123     head = p[:i]
    124     if head and head != '/'*len(head):
    125         head = head.rstrip('/')
    126     return head
    127 
    128 
    129 # Is a path a symbolic link?
    130 # This will always return false on systems where os.lstat doesn't exist.
    131 
    132 def islink(path):
    133     """Test whether a path is a symbolic link"""
    134     try:
    135         st = os.lstat(path)
    136     except (os.error, AttributeError):
    137         return False
    138     return stat.S_ISLNK(st.st_mode)
    139 
    140 # Being true for dangling symbolic links is also useful.
    141 
    142 def lexists(path):
    143     """Test whether a path exists.  Returns True for broken symbolic links"""
    144     try:
    145         os.lstat(path)
    146     except os.error:
    147         return False
    148     return True
    149 
    150 
    151 # Are two filenames really pointing to the same file?
    152 
    153 def samefile(f1, f2):
    154     """Test whether two pathnames reference the same actual file"""
    155     s1 = os.stat(f1)
    156     s2 = os.stat(f2)
    157     return samestat(s1, s2)
    158 
    159 
    160 # Are two open files really referencing the same file?
    161 # (Not necessarily the same file descriptor!)
    162 
    163 def sameopenfile(fp1, fp2):
    164     """Test whether two open file objects reference the same file"""
    165     s1 = os.fstat(fp1)
    166     s2 = os.fstat(fp2)
    167     return samestat(s1, s2)
    168 
    169 
    170 # Are two stat buffers (obtained from stat, fstat or lstat)
    171 # describing the same file?
    172 
    173 def samestat(s1, s2):
    174     """Test whether two stat buffers reference the same file"""
    175     return s1.st_ino == s2.st_ino and \
    176            s1.st_dev == s2.st_dev
    177 
    178 
    179 # Is a path a mount point?
    180 # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
    181 
    182 def ismount(path):
    183     """Test whether a path is a mount point"""
    184     if islink(path):
    185         # A symlink can never be a mount point
    186         return False
    187     try:
    188         s1 = os.lstat(path)
    189         s2 = os.lstat(realpath(join(path, '..')))
    190     except os.error:
    191         return False # It doesn't exist -- so not a mount point :-)
    192     dev1 = s1.st_dev
    193     dev2 = s2.st_dev
    194     if dev1 != dev2:
    195         return True     # path/.. on a different device as path
    196     ino1 = s1.st_ino
    197     ino2 = s2.st_ino
    198     if ino1 == ino2:
    199         return True     # path/.. is the same i-node as path
    200     return False
    201 
    202 
    203 # Directory tree walk.
    204 # For each directory under top (including top itself, but excluding
    205 # '.' and '..'), func(arg, dirname, filenames) is called, where
    206 # dirname is the name of the directory and filenames is the list
    207 # of files (and subdirectories etc.) in the directory.
    208 # The func may modify the filenames list, to implement a filter,
    209 # or to impose a different order of visiting.
    210 
    211 def walk(top, func, arg):
    212     """Directory tree walk with callback function.
    213 
    214     For each directory in the directory tree rooted at top (including top
    215     itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
    216     dirname is the name of the directory, and fnames a list of the names of
    217     the files and subdirectories in dirname (excluding '.' and '..').  func
    218     may modify the fnames list in-place (e.g. via del or slice assignment),
    219     and walk will only recurse into the subdirectories whose names remain in
    220     fnames; this can be used to implement a filter, or to impose a specific
    221     order of visiting.  No semantics are defined for, or required of, arg,
    222     beyond that arg is always passed to func.  It can be used, e.g., to pass
    223     a filename pattern, or a mutable object designed to accumulate
    224     statistics.  Passing None for arg is common."""
    225     warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
    226                       stacklevel=2)
    227     try:
    228         names = os.listdir(top)
    229     except os.error:
    230         return
    231     func(arg, top, names)
    232     for name in names:
    233         name = join(top, name)
    234         try:
    235             st = os.lstat(name)
    236         except os.error:
    237             continue
    238         if stat.S_ISDIR(st.st_mode):
    239             walk(name, func, arg)
    240 
    241 
    242 # Expand paths beginning with '~' or '~user'.
    243 # '~' means $HOME; '~user' means that user's home directory.
    244 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
    245 # the path is returned unchanged (leaving error reporting to whatever
    246 # function is called with the expanded path as argument).
    247 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
    248 # (A function should also be defined to do full *sh-style environment
    249 # variable expansion.)
    250 
    251 def expanduser(path):
    252     """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    253     do nothing."""
    254     if not path.startswith('~'):
    255         return path
    256     i = path.find('/', 1)
    257     if i < 0:
    258         i = len(path)
    259     if i == 1:
    260         if 'HOME' not in os.environ:
    261             import pwd
    262             userhome = pwd.getpwuid(os.getuid()).pw_dir
    263         else:
    264             userhome = os.environ['HOME']
    265     else:
    266         import pwd
    267         try:
    268             pwent = pwd.getpwnam(path[1:i])
    269         except KeyError:
    270             return path
    271         userhome = pwent.pw_dir
    272     userhome = userhome.rstrip('/')
    273     return (userhome + path[i:]) or '/'
    274 
    275 
    276 # Expand paths containing shell variable substitutions.
    277 # This expands the forms $variable and ${variable} only.
    278 # Non-existent variables are left unchanged.
    279 
    280 _varprog = None
    281 _uvarprog = None
    282 
    283 def expandvars(path):
    284     """Expand shell variables of form $var and ${var}.  Unknown variables
    285     are left unchanged."""
    286     global _varprog, _uvarprog
    287     if '$' not in path:
    288         return path
    289     if isinstance(path, _unicode):
    290         if not _uvarprog:
    291             import re
    292             _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE)
    293         varprog = _uvarprog
    294         encoding = sys.getfilesystemencoding()
    295     else:
    296         if not _varprog:
    297             import re
    298             _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    299         varprog = _varprog
    300         encoding = None
    301     i = 0
    302     while True:
    303         m = varprog.search(path, i)
    304         if not m:
    305             break
    306         i, j = m.span(0)
    307         name = m.group(1)
    308         if name.startswith('{') and name.endswith('}'):
    309             name = name[1:-1]
    310         if encoding:
    311             name = name.encode(encoding)
    312         if name in os.environ:
    313             tail = path[j:]
    314             value = os.environ[name]
    315             if encoding:
    316                 value = value.decode(encoding)
    317             path = path[:i] + value
    318             i = len(path)
    319             path += tail
    320         else:
    321             i = j
    322     return path
    323 
    324 
    325 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
    326 # It should be understood that this may change the meaning of the path
    327 # if it contains symbolic links!
    328 
    329 def normpath(path):
    330     """Normalize path, eliminating double slashes, etc."""
    331     # Preserve unicode (if path is unicode)
    332     slash, dot = (u'/', u'.') if isinstance(path, _unicode) else ('/', '.')
    333     if path == '':
    334         return dot
    335     initial_slashes = path.startswith('/')
    336     # POSIX allows one or two initial slashes, but treats three or more
    337     # as single slash.
    338     if (initial_slashes and
    339         path.startswith('//') and not path.startswith('///')):
    340         initial_slashes = 2
    341     comps = path.split('/')
    342     new_comps = []
    343     for comp in comps:
    344         if comp in ('', '.'):
    345             continue
    346         if (comp != '..' or (not initial_slashes and not new_comps) or
    347              (new_comps and new_comps[-1] == '..')):
    348             new_comps.append(comp)
    349         elif new_comps:
    350             new_comps.pop()
    351     comps = new_comps
    352     path = slash.join(comps)
    353     if initial_slashes:
    354         path = slash*initial_slashes + path
    355     return path or dot
    356 
    357 
    358 def abspath(path):
    359     """Return an absolute path."""
    360     if not isabs(path):
    361         if isinstance(path, _unicode):
    362             cwd = os.getcwdu()
    363         else:
    364             cwd = os.getcwd()
    365         path = join(cwd, path)
    366     return normpath(path)
    367 
    368 
    369 # Return a canonical path (i.e. the absolute location of a file on the
    370 # filesystem).
    371 
    372 def realpath(filename):
    373     """Return the canonical path of the specified filename, eliminating any
    374 symbolic links encountered in the path."""
    375     path, ok = _joinrealpath('', filename, {})
    376     return abspath(path)
    377 
    378 # Join two paths, normalizing and eliminating any symbolic links
    379 # encountered in the second path.
def _joinrealpath(path, rest, seen):
    """Append the components of rest to path, resolving symbolic links.

    path is the part resolved so far; rest is the part still to process.
    seen maps the pathname of each symbolic link encountered to its resolved
    target, or to None while that link is still being resolved.

    Returns a pair (path, ok).  ok is False when a symbolic link loop was
    detected; in that case the returned path is the part handled so far
    joined with the unprocessed remainder."""
    if isabs(rest):
        # An absolute remainder restarts resolution from the root.
        rest = rest[1:]
        path = sep

    while rest:
        # Peel off the next component; rest keeps what follows it.
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # The component removed was itself '..', so it cannot
                    # cancel this one: put it back and add another.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Resolve the link target relative to the directory holding the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            # A loop was found further down; pass the failure up.
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
    420 
    421 
# True only when sys.platform is 'darwin'; False everywhere else.
supports_unicode_filenames = (sys.platform == 'darwin')
    423 
    424 def relpath(path, start=curdir):
    425     """Return a relative version of a path"""
    426 
    427     if not path:
    428         raise ValueError("no path specified")
    429 
    430     start_list = [x for x in abspath(start).split(sep) if x]
    431     path_list = [x for x in abspath(path).split(sep) if x]
    432 
    433     # Work out how much of the filepath is shared by start and path.
    434     i = len(commonprefix([start_list, path_list]))
    435 
    436     rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    437     if not rel_list:
    438         return curdir
    439     return join(*rel_list)
    440