Home | History | Annotate | Download | only in Lib
      1 """Common operations on Posix pathnames.
      2 
      3 Instead of importing this module directly, import os and refer to
      4 this module as os.path.  The "os.path" name is an alias for this
      5 module on Posix systems; on other systems (e.g. Mac, Windows),
      6 os.path provides the same operations in a manner specific to that
      7 platform, and is an alias to another module (e.g. macpath, ntpath).
      8 
      9 Some of this can actually be useful on non-Posix systems too, e.g.
     10 for manipulation of the pathname component of URLs.
     11 """
     12 
     13 import os
     14 import sys
     15 import stat
     16 import genericpath
     17 from genericpath import *
     18 
# Names exported when this module is star-imported.  Several entries (for
# example "commonprefix", "getsize" and "exists") have no definition in this
# file; they are presumably supplied by the "from genericpath import *"
# above -- verify against genericpath before removing any of them.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath",
           "commonpath"]
     27 
     28 # Strings representing various path-related bits and pieces.
     29 # These are primarily for export; internally, they are hardcoded.
     30 curdir = '.'
     31 pardir = '..'
     32 extsep = '.'
     33 sep = '/'
     34 pathsep = ':'
     35 defpath = ':/bin:/usr/bin'
     36 altsep = None
     37 devnull = '/dev/null'
     38 
     39 def _get_sep(path):
     40     if isinstance(path, bytes):
     41         return b'/'
     42     else:
     43         return '/'
     44 
     45 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
     46 # On MS-DOS this may also turn slashes into backslashes; however, other
     47 # normalizations (such as optimizing '../' away) are not allowed
     48 # (another function should be defined to do that).
     49 
     50 def normcase(s):
     51     """Normalize case of pathname.  Has no effect under Posix"""
     52     s = os.fspath(s)
     53     if not isinstance(s, (bytes, str)):
     54         raise TypeError("normcase() argument must be str or bytes, "
     55                         "not '{}'".format(s.__class__.__name__))
     56     return s
     57 
     58 
     59 # Return whether a path is absolute.
     60 # Trivial in Posix, harder on the Mac or MS-DOS.
     61 
     62 def isabs(s):
     63     """Test whether a path is absolute"""
     64     s = os.fspath(s)
     65     sep = _get_sep(s)
     66     return s.startswith(sep)
     67 
     68 
     69 # Join pathnames.
     70 # Ignore the previous parts if a part is absolute.
     71 # Insert a '/' unless the first part is empty or already ends in '/'.
     72 
     73 def join(a, *p):
     74     """Join two or more pathname components, inserting '/' as needed.
     75     If any component is an absolute path, all previous path components
     76     will be discarded.  An empty last part will result in a path that
     77     ends with a separator."""
     78     a = os.fspath(a)
     79     sep = _get_sep(a)
     80     path = a
     81     try:
     82         if not p:
     83             path[:0] + sep  #23780: Ensure compatible data type even if p is null.
     84         for b in map(os.fspath, p):
     85             if b.startswith(sep):
     86                 path = b
     87             elif not path or path.endswith(sep):
     88                 path += b
     89             else:
     90                 path += sep + b
     91     except (TypeError, AttributeError, BytesWarning):
     92         genericpath._check_arg_types('join', a, *p)
     93         raise
     94     return path
     95 
     96 
     97 # Split a path in head (everything up to the last '/') and tail (the
     98 # rest).  If the path ends in '/', tail will be empty.  If there is no
     99 # '/' in the path, head  will be empty.
    100 # Trailing '/'es are stripped from head unless it is the root.
    101 
    102 def split(p):
    103     """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
    104     everything after the final slash.  Either part may be empty."""
    105     p = os.fspath(p)
    106     sep = _get_sep(p)
    107     i = p.rfind(sep) + 1
    108     head, tail = p[:i], p[i:]
    109     if head and head != sep*len(head):
    110         head = head.rstrip(sep)
    111     return head, tail
    112 
    113 
    114 # Split a path in root and extension.
    115 # The extension is everything starting at the last dot in the last
    116 # pathname component; the root is everything before that.
    117 # It is always true that root + ext == p.
    118 
    119 def splitext(p):
    120     p = os.fspath(p)
    121     if isinstance(p, bytes):
    122         sep = b'/'
    123         extsep = b'.'
    124     else:
    125         sep = '/'
    126         extsep = '.'
    127     return genericpath._splitext(p, sep, None, extsep)
    128 splitext.__doc__ = genericpath._splitext.__doc__
    129 
    130 # Split a pathname into a drive specification and the rest of the
    131 # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
    132 
    133 def splitdrive(p):
    134     """Split a pathname into drive and path. On Posix, drive is always
    135     empty."""
    136     p = os.fspath(p)
    137     return p[:0], p
    138 
    139 
    140 # Return the tail (basename) part of a path, same as split(path)[1].
    141 
    142 def basename(p):
    143     """Returns the final component of a pathname"""
    144     p = os.fspath(p)
    145     sep = _get_sep(p)
    146     i = p.rfind(sep) + 1
    147     return p[i:]
    148 
    149 
    150 # Return the head (dirname) part of a path, same as split(path)[0].
    151 
    152 def dirname(p):
    153     """Returns the directory component of a pathname"""
    154     p = os.fspath(p)
    155     sep = _get_sep(p)
    156     i = p.rfind(sep) + 1
    157     head = p[:i]
    158     if head and head != sep*len(head):
    159         head = head.rstrip(sep)
    160     return head
    161 
    162 
    163 # Is a path a symbolic link?
    164 # This will always return false on systems where os.lstat doesn't exist.
    165 
    166 def islink(path):
    167     """Test whether a path is a symbolic link"""
    168     try:
    169         st = os.lstat(path)
    170     except (OSError, AttributeError):
    171         return False
    172     return stat.S_ISLNK(st.st_mode)
    173 
    174 # Being true for dangling symbolic links is also useful.
    175 
    176 def lexists(path):
    177     """Test whether a path exists.  Returns True for broken symbolic links"""
    178     try:
    179         os.lstat(path)
    180     except OSError:
    181         return False
    182     return True
    183 
    184 
    185 # Is a path a mount point?
    186 # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
    187 
    188 def ismount(path):
    189     """Test whether a path is a mount point"""
    190     try:
    191         s1 = os.lstat(path)
    192     except OSError:
    193         # It doesn't exist -- so not a mount point. :-)
    194         return False
    195     else:
    196         # A symlink can never be a mount point
    197         if stat.S_ISLNK(s1.st_mode):
    198             return False
    199 
    200     if isinstance(path, bytes):
    201         parent = join(path, b'..')
    202     else:
    203         parent = join(path, '..')
    204     parent = realpath(parent)
    205     try:
    206         s2 = os.lstat(parent)
    207     except OSError:
    208         return False
    209 
    210     dev1 = s1.st_dev
    211     dev2 = s2.st_dev
    212     if dev1 != dev2:
    213         return True     # path/.. on a different device as path
    214     ino1 = s1.st_ino
    215     ino2 = s2.st_ino
    216     if ino1 == ino2:
    217         return True     # path/.. is the same i-node as path
    218     return False
    219 
    220 
    221 # Expand paths beginning with '~' or '~user'.
    222 # '~' means $HOME; '~user' means that user's home directory.
    223 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
    224 # the path is returned unchanged (leaving error reporting to whatever
    225 # function is called with the expanded path as argument).
    226 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
    227 # (A function should also be defined to do full *sh-style environment
    228 # variable expansion.)
    229 
    230 def expanduser(path):
    231     """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    232     do nothing."""
    233     path = os.fspath(path)
    234     if isinstance(path, bytes):
    235         tilde = b'~'
    236     else:
    237         tilde = '~'
    238     if not path.startswith(tilde):
    239         return path
    240     sep = _get_sep(path)
    241     i = path.find(sep, 1)
    242     if i < 0:
    243         i = len(path)
    244     if i == 1:
    245         if 'HOME' not in os.environ:
    246             import pwd
    247             userhome = pwd.getpwuid(os.getuid()).pw_dir
    248         else:
    249             userhome = os.environ['HOME']
    250     else:
    251         import pwd
    252         name = path[1:i]
    253         if isinstance(name, bytes):
    254             name = str(name, 'ASCII')
    255         try:
    256             pwent = pwd.getpwnam(name)
    257         except KeyError:
    258             return path
    259         userhome = pwent.pw_dir
    260     if isinstance(path, bytes):
    261         userhome = os.fsencode(userhome)
    262         root = b'/'
    263     else:
    264         root = '/'
    265     userhome = userhome.rstrip(root)
    266     return (userhome + path[i:]) or root
    267 
    268 
    269 # Expand paths containing shell variable substitutions.
    270 # This expands the forms $variable and ${variable} only.
    271 # Non-existent variables are left unchanged.
    272 
    273 _varprog = None
    274 _varprogb = None
    275 
    276 def expandvars(path):
    277     """Expand shell variables of form $var and ${var}.  Unknown variables
    278     are left unchanged."""
    279     path = os.fspath(path)
    280     global _varprog, _varprogb
    281     if isinstance(path, bytes):
    282         if b'$' not in path:
    283             return path
    284         if not _varprogb:
    285             import re
    286             _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
    287         search = _varprogb.search
    288         start = b'{'
    289         end = b'}'
    290         environ = getattr(os, 'environb', None)
    291     else:
    292         if '$' not in path:
    293             return path
    294         if not _varprog:
    295             import re
    296             _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
    297         search = _varprog.search
    298         start = '{'
    299         end = '}'
    300         environ = os.environ
    301     i = 0
    302     while True:
    303         m = search(path, i)
    304         if not m:
    305             break
    306         i, j = m.span(0)
    307         name = m.group(1)
    308         if name.startswith(start) and name.endswith(end):
    309             name = name[1:-1]
    310         try:
    311             if environ is None:
    312                 value = os.fsencode(os.environ[os.fsdecode(name)])
    313             else:
    314                 value = environ[name]
    315         except KeyError:
    316             i = j
    317         else:
    318             tail = path[j:]
    319             path = path[:i] + value
    320             i = len(path)
    321             path += tail
    322     return path
    323 
    324 
    325 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
    326 # It should be understood that this may change the meaning of the path
    327 # if it contains symbolic links!
    328 
    329 def normpath(path):
    330     """Normalize path, eliminating double slashes, etc."""
    331     path = os.fspath(path)
    332     if isinstance(path, bytes):
    333         sep = b'/'
    334         empty = b''
    335         dot = b'.'
    336         dotdot = b'..'
    337     else:
    338         sep = '/'
    339         empty = ''
    340         dot = '.'
    341         dotdot = '..'
    342     if path == empty:
    343         return dot
    344     initial_slashes = path.startswith(sep)
    345     # POSIX allows one or two initial slashes, but treats three or more
    346     # as single slash.
    347     if (initial_slashes and
    348         path.startswith(sep*2) and not path.startswith(sep*3)):
    349         initial_slashes = 2
    350     comps = path.split(sep)
    351     new_comps = []
    352     for comp in comps:
    353         if comp in (empty, dot):
    354             continue
    355         if (comp != dotdot or (not initial_slashes and not new_comps) or
    356              (new_comps and new_comps[-1] == dotdot)):
    357             new_comps.append(comp)
    358         elif new_comps:
    359             new_comps.pop()
    360     comps = new_comps
    361     path = sep.join(comps)
    362     if initial_slashes:
    363         path = sep*initial_slashes + path
    364     return path or dot
    365 
    366 
    367 def abspath(path):
    368     """Return an absolute path."""
    369     path = os.fspath(path)
    370     if not isabs(path):
    371         if isinstance(path, bytes):
    372             cwd = os.getcwdb()
    373         else:
    374             cwd = os.getcwd()
    375         path = join(cwd, path)
    376     return normpath(path)
    377 
    378 
    379 # Return a canonical path (i.e. the absolute location of a file on the
    380 # filesystem).
    381 
    382 def realpath(filename):
    383     """Return the canonical path of the specified filename, eliminating any
    384 symbolic links encountered in the path."""
    385     filename = os.fspath(filename)
    386     path, ok = _joinrealpath(filename[:0], filename, {})
    387     return abspath(path)
    388 
    389 # Join two paths, normalizing and eliminating any symbolic links
    390 # encountered in the second path.
    391 def _joinrealpath(path, rest, seen):
    392     if isinstance(path, bytes):
    393         sep = b'/'
    394         curdir = b'.'
    395         pardir = b'..'
    396     else:
    397         sep = '/'
    398         curdir = '.'
    399         pardir = '..'
    400 
    401     if isabs(rest):
    402         rest = rest[1:]
    403         path = sep
    404 
    405     while rest:
    406         name, _, rest = rest.partition(sep)
    407         if not name or name == curdir:
    408             # current dir
    409             continue
    410         if name == pardir:
    411             # parent dir
    412             if path:
    413                 path, name = split(path)
    414                 if name == pardir:
    415                     path = join(path, pardir, pardir)
    416             else:
    417                 path = pardir
    418             continue
    419         newpath = join(path, name)
    420         if not islink(newpath):
    421             path = newpath
    422             continue
    423         # Resolve the symbolic link
    424         if newpath in seen:
    425             # Already seen this path
    426             path = seen[newpath]
    427             if path is not None:
    428                 # use cached value
    429                 continue
    430             # The symlink is not resolved, so we must have a symlink loop.
    431             # Return already resolved part + rest of the path unchanged.
    432             return join(newpath, rest), False
    433         seen[newpath] = None # not resolved symlink
    434         path, ok = _joinrealpath(path, os.readlink(newpath), seen)
    435         if not ok:
    436             return join(path, rest), False
    437         seen[newpath] = path # resolved symlink
    438 
    439     return path, True
    440 
    441 
    442 supports_unicode_filenames = (sys.platform == 'darwin')
    443 
    444 def relpath(path, start=None):
    445     """Return a relative version of a path"""
    446 
    447     if not path:
    448         raise ValueError("no path specified")
    449 
    450     path = os.fspath(path)
    451     if isinstance(path, bytes):
    452         curdir = b'.'
    453         sep = b'/'
    454         pardir = b'..'
    455     else:
    456         curdir = '.'
    457         sep = '/'
    458         pardir = '..'
    459 
    460     if start is None:
    461         start = curdir
    462     else:
    463         start = os.fspath(start)
    464 
    465     try:
    466         start_list = [x for x in abspath(start).split(sep) if x]
    467         path_list = [x for x in abspath(path).split(sep) if x]
    468         # Work out how much of the filepath is shared by start and path.
    469         i = len(commonprefix([start_list, path_list]))
    470 
    471         rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    472         if not rel_list:
    473             return curdir
    474         return join(*rel_list)
    475     except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
    476         genericpath._check_arg_types('relpath', path, start)
    477         raise
    478 
    479 
    480 # Return the longest common sub-path of the sequence of paths given as input.
    481 # The paths are not normalized before comparing them (this is the
    482 # responsibility of the caller). Any trailing separator is stripped from the
    483 # returned path.
    484 
    485 def commonpath(paths):
    486     """Given a sequence of path names, returns the longest common sub-path."""
    487 
    488     if not paths:
    489         raise ValueError('commonpath() arg is an empty sequence')
    490 
    491     paths = tuple(map(os.fspath, paths))
    492     if isinstance(paths[0], bytes):
    493         sep = b'/'
    494         curdir = b'.'
    495     else:
    496         sep = '/'
    497         curdir = '.'
    498 
    499     try:
    500         split_paths = [path.split(sep) for path in paths]
    501 
    502         try:
    503             isabs, = set(p[:1] == sep for p in paths)
    504         except ValueError:
    505             raise ValueError("Can't mix absolute and relative paths") from None
    506 
    507         split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
    508         s1 = min(split_paths)
    509         s2 = max(split_paths)
    510         common = s1
    511         for i, c in enumerate(s1):
    512             if c != s2[i]:
    513                 common = s1[:i]
    514                 break
    515 
    516         prefix = sep if isabs else sep[:0]
    517         return prefix + sep.join(common)
    518     except (TypeError, AttributeError):
    519         genericpath._check_arg_types('commonpath', *paths)
    520         raise
    521