Home | History | Annotate | Download | only in Lib
      1 # Module 'ntpath' -- common operations on WinNT/Win95 pathnames
      2 """Common pathname manipulations, WindowsNT/95 version.
      3 
      4 Instead of importing this module directly, import os and refer to this
      5 module as os.path.
      6 """
      7 
      8 import os
      9 import sys
     10 import stat
     11 import genericpath
     12 import warnings
     13 
     14 from genericpath import *
     15 from genericpath import _unicode
     16 
# Names exported by "from ntpath import *".  Several of them (exists, isdir,
# getsize, ...) are not defined in this file; they arrive via the
# "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime", "islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
           "extsep","devnull","realpath","supports_unicode_filenames","relpath"]

# strings representing various path-related bits and pieces
curdir = '.'        # default start for relpath(), and its result for equal paths
pardir = '..'       # used by relpath() for each step up the tree
extsep = '.'        # handed to genericpath._splitext() by splitext()
sep = '\\'          # primary separator; join() and splitdrive() rely on it
pathsep = ';'       # presumably the separator for PATH-style lists (unused here)
altsep = '/'        # alternative separator; splitdrive() maps it onto sep
defpath = '.;C:\\bin'
if 'ce' in sys.builtin_module_names:
    # A built-in 'ce' module is present: use a different default search path.
    defpath = '\\Windows'
elif 'os2' in sys.builtin_module_names:
    # OS/2 w/ VACPP
    # NOTE: altsep already equals '/' above, so this assignment changes nothing.
    altsep = '/'
devnull = 'nul'
     38 
     39 # Normalize the case of a pathname and map slashes to backslashes.
     40 # Other normalizations (such as optimizing '../' away) are not done
     41 # (this is done by normpath).
     42 
     43 def normcase(s):
     44     """Normalize case of pathname.
     45 
     46     Makes all characters lowercase and all slashes into backslashes."""
     47     return s.replace("/", "\\").lower()
     48 
     49 
     50 # Return whether a path is absolute.
     51 # Trivial in Posix, harder on the Mac or MS-DOS.
     52 # For DOS it is absolute if it starts with a slash or backslash (current
     53 # volume), or if a pathname after the volume letter and colon / UNC resource
     54 # starts with a slash or backslash.
     55 
     56 def isabs(s):
     57     """Test whether a path is absolute"""
     58     s = splitdrive(s)[1]
     59     return s != '' and s[:1] in '/\\'
     60 
     61 
     62 # Join two (or more) paths.
     63 def join(path, *paths):
     64     """Join two or more pathname components, inserting "\\" as needed."""
     65     result_drive, result_path = splitdrive(path)
     66     for p in paths:
     67         p_drive, p_path = splitdrive(p)
     68         if p_path and p_path[0] in '\\/':
     69             # Second path is absolute
     70             if p_drive or not result_drive:
     71                 result_drive = p_drive
     72             result_path = p_path
     73             continue
     74         elif p_drive and p_drive != result_drive:
     75             if p_drive.lower() != result_drive.lower():
     76                 # Different drives => ignore the first path entirely
     77                 result_drive = p_drive
     78                 result_path = p_path
     79                 continue
     80             # Same drive in different case
     81             result_drive = p_drive
     82         # Second path is relative to the first
     83         if result_path and result_path[-1] not in '\\/':
     84             result_path = result_path + '\\'
     85         result_path = result_path + p_path
     86     ## add separator between UNC and non-absolute path
     87     if (result_path and result_path[0] not in '\\/' and
     88         result_drive and result_drive[-1:] != ':'):
     89         return result_drive + sep + result_path
     90     return result_drive + result_path
     91 
     92 
# Split a path into a drive specification (a drive letter followed by a
# colon, or a UNC sharepoint such as \\host\share) and the path specification.
# It is always true that drivespec + pathspec == p
     96 def splitdrive(p):
     97     """Split a pathname into drive/UNC sharepoint and relative path specifiers.
     98     Returns a 2-tuple (drive_or_unc, path); either part may be empty.
     99 
    100     If you assign
    101         result = splitdrive(p)
    102     It is always true that:
    103         result[0] + result[1] == p
    104 
    105     If the path contained a drive letter, drive_or_unc will contain everything
    106     up to and including the colon.  e.g. splitdrive("c:/dir") returns ("c:", "/dir")
    107 
    108     If the path contained a UNC path, the drive_or_unc will contain the host name
    109     and share up to but not including the fourth directory separator character.
    110     e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")
    111 
    112     Paths cannot contain both a drive letter and a UNC path.
    113 
    114     """
    115     if len(p) > 1:
    116         normp = p.replace(altsep, sep)
    117         if (normp[0:2] == sep*2) and (normp[2:3] != sep):
    118             # is a UNC path:
    119             # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
    120             # \\machine\mountpoint\directory\etc\...
    121             #           directory ^^^^^^^^^^^^^^^
    122             index = normp.find(sep, 2)
    123             if index == -1:
    124                 return '', p
    125             index2 = normp.find(sep, index + 1)
    126             # a UNC path can't have two slashes in a row
    127             # (after the initial two)
    128             if index2 == index + 1:
    129                 return '', p
    130             if index2 == -1:
    131                 index2 = len(p)
    132             return p[:index2], p[index2:]
    133         if normp[1] == ':':
    134             return p[:2], p[2:]
    135     return '', p
    136 
    137 # Parse UNC paths
    138 def splitunc(p):
    139     """Split a pathname into UNC mount point and relative path specifiers.
    140 
    141     Return a 2-tuple (unc, rest); either part may be empty.
    142     If unc is not empty, it has the form '//host/mount' (or similar
    143     using backslashes).  unc+rest is always the input path.
    144     Paths containing drive letters never have a UNC part.
    145     """
    146     if p[1:2] == ':':
    147         return '', p # Drive letter present
    148     firstTwo = p[0:2]
    149     if firstTwo == '//' or firstTwo == '\\\\':
    150         # is a UNC path:
    151         # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
    152         # \\machine\mountpoint\directories...
    153         #           directory ^^^^^^^^^^^^^^^
    154         normp = p.replace('\\', '/')
    155         index = normp.find('/', 2)
    156         if index <= 2:
    157             return '', p
    158         index2 = normp.find('/', index + 1)
    159         # a UNC path can't have two slashes in a row
    160         # (after the initial two)
    161         if index2 == index + 1:
    162             return '', p
    163         if index2 == -1:
    164             index2 = len(p)
    165         return p[:index2], p[index2:]
    166     return '', p
    167 
    168 
    169 # Split a path in head (everything up to the last '/') and tail (the
    170 # rest).  After the trailing '/' is stripped, the invariant
    171 # join(head, tail) == p holds.
    172 # The resulting head won't end in '/' unless it is the root.
    173 
    174 def split(p):
    175     """Split a pathname.
    176 
    177     Return tuple (head, tail) where tail is everything after the final slash.
    178     Either part may be empty."""
    179 
    180     d, p = splitdrive(p)
    181     # set i to index beyond p's last slash
    182     i = len(p)
    183     while i and p[i-1] not in '/\\':
    184         i = i - 1
    185     head, tail = p[:i], p[i:]  # now tail has no slashes
    186     # remove trailing slashes from head, unless it's all slashes
    187     head2 = head
    188     while head2 and head2[-1] in '/\\':
    189         head2 = head2[:-1]
    190     head = head2 or head
    191     return d + head, tail
    192 
    193 
    194 # Split a path in root and extension.
    195 # The extension is everything starting at the last dot in the last
    196 # pathname component; the root is everything before that.
    197 # It is always true that root + ext == p.
    198 
def splitext(p):
    # Delegate to the shared implementation in genericpath, supplying this
    # module's separator, alternative separator and extension separator.
    return genericpath._splitext(p, sep, altsep, extsep)
# Reuse the generic docstring instead of duplicating it here.
splitext.__doc__ = genericpath._splitext.__doc__
    202 
    203 
    204 # Return the tail (basename) part of a path.
    205 
    206 def basename(p):
    207     """Returns the final component of a pathname"""
    208     return split(p)[1]
    209 
    210 
    211 # Return the head (dirname) part of a path.
    212 
    213 def dirname(p):
    214     """Returns the directory component of a pathname"""
    215     return split(p)[0]
    216 
    217 # Is a path a symbolic link?
    218 # This will always return false on systems where posix.lstat doesn't exist.
    219 
def islink(path):
    """Test for symbolic link.

    On WindowsNT/95 and OS/2 always returns false: the argument is not
    examined and the file system is not consulted.
    """
    return False
    225 
# lexists is simply another name for exists (imported from genericpath);
# islink() above never reports a link, so no separate check is made here.
lexists = exists
    228 
    229 # Is a path a mount point?  Either a root (with or without drive letter)
    230 # or a UNC path with at most a / or \ after the mount point.
    231 
    232 def ismount(path):
    233     """Test whether a path is a mount point (defined as root of drive)"""
    234     unc, rest = splitunc(path)
    235     if unc:
    236         return rest in ("", "/", "\\")
    237     p = splitdrive(path)[1]
    238     return len(p) == 1 and p[0] in '/\\'
    239 
    240 
    241 # Directory tree walk.
    242 # For each directory under top (including top itself, but excluding
    243 # '.' and '..'), func(arg, dirname, filenames) is called, where
    244 # dirname is the name of the directory and filenames is the list
    245 # of files (and subdirectories etc.) in the directory.
    246 # The func may modify the filenames list, to implement a filter,
    247 # or to impose a different order of visiting.
    248 
    249 def walk(top, func, arg):
    250     """Directory tree walk with callback function.
    251 
    252     For each directory in the directory tree rooted at top (including top
    253     itself, but excluding '.' and '..'), call func(arg, dirname, fnames).
    254     dirname is the name of the directory, and fnames a list of the names of
    255     the files and subdirectories in dirname (excluding '.' and '..').  func
    256     may modify the fnames list in-place (e.g. via del or slice assignment),
    257     and walk will only recurse into the subdirectories whose names remain in
    258     fnames; this can be used to implement a filter, or to impose a specific
    259     order of visiting.  No semantics are defined for, or required of, arg,
    260     beyond that arg is always passed to func.  It can be used, e.g., to pass
    261     a filename pattern, or a mutable object designed to accumulate
    262     statistics.  Passing None for arg is common."""
    263     warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
    264                       stacklevel=2)
    265     try:
    266         names = os.listdir(top)
    267     except os.error:
    268         return
    269     func(arg, top, names)
    270     for name in names:
    271         name = join(top, name)
    272         if isdir(name):
    273             walk(name, func, arg)
    274 
    275 
    276 # Expand paths beginning with '~' or '~user'.
    277 # '~' means $HOME; '~user' means that user's home directory.
    278 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
    279 # the path is returned unchanged (leaving error reporting to whatever
    280 # function is called with the expanded path as argument).
    281 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
    282 # (A function should also be defined to do full *sh-style environment
    283 # variable expansion.)
    284 
    285 def expanduser(path):
    286     """Expand ~ and ~user constructs.
    287 
    288     If user or $HOME is unknown, do nothing."""
    289     if path[:1] != '~':
    290         return path
    291     i, n = 1, len(path)
    292     while i < n and path[i] not in '/\\':
    293         i = i + 1
    294 
    295     if 'HOME' in os.environ:
    296         userhome = os.environ['HOME']
    297     elif 'USERPROFILE' in os.environ:
    298         userhome = os.environ['USERPROFILE']
    299     elif not 'HOMEPATH' in os.environ:
    300         return path
    301     else:
    302         try:
    303             drive = os.environ['HOMEDRIVE']
    304         except KeyError:
    305             drive = ''
    306         userhome = join(drive, os.environ['HOMEPATH'])
    307 
    308     if i != 1: #~user
    309         userhome = join(dirname(userhome), path[1:i])
    310 
    311     return userhome + path[i:]
    312 
    313 
    314 # Expand paths containing shell variable substitutions.
    315 # The following rules apply:
    316 #       - no expansion within single quotes
    317 #       - '$$' is translated into '$'
    318 #       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
    319 #       - ${varname} is accepted.
    320 #       - $varname is accepted.
    321 #       - %varname% is accepted.
    322 #       - varnames can be made out of letters, digits and the characters '_-'
    323 #         (though is not verified in the ${varname} and %varname% cases)
    324 # XXX With COMMAND.COM you can use any characters in a variable name,
    325 # XXX except '^|<>='.
    326 
def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Text between single quotes is copied without expansion, '$$' yields a
    single '$' and '%%' yields a single '%'.  Values are looked up in
    os.environ.  An unterminated quote, '%' or '${' causes the remainder of
    the string to be copied literally.

    Unknown variables are left unchanged."""
    # Fast path: nothing that could be expanded.
    if '$' not in path and '%' not in path:
        return path
    import string
    # Characters accepted in a bare $name reference.
    varchars = string.ascii_letters + string.digits + '_-'
    if isinstance(path, _unicode):
        # For unicode input the variable name is encoded before the lookup
        # and the value decoded afterwards, both with the filesystem encoding.
        encoding = sys.getfilesystemencoding()
        def getenv(var):
            return os.environ[var.encode(encoding)].decode(encoding)
    else:
        def getenv(var):
            return os.environ[var]
    res = ''
    index = 0
    pathlen = len(path)
    # NOTE: several branches below re-bind 'path' to the not-yet-consumed
    # remainder and restart 'index' relative to it; 'pathlen' is refreshed
    # each time.  Every branch leaves 'index' on the last character it
    # consumed, and the shared increment at the bottom of the loop moves on.
    while index < pathlen:
        c = path[index]
        if c == '\'':   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index('\'')
                # Copy the quoted text, both quote characters included.
                res = res + '\'' + path[:index + 1]
            except ValueError:
                # No closing quote: the rest of the string is literal.
                res = res + c + path
                index = pathlen - 1
        elif c == '%':  # variable or '%'
            if path[index + 1:index + 2] == '%':
                # '%%' collapses to a single '%'.
                res = res + c
                index = index + 1
            else:
                path = path[index+1:]
                pathlen = len(path)
                try:
                    index = path.index('%')
                except ValueError:
                    # No closing '%': the rest of the string is literal.
                    res = res + '%' + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    try:
                        res = res + getenv(var)
                    except KeyError:
                        # Unknown variable: emit the reference unchanged.
                        res = res + '%' + var + '%'
        elif c == '$':  # variable or '$$'
            if path[index + 1:index + 2] == '$':
                # '$$' collapses to a single '$'.
                res = res + c
                index = index + 1
            elif path[index + 1:index + 2] == '{':
                path = path[index+2:]
                pathlen = len(path)
                try:
                    index = path.index('}')
                    var = path[:index]
                    try:
                        res = res + getenv(var)
                    except KeyError:
                        # Unknown variable: emit the reference unchanged.
                        res = res + '${' + var + '}'
                except ValueError:
                    # No closing '}': the rest of the string is literal.
                    res = res + '${' + path
                    index = pathlen - 1
            else:
                # Bare $name: gather the longest run of varchars.
                var = ''
                index = index + 1
                c = path[index:index + 1]
                while c != '' and c in varchars:
                    var = var + c
                    index = index + 1
                    c = path[index:index + 1]
                try:
                    res = res + getenv(var)
                except KeyError:
                    res = res + '$' + var
                if c != '':
                    # The scan stopped on a character that is not part of the
                    # name; step back so the increment below lands on it.
                    index = index - 1
        else:
            res = res + c
        index = index + 1
    return res
    409 
    410 
    411 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
    412 # Previously, this function also truncated pathnames to 8+3 format,
    413 # but as this module is called "ntpath", that's obviously wrong!
    414 
    415 def normpath(path):
    416     """Normalize path, eliminating double slashes, etc."""
    417     # Preserve unicode (if path is unicode)
    418     backslash, dot = (u'\\', u'.') if isinstance(path, _unicode) else ('\\', '.')
    419     if path.startswith(('\\\\.\\', '\\\\?\\')):
    420         # in the case of paths with these prefixes:
    421         # \\.\ -> device names
    422         # \\?\ -> literal paths
    423         # do not do any normalization, but return the path unchanged
    424         return path
    425     path = path.replace("/", "\\")
    426     prefix, path = splitdrive(path)
    427     # We need to be careful here. If the prefix is empty, and the path starts
    428     # with a backslash, it could either be an absolute path on the current
    429     # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
    430     # is therefore imperative NOT to collapse multiple backslashes blindly in
    431     # that case.
    432     # The code below preserves multiple backslashes when there is no drive
    433     # letter. This means that the invalid filename \\\a\b is preserved
    434     # unchanged, where a\\\b is normalised to a\b. It's not clear that there
    435     # is any better behaviour for such edge cases.
    436     if prefix == '':
    437         # No drive letter - preserve initial backslashes
    438         while path[:1] == "\\":
    439             prefix = prefix + backslash
    440             path = path[1:]
    441     else:
    442         # We have a drive letter - collapse initial backslashes
    443         if path.startswith("\\"):
    444             prefix = prefix + backslash
    445             path = path.lstrip("\\")
    446     comps = path.split("\\")
    447     i = 0
    448     while i < len(comps):
    449         if comps[i] in ('.', ''):
    450             del comps[i]
    451         elif comps[i] == '..':
    452             if i > 0 and comps[i-1] != '..':
    453                 del comps[i-1:i+1]
    454                 i -= 1
    455             elif i == 0 and prefix.endswith("\\"):
    456                 del comps[i]
    457             else:
    458                 i += 1
    459         else:
    460             i += 1
    461     # If the path is now empty, substitute '.'
    462     if not prefix and not comps:
    463         comps.append(dot)
    464     return prefix + backslash.join(comps)
    465 
    466 
    467 # Return an absolute path.
    468 try:
    469     from nt import _getfullpathname
    470 
    471 except ImportError: # not running on Windows - mock up something sensible
    472     def abspath(path):
    473         """Return the absolute version of a path."""
    474         if not isabs(path):
    475             if isinstance(path, _unicode):
    476                 cwd = os.getcwdu()
    477             else:
    478                 cwd = os.getcwd()
    479             path = join(cwd, path)
    480         return normpath(path)
    481 
    482 else:  # use native Windows method on Windows
    483     def abspath(path):
    484         """Return the absolute version of a path."""
    485 
    486         if path: # Empty path must return current working directory.
    487             try:
    488                 path = _getfullpathname(path)
    489             except WindowsError:
    490                 pass # Bad path - return unchanged.
    491         elif isinstance(path, _unicode):
    492             path = os.getcwdu()
    493         else:
    494             path = os.getcwd()
    495         return normpath(path)
    496 
# realpath is a no-op on systems without islink support
# (islink() in this module always returns False), so it is just abspath.
realpath = abspath
# Win9x family and earlier have no Unicode filename support.
# Evaluates to False when sys has no getwindowsversion attribute; otherwise
# compares element 3 of sys.getwindowsversion() against 2.
# NOTE(review): element 3 is presumably the platform id, with 2 meaning the
# NT family - confirm against the sys module documentation.
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                              sys.getwindowsversion()[3] >= 2)
    502 
    503 def _abspath_split(path):
    504     abs = abspath(normpath(path))
    505     prefix, rest = splitunc(abs)
    506     is_unc = bool(prefix)
    507     if not is_unc:
    508         prefix, rest = splitdrive(abs)
    509     return is_unc, prefix, [x for x in rest.split(sep) if x]
    510 
    511 def relpath(path, start=curdir):
    512     """Return a relative version of a path"""
    513 
    514     if not path:
    515         raise ValueError("no path specified")
    516 
    517     start_is_unc, start_prefix, start_list = _abspath_split(start)
    518     path_is_unc, path_prefix, path_list = _abspath_split(path)
    519 
    520     if path_is_unc ^ start_is_unc:
    521         raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
    522                                                             % (path, start))
    523     if path_prefix.lower() != start_prefix.lower():
    524         if path_is_unc:
    525             raise ValueError("path is on UNC root %s, start on UNC root %s"
    526                                                 % (path_prefix, start_prefix))
    527         else:
    528             raise ValueError("path is on drive %s, start on drive %s"
    529                                                 % (path_prefix, start_prefix))
    530     # Work out how much of the filepath is shared by start and path.
    531     i = 0
    532     for e1, e2 in zip(start_list, path_list):
    533         if e1.lower() != e2.lower():
    534             break
    535         i += 1
    536 
    537     rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    538     if not rel_list:
    539         return curdir
    540     return join(*rel_list)
    541 
# Prefer a native isdir when the nt module provides one; otherwise keep the
# isdir that "from genericpath import *" already bound at the top of the file.
try:
    # The genericpath.isdir implementation uses os.stat and checks the mode
    # attribute to tell whether or not the path is a directory.
    # This is overkill on Windows - just pass the path to GetFileAttributes
    # and check the attribute from there.
    from nt import _isdir as isdir
except ImportError:
    # Use genericpath.isdir as imported above.
    pass
    551