Home | History | Annotate | Download | only in Lib
      1 """Filename globbing utility."""
      2 
      3 import os
      4 import re
      5 import fnmatch
      6 
      7 __all__ = ["glob", "iglob", "escape"]
      8 
      9 def glob(pathname, *, recursive=False):
     10     """Return a list of paths matching a pathname pattern.
     11 
     12     The pattern may contain simple shell-style wildcards a la
     13     fnmatch. However, unlike fnmatch, filenames starting with a
     14     dot are special cases that are not matched by '*' and '?'
     15     patterns.
     16 
     17     If recursive is true, the pattern '**' will match any files and
     18     zero or more directories and subdirectories.
     19     """
     20     return list(iglob(pathname, recursive=recursive))
     21 
     22 def iglob(pathname, *, recursive=False):
     23     """Return an iterator which yields the paths matching a pathname pattern.
     24 
     25     The pattern may contain simple shell-style wildcards a la
     26     fnmatch. However, unlike fnmatch, filenames starting with a
     27     dot are special cases that are not matched by '*' and '?'
     28     patterns.
     29 
     30     If recursive is true, the pattern '**' will match any files and
     31     zero or more directories and subdirectories.
     32     """
     33     it = _iglob(pathname, recursive, False)
     34     if recursive and _isrecursive(pathname):
     35         s = next(it)  # skip empty string
     36         assert not s
     37     return it
     38 
     39 def _iglob(pathname, recursive, dironly):
     40     dirname, basename = os.path.split(pathname)
     41     if not has_magic(pathname):
     42         assert not dironly
     43         if basename:
     44             if os.path.lexists(pathname):
     45                 yield pathname
     46         else:
     47             # Patterns ending with a slash should match only directories
     48             if os.path.isdir(dirname):
     49                 yield pathname
     50         return
     51     if not dirname:
     52         if recursive and _isrecursive(basename):
     53             yield from _glob2(dirname, basename, dironly)
     54         else:
     55             yield from _glob1(dirname, basename, dironly)
     56         return
     57     # `os.path.split()` returns the argument itself as a dirname if it is a
     58     # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
     59     # contains magic characters (i.e. r'\\?\C:').
     60     if dirname != pathname and has_magic(dirname):
     61         dirs = _iglob(dirname, recursive, True)
     62     else:
     63         dirs = [dirname]
     64     if has_magic(basename):
     65         if recursive and _isrecursive(basename):
     66             glob_in_dir = _glob2
     67         else:
     68             glob_in_dir = _glob1
     69     else:
     70         glob_in_dir = _glob0
     71     for dirname in dirs:
     72         for name in glob_in_dir(dirname, basename, dironly):
     73             yield os.path.join(dirname, name)
     74 
     75 # These 2 helper functions non-recursively glob inside a literal directory.
     76 # They return a list of basenames.  _glob1 accepts a pattern while _glob0
     77 # takes a literal basename (so it only has to check for its existence).
     78 
     79 def _glob1(dirname, pattern, dironly):
     80     names = list(_iterdir(dirname, dironly))
     81     if not _ishidden(pattern):
     82         names = (x for x in names if not _ishidden(x))
     83     return fnmatch.filter(names, pattern)
     84 
     85 def _glob0(dirname, basename, dironly):
     86     if not basename:
     87         # `os.path.split()` returns an empty basename for paths ending with a
     88         # directory separator.  'q*x/' should match only directories.
     89         if os.path.isdir(dirname):
     90             return [basename]
     91     else:
     92         if os.path.lexists(os.path.join(dirname, basename)):
     93             return [basename]
     94     return []
     95 
     96 # Following functions are not public but can be used by third-party code.
     97 
     98 def glob0(dirname, pattern):
     99     return _glob0(dirname, pattern, False)
    100 
    101 def glob1(dirname, pattern):
    102     return _glob1(dirname, pattern, False)
    103 
    104 # This helper function recursively yields relative pathnames inside a literal
    105 # directory.
    106 
    107 def _glob2(dirname, pattern, dironly):
    108     assert _isrecursive(pattern)
    109     yield pattern[:0]
    110     yield from _rlistdir(dirname, dironly)
    111 
    112 # If dironly is false, yields all file names inside a directory.
    113 # If dironly is true, yields only directory names.
    114 def _iterdir(dirname, dironly):
    115     if not dirname:
    116         if isinstance(dirname, bytes):
    117             dirname = bytes(os.curdir, 'ASCII')
    118         else:
    119             dirname = os.curdir
    120     try:
    121         with os.scandir(dirname) as it:
    122             for entry in it:
    123                 try:
    124                     if not dironly or entry.is_dir():
    125                         yield entry.name
    126                 except OSError:
    127                     pass
    128     except OSError:
    129         return
    130 
    131 # Recursively yields relative pathnames inside a literal directory.
    132 def _rlistdir(dirname, dironly):
    133     names = list(_iterdir(dirname, dironly))
    134     for x in names:
    135         if not _ishidden(x):
    136             yield x
    137             path = os.path.join(dirname, x) if dirname else x
    138             for y in _rlistdir(path, dironly):
    139                 yield os.path.join(x, y)
    140 
    141 
    142 magic_check = re.compile('([*?[])')
    143 magic_check_bytes = re.compile(b'([*?[])')
    144 
    145 def has_magic(s):
    146     if isinstance(s, bytes):
    147         match = magic_check_bytes.search(s)
    148     else:
    149         match = magic_check.search(s)
    150     return match is not None
    151 
    152 def _ishidden(path):
    153     return path[0] in ('.', b'.'[0])
    154 
    155 def _isrecursive(pattern):
    156     if isinstance(pattern, bytes):
    157         return pattern == b'**'
    158     else:
    159         return pattern == '**'
    160 
    161 def escape(pathname):
    162     """Escape all special characters.
    163     """
    164     # Escaping is done by wrapping any of "*?[" between square brackets.
    165     # Metacharacters do not work in the drive part and shouldn't be escaped.
    166     drive, pathname = os.path.splitdrive(pathname)
    167     if isinstance(pathname, bytes):
    168         pathname = magic_check_bytes.sub(br'[\1]', pathname)
    169     else:
    170         pathname = magic_check.sub(r'[\1]', pathname)
    171     return drive + pathname
    172