Home | History | Annotate | Download | only in distutils
      1 """distutils.filelist
      2 
      3 Provides the FileList class, used for poking about the filesystem
      4 and building lists of files.
      5 """
      6 
      7 import os, re
      8 import fnmatch
      9 import functools
     10 from distutils.util import convert_path
     11 from distutils.errors import DistutilsTemplateError, DistutilsInternalError
     12 from distutils import log
     13 
     14 class FileList:
     15     """A list of files built by on exploring the filesystem and filtered by
     16     applying various patterns to what we find there.
     17 
     18     Instance attributes:
     19       dir
     20         directory from which files will be taken -- only used if
     21         'allfiles' not supplied to constructor
     22       files
     23         list of filenames currently being built/filtered/manipulated
     24       allfiles
     25         complete list of files under consideration (ie. without any
     26         filtering applied)
     27     """
     28 
     29     def __init__(self, warn=None, debug_print=None):
     30         # ignore argument to FileList, but keep them for backwards
     31         # compatibility
     32         self.allfiles = None
     33         self.files = []
     34 
     35     def set_allfiles(self, allfiles):
     36         self.allfiles = allfiles
     37 
     38     def findall(self, dir=os.curdir):
     39         self.allfiles = findall(dir)
     40 
     41     def debug_print(self, msg):
     42         """Print 'msg' to stdout if the global DEBUG (taken from the
     43         DISTUTILS_DEBUG environment variable) flag is true.
     44         """
     45         from distutils.debug import DEBUG
     46         if DEBUG:
     47             print(msg)
     48 
     49     # -- List-like methods ---------------------------------------------
     50 
     51     def append(self, item):
     52         self.files.append(item)
     53 
     54     def extend(self, items):
     55         self.files.extend(items)
     56 
     57     def sort(self):
     58         # Not a strict lexical sort!
     59         sortable_files = sorted(map(os.path.split, self.files))
     60         self.files = []
     61         for sort_tuple in sortable_files:
     62             self.files.append(os.path.join(*sort_tuple))
     63 
     64 
     65     # -- Other miscellaneous utility methods ---------------------------
     66 
     67     def remove_duplicates(self):
     68         # Assumes list has been sorted!
     69         for i in range(len(self.files) - 1, 0, -1):
     70             if self.files[i] == self.files[i - 1]:
     71                 del self.files[i]
     72 
     73 
     74     # -- "File template" methods ---------------------------------------
     75 
     76     def _parse_template_line(self, line):
     77         words = line.split()
     78         action = words[0]
     79 
     80         patterns = dir = dir_pattern = None
     81 
     82         if action in ('include', 'exclude',
     83                       'global-include', 'global-exclude'):
     84             if len(words) < 2:
     85                 raise DistutilsTemplateError(
     86                       "'%s' expects <pattern1> <pattern2> ..." % action)
     87             patterns = [convert_path(w) for w in words[1:]]
     88         elif action in ('recursive-include', 'recursive-exclude'):
     89             if len(words) < 3:
     90                 raise DistutilsTemplateError(
     91                       "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
     92             dir = convert_path(words[1])
     93             patterns = [convert_path(w) for w in words[2:]]
     94         elif action in ('graft', 'prune'):
     95             if len(words) != 2:
     96                 raise DistutilsTemplateError(
     97                       "'%s' expects a single <dir_pattern>" % action)
     98             dir_pattern = convert_path(words[1])
     99         else:
    100             raise DistutilsTemplateError("unknown action '%s'" % action)
    101 
    102         return (action, patterns, dir, dir_pattern)
    103 
    104     def process_template_line(self, line):
    105         # Parse the line: split it up, make sure the right number of words
    106         # is there, and return the relevant words.  'action' is always
    107         # defined: it's the first word of the line.  Which of the other
    108         # three are defined depends on the action; it'll be either
    109         # patterns, (dir and patterns), or (dir_pattern).
    110         (action, patterns, dir, dir_pattern) = self._parse_template_line(line)
    111 
    112         # OK, now we know that the action is valid and we have the
    113         # right number of words on the line for that action -- so we
    114         # can proceed with minimal error-checking.
    115         if action == 'include':
    116             self.debug_print("include " + ' '.join(patterns))
    117             for pattern in patterns:
    118                 if not self.include_pattern(pattern, anchor=1):
    119                     log.warn("warning: no files found matching '%s'",
    120                              pattern)
    121 
    122         elif action == 'exclude':
    123             self.debug_print("exclude " + ' '.join(patterns))
    124             for pattern in patterns:
    125                 if not self.exclude_pattern(pattern, anchor=1):
    126                     log.warn(("warning: no previously-included files "
    127                               "found matching '%s'"), pattern)
    128 
    129         elif action == 'global-include':
    130             self.debug_print("global-include " + ' '.join(patterns))
    131             for pattern in patterns:
    132                 if not self.include_pattern(pattern, anchor=0):
    133                     log.warn(("warning: no files found matching '%s' "
    134                               "anywhere in distribution"), pattern)
    135 
    136         elif action == 'global-exclude':
    137             self.debug_print("global-exclude " + ' '.join(patterns))
    138             for pattern in patterns:
    139                 if not self.exclude_pattern(pattern, anchor=0):
    140                     log.warn(("warning: no previously-included files matching "
    141                               "'%s' found anywhere in distribution"),
    142                              pattern)
    143 
    144         elif action == 'recursive-include':
    145             self.debug_print("recursive-include %s %s" %
    146                              (dir, ' '.join(patterns)))
    147             for pattern in patterns:
    148                 if not self.include_pattern(pattern, prefix=dir):
    149                     log.warn(("warning: no files found matching '%s' "
    150                                 "under directory '%s'"),
    151                              pattern, dir)
    152 
    153         elif action == 'recursive-exclude':
    154             self.debug_print("recursive-exclude %s %s" %
    155                              (dir, ' '.join(patterns)))
    156             for pattern in patterns:
    157                 if not self.exclude_pattern(pattern, prefix=dir):
    158                     log.warn(("warning: no previously-included files matching "
    159                               "'%s' found under directory '%s'"),
    160                              pattern, dir)
    161 
    162         elif action == 'graft':
    163             self.debug_print("graft " + dir_pattern)
    164             if not self.include_pattern(None, prefix=dir_pattern):
    165                 log.warn("warning: no directories found matching '%s'",
    166                          dir_pattern)
    167 
    168         elif action == 'prune':
    169             self.debug_print("prune " + dir_pattern)
    170             if not self.exclude_pattern(None, prefix=dir_pattern):
    171                 log.warn(("no previously-included directories found "
    172                           "matching '%s'"), dir_pattern)
    173         else:
    174             raise DistutilsInternalError(
    175                   "this cannot happen: invalid action '%s'" % action)
    176 
    177 
    178     # -- Filtering/selection methods -----------------------------------
    179 
    180     def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
    181         """Select strings (presumably filenames) from 'self.files' that
    182         match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
    183         are not quite the same as implemented by the 'fnmatch' module: '*'
    184         and '?'  match non-special characters, where "special" is platform-
    185         dependent: slash on Unix; colon, slash, and backslash on
    186         DOS/Windows; and colon on Mac OS.
    187 
    188         If 'anchor' is true (the default), then the pattern match is more
    189         stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
    190         'anchor' is false, both of these will match.
    191 
    192         If 'prefix' is supplied, then only filenames starting with 'prefix'
    193         (itself a pattern) and ending with 'pattern', with anything in between
    194         them, will match.  'anchor' is ignored in this case.
    195 
    196         If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
    197         'pattern' is assumed to be either a string containing a regex or a
    198         regex object -- no translation is done, the regex is just compiled
    199         and used as-is.
    200 
    201         Selected strings will be added to self.files.
    202 
    203         Return True if files are found, False otherwise.
    204         """
    205         # XXX docstring lying about what the special chars are?
    206         files_found = False
    207         pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
    208         self.debug_print("include_pattern: applying regex r'%s'" %
    209                          pattern_re.pattern)
    210 
    211         # delayed loading of allfiles list
    212         if self.allfiles is None:
    213             self.findall()
    214 
    215         for name in self.allfiles:
    216             if pattern_re.search(name):
    217                 self.debug_print(" adding " + name)
    218                 self.files.append(name)
    219                 files_found = True
    220         return files_found
    221 
    222 
    223     def exclude_pattern (self, pattern,
    224                          anchor=1, prefix=None, is_regex=0):
    225         """Remove strings (presumably filenames) from 'files' that match
    226         'pattern'.  Other parameters are the same as for
    227         'include_pattern()', above.
    228         The list 'self.files' is modified in place.
    229         Return True if files are found, False otherwise.
    230         """
    231         files_found = False
    232         pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
    233         self.debug_print("exclude_pattern: applying regex r'%s'" %
    234                          pattern_re.pattern)
    235         for i in range(len(self.files)-1, -1, -1):
    236             if pattern_re.search(self.files[i]):
    237                 self.debug_print(" removing " + self.files[i])
    238                 del self.files[i]
    239                 files_found = True
    240         return files_found
    241 
    242 
    243 # ----------------------------------------------------------------------
    244 # Utility functions
    245 
    246 def _find_all_simple(path):
    247     """
    248     Find all files under 'path'
    249     """
    250     results = (
    251         os.path.join(base, file)
    252         for base, dirs, files in os.walk(path, followlinks=True)
    253         for file in files
    254     )
    255     return filter(os.path.isfile, results)
    256 
    257 
    258 def findall(dir=os.curdir):
    259     """
    260     Find all files under 'dir' and return the list of full filenames.
    261     Unless dir is '.', return full filenames with dir prepended.
    262     """
    263     files = _find_all_simple(dir)
    264     if dir == os.curdir:
    265         make_rel = functools.partial(os.path.relpath, start=dir)
    266         files = map(make_rel, files)
    267     return list(files)
    268 
    269 
    270 def glob_to_re(pattern):
    271     """Translate a shell-like glob pattern to a regular expression; return
    272     a string containing the regex.  Differs from 'fnmatch.translate()' in
    273     that '*' does not match "special characters" (which are
    274     platform-specific).
    275     """
    276     pattern_re = fnmatch.translate(pattern)
    277 
    278     # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    279     # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    280     # and by extension they shouldn't match such "special characters" under
    281     # any OS.  So change all non-escaped dots in the RE to match any
    282     # character except the special characters (currently: just os.sep).
    283     sep = os.sep
    284     if os.sep == '\\':
    285         # we're using a regex to manipulate a regex, so we need
    286         # to escape the backslash twice
    287         sep = r'\\\\'
    288     escaped = r'\1[^%s]' % sep
    289     pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    290     return pattern_re
    291 
    292 
    293 def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    294     """Translate a shell-like wildcard pattern to a compiled regular
    295     expression.  Return the compiled regex.  If 'is_regex' true,
    296     then 'pattern' is directly compiled to a regex (if it's a string)
    297     or just returned as-is (assumes it's a regex object).
    298     """
    299     if is_regex:
    300         if isinstance(pattern, str):
    301             return re.compile(pattern)
    302         else:
    303             return pattern
    304 
    305     # ditch start and end characters
    306     start, _, end = glob_to_re('_').partition('_')
    307 
    308     if pattern:
    309         pattern_re = glob_to_re(pattern)
    310         assert pattern_re.startswith(start) and pattern_re.endswith(end)
    311     else:
    312         pattern_re = ''
    313 
    314     if prefix is not None:
    315         prefix_re = glob_to_re(prefix)
    316         assert prefix_re.startswith(start) and prefix_re.endswith(end)
    317         prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
    318         sep = os.sep
    319         if os.sep == '\\':
    320             sep = r'\\'
    321         pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
    322         pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
    323     else:                               # no prefix -- respect anchor flag
    324         if anchor:
    325             pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
    326 
    327     return re.compile(pattern_re)
    328