Home | History | Annotate | Download | only in devtools
      1 #!/usr/bin/env python
      2 # encoding: utf-8
      3 # Baptiste Lepilleur, 2009
      4 
from __future__ import print_function

import fnmatch
import os
import os.path
import re

try:
    from dircache import listdir
except ImportError:  # dircache was removed in Python 3
    from os import listdir
     10 
     11 
# These fnmatch expressions are used by default to prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
# The string is whitespace-separated; glob() splits it and tests the base name
# of each sub-directory against every pattern.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# These fnmatch expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl method of glob function.
# NOTE: the line below is disabled; it refers to 'prune_pats', which is not
# defined anywhere in the visible part of this file.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# These ant_glob expressions are used by default to exclude files and dirs
# from the result of the glob function (whitespace-separated, one per line).
# NOTE: in glob() a match here only rejects the entry from the result; whether
# a directory is descended into is decided by prune_dirs alone.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''

# Entry type bit flags for the entry_type parameter of glob(); combine with '|'.
DIR = 1         # directory that is not a symbolic link
FILE = 2        # regular file that is not a symbolic link
DIR_LINK = 4    # symbolic link that resolves to a directory
FILE_LINK = 8   # symbolic link that resolves to a regular file
LINKS = DIR_LINK | FILE_LINK    # any symbolic link
ALL_NO_LINK = DIR | FILE        # anything that is not a symbolic link
ALL = DIR | FILE | LINKS        # every supported entry type
     56 
     57 _ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )
     58 
     59 def ant_pattern_to_re( ant_pattern ):
     60     """Generates a regular expression from the ant pattern.
     61     Matching convention:
     62     **/a: match 'a', 'dir/a', 'dir1/dir2/a'
     63     a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
     64     *.py: match 'script.py' but not 'a/script.py'
     65     """
     66     rex = ['^']
     67     next_pos = 0
     68     sep_rex = r'(?:/|%s)' % re.escape( os.path.sep )
     69 ##    print 'Converting', ant_pattern
     70     for match in _ANT_RE.finditer( ant_pattern ):
     71 ##        print 'Matched', match.group()
     72 ##        print match.start(0), next_pos
     73         if match.start(0) != next_pos:
     74             raise ValueError( "Invalid ant pattern" )
     75         if match.group(1): # /**/
     76             rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
     77         elif match.group(2): # **/
     78             rex.append( '(?:.*%s)?' % sep_rex )
     79         elif match.group(3): # /**
     80             rex.append( sep_rex + '.*' )
     81         elif match.group(4): # *
     82             rex.append( '[^/%s]*' % re.escape(os.path.sep) )
     83         elif match.group(5): # /
     84             rex.append( sep_rex )
     85         else: # somepath
     86             rex.append( re.escape(match.group(6)) )
     87         next_pos = match.end()
     88     rex.append('$')
     89     return re.compile( ''.join( rex ) )
     90 
     91 def _as_list( l ):
     92     if isinstance(l, basestring):
     93         return l.split()
     94     return l
     95 
     96 def glob(dir_path,
     97          includes = '**/*',
     98          excludes = default_excludes,
     99          entry_type = FILE,
    100          prune_dirs = prune_dirs,
    101          max_depth = 25):
    102     include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    103     exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    104     prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
    105     dir_path = dir_path.replace('/',os.path.sep)
    106     entry_type_filter = entry_type
    107 
    108     def is_pruned_dir( dir_name ):
    109         for pattern in prune_dirs:
    110             if fnmatch.fnmatch( dir_name, pattern ):
    111                 return True
    112         return False
    113 
    114     def apply_filter( full_path, filter_rexs ):
    115         """Return True if at least one of the filter regular expression match full_path."""
    116         for rex in filter_rexs:
    117             if rex.match( full_path ):
    118                 return True
    119         return False
    120 
    121     def glob_impl( root_dir_path ):
    122         child_dirs = [root_dir_path]
    123         while child_dirs:
    124             dir_path = child_dirs.pop()
    125             for entry in listdir( dir_path ):
    126                 full_path = os.path.join( dir_path, entry )
    127 ##                print 'Testing:', full_path,
    128                 is_dir = os.path.isdir( full_path )
    129                 if is_dir and not is_pruned_dir( entry ): # explore child directory ?
    130 ##                    print '===> marked for recursion',
    131                     child_dirs.append( full_path )
    132                 included = apply_filter( full_path, include_filter )
    133                 rejected = apply_filter( full_path, exclude_filter )
    134                 if not included or rejected: # do not include entry ?
    135 ##                    print '=> not included or rejected'
    136                     continue
    137                 link = os.path.islink( full_path )
    138                 is_file = os.path.isfile( full_path )
    139                 if not is_file and not is_dir:
    140 ##                    print '=> unknown entry type'
    141                     continue
    142                 if link:
    143                     entry_type = is_file and FILE_LINK or DIR_LINK
    144                 else:
    145                     entry_type = is_file and FILE or DIR
    146 ##                print '=> type: %d' % entry_type, 
    147                 if (entry_type & entry_type_filter) != 0:
    148 ##                    print ' => KEEP'
    149                     yield os.path.join( dir_path, entry )
    150 ##                else:
    151 ##                    print ' => TYPE REJECTED'
    152     return list( glob_impl( dir_path ) )
    153 
    154 
    155 if __name__ == "__main__":
    156     import unittest
    157 
    158     class AntPatternToRETest(unittest.TestCase):
    159 ##        def test_conversion( self ):
    160 ##            self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )
    161 
    162         def test_matching( self ):
    163             test_cases = [ ( 'path',
    164                              ['path'],
    165                              ['somepath', 'pathsuffix', '/path', '/path'] ),
    166                            ( '*.py',
    167                              ['source.py', 'source.ext.py', '.py'],
    168                              ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
    169                            ( '**/path',
    170                              ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
    171                              ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
    172                            ( 'path/**',
    173                              ['path/a', 'path/path/a', 'path//'],
    174                              ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
    175                            ( '/**/path',
    176                              ['/path', '/a/path', '/a/b/path/path', '/path/path'],
    177                              ['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
    178                            ( 'a/b',
    179                              ['a/b'],
    180                              ['somea/b', 'a/bsuffix', 'a/b/c'] ),
    181                            ( '**/*.py',
    182                              ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
    183                              ['script.pyc', 'script.pyo', 'a.py/b'] ),
    184                            ( 'src/**/*.py',
    185                              ['src/a.py', 'src/dir/a.py'],
    186                              ['a/src/a.py', '/src/a.py'] ),
    187                            ]
    188             for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
    189                 def local_path( paths ):
    190                     return [ p.replace('/',os.path.sep) for p in paths ]
    191                 test_cases.append( (ant_pattern, local_path(accepted_matches), local_path( rejected_matches )) )
    192             for ant_pattern, accepted_matches, rejected_matches in test_cases:
    193                 rex = ant_pattern_to_re( ant_pattern )
    194                 print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
    195                 for accepted_match in accepted_matches:
    196                     print('Accepted?:', accepted_match)
    197                     self.assertTrue( rex.match( accepted_match ) is not None )
    198                 for rejected_match in rejected_matches:
    199                     print('Rejected?:', rejected_match)
    200                     self.assertTrue( rex.match( rejected_match ) is None )
    201 
    202     unittest.main()
    203