#!/usr/bin/env python
# encoding: utf-8
# Baptiste Lepilleur, 2009
"""Ant-style glob: recursive directory listing filtered by ant patterns.

Patterns use '/' as the separator; '**' matches any number of directory
levels and '*' matches any run of characters inside one path component.
"""

from __future__ import print_function
try:
    # Python 2 only: the dircache module was removed in Python 3.
    from dircache import listdir
except ImportError:
    from os import listdir
import re
import fnmatch
import os.path

try:
    _STRING_TYPES = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3


# These fnmatch expressions are used by default to prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# These fnmatch expressions are used by default to exclude files and dirs
# while doing the recursive traversal in the glob_impl method of glob function.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()

# These ant_glob expressions are used by default to exclude files and dirs and also prune the directory tree
# while doing the recursive traversal in the glob_impl method of glob function.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store
'''

# Entry type flags; combine them with '|' to build the entry_type filter of glob().
DIR = 1
FILE = 2
DIR_LINK = 4
FILE_LINK = 8
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS

# Tokenizer for ant patterns. Alternatives are ordered so that the longest
# wildcard forms ('/**/', '**/', '/**') are tried before '*' and '/'.
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )


def ant_pattern_to_re( ant_pattern ):
    """Generates a regular expression from the ant pattern.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    Both '/' and os.path.sep are accepted as separators in the matched path.
    Returns a compiled regular expression anchored at both ends.
    Raises ValueError if the tokenizer leaves a gap in the pattern.
    """
    rex = ['^']
    next_pos = 0
    sep_rex = r'(?:/|%s)' % re.escape( os.path.sep )
    for match in _ANT_RE.finditer( ant_pattern ):
        # Tokens must be contiguous: a gap means part of the pattern was skipped.
        if match.start(0) != next_pos:
            raise ValueError( "Invalid ant pattern" )
        if match.group(1): # /**/
            rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
        elif match.group(2): # **/
            rex.append( '(?:.*%s)?' % sep_rex )
        elif match.group(3): # /**
            rex.append( sep_rex + '.*' )
        elif match.group(4): # *
            rex.append( '[^/%s]*' % re.escape(os.path.sep) )
        elif match.group(5): # /
            rex.append( sep_rex )
        else: # somepath (possibly the empty match at the end of the pattern)
            rex.append( re.escape(match.group(6)) )
        next_pos = match.end()
    rex.append('$')
    return re.compile( ''.join( rex ) )


def _as_list( l ):
    """Split a whitespace-separated string into a list; return anything else unchanged."""
    if isinstance(l, _STRING_TYPES):
        return l.split()
    return l


def glob(dir_path,
         includes = '**/*',
         excludes = default_excludes,
         entry_type = FILE,
         prune_dirs = prune_dirs,
         max_depth = 25):
    """Recursively list the entries below dir_path selected by ant patterns.

    dir_path: directory to traverse; '/' is converted to os.path.sep.
    includes: ant patterns (whitespace-separated string or list). An entry is
        a candidate if its full path (dir_path joined with the entry names)
        matches at least one of them.
    excludes: ant patterns (same format). A candidate whose full path matches
        any of them is dropped.
    entry_type: bitmask of DIR, FILE, DIR_LINK, FILE_LINK selecting which
        kinds of entries are returned.
    prune_dirs: fnmatch patterns (whitespace-separated string or list) matched
        against a directory's own name; matching directories are not
        traversed, although the directory entry itself is still filtered
        like any other entry.
    max_depth: accepted for interface compatibility; the traversal currently
        ignores it.

    Returns the list of matching paths.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/',os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir( dir_name ):
        """Return True if dir_name matches one of the prune patterns."""
        return any( fnmatch.fnmatch( dir_name, pattern ) for pattern in prune_dirs )

    def apply_filter( full_path, filter_rexs ):
        """Return True if at least one of the filter regular expression match full_path."""
        return any( rex.match( full_path ) for rex in filter_rexs )

    def glob_impl( root_dir_path ):
        """Yield the selected entries, walking the tree with an explicit stack."""
        child_dirs = [root_dir_path]
        while child_dirs:
            current_dir = child_dirs.pop()
            # dircache.listdir returned sorted names; sort explicitly so the
            # os.listdir fallback yields the same deterministic order.
            for entry in sorted( listdir( current_dir ) ):
                full_path = os.path.join( current_dir, entry )
                is_dir = os.path.isdir( full_path )
                if is_dir and not is_pruned_dir( entry ): # explore child directory ?
                    child_dirs.append( full_path )
                included = apply_filter( full_path, include_filter )
                rejected = apply_filter( full_path, exclude_filter )
                if not included or rejected: # do not include entry ?
                    continue
                link = os.path.islink( full_path )
                is_file = os.path.isfile( full_path )
                if not is_file and not is_dir: # unknown entry type
                    continue
                if link:
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if (found_type & entry_type_filter) != 0:
                    yield full_path
    return list( glob_impl( dir_path ) )


if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks ant_pattern_to_re against accepted and rejected sample paths."""

        def test_matching( self ):
            test_cases = [ ( 'path',
                             ['path'],
                             ['somepath', 'pathsuffix', '/path', '/path'] ),
                           ( '*.py',
                             ['source.py', 'source.ext.py', '.py'],
                             ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
                           ( '**/path',
                             ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                             ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
                           ( 'path/**',
                             ['path/a', 'path/path/a', 'path//'],
                             ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
                           ( '/**/path',
                             ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                             ['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
                           ( 'a/b',
                             ['a/b'],
                             ['somea/b', 'a/bsuffix', 'a/b/c'] ),
                           ( '**/*.py',
                             ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                             ['script.pyc', 'script.pyo', 'a.py/b'] ),
                           ( 'src/**/*.py',
                             ['src/a.py', 'src/dir/a.py'],
                             ['a/src/a.py', '/src/a.py'] ),
                           ]
            # Run every case a second time with the platform's native separator.
            for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
                def local_path( paths ):
                    return [ p.replace('/',os.path.sep) for p in paths ]
                test_cases.append( (ant_pattern, local_path(accepted_matches), local_path( rejected_matches )) )
            for ant_pattern, accepted_matches, rejected_matches in test_cases:
                rex = ant_pattern_to_re( ant_pattern )
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue( rex.match( accepted_match ) is not None )
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue( rex.match( rejected_match ) is None )

    unittest.main()