Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 
      3 """\
      4 List python source files.
      5 
      6 There are three functions to check whether a file is a Python source, listed
      7 here with increasing complexity:
      8 
      9 - has_python_ext() checks whether a file name ends in '.py[w]'.
     10 - looks_like_python() checks whether the file is not binary and either has
     11   the '.py[w]' extension or the first line contains the word 'python'.
     12 - can_be_compiled() checks whether the file can be compiled by compile().
     13 
     14 The file also must be of appropriate size - not bigger than a megabyte.
     15 
     16 walk_python_files() recursively lists all Python files under the given directories.
     17 """
     18 __author__ = "Oleg Broytmann, Georg Brandl"
     19 
     20 __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
     21 
     22 
     23 import os, re
     24 
# Matches any ASCII control character except \x09-\x0D (tab, LF, VT, FF, CR).
# looks_like_python() treats a first line containing such a character as a
# sign that the file is binary.
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

# When true, print_debug() prints its messages; when false it stays silent.
debug = False
     28 
     29 def print_debug(msg):
     30     if debug: print msg
     31 
     32 
     33 def _open(fullpath):
     34     try:
     35         size = os.stat(fullpath).st_size
     36     except OSError, err: # Permission denied - ignore the file
     37         print_debug("%s: permission denied: %s" % (fullpath, err))
     38         return None
     39 
     40     if size > 1024*1024: # too big
     41         print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
     42         return None
     43 
     44     try:
     45         return open(fullpath, 'rU')
     46     except IOError, err: # Access denied, or a special file - ignore it
     47         print_debug("%s: access denied: %s" % (fullpath, err))
     48         return None
     49 
     50 def has_python_ext(fullpath):
     51     return fullpath.endswith(".py") or fullpath.endswith(".pyw")
     52 
     53 def looks_like_python(fullpath):
     54     infile = _open(fullpath)
     55     if infile is None:
     56         return False
     57 
     58     line = infile.readline()
     59     infile.close()
     60 
     61     if binary_re.search(line):
     62         # file appears to be binary
     63         print_debug("%s: appears to be binary" % fullpath)
     64         return False
     65 
     66     if fullpath.endswith(".py") or fullpath.endswith(".pyw"):
     67         return True
     68     elif "python" in line:
     69         # disguised Python script (e.g. CGI)
     70         return True
     71 
     72     return False
     73 
     74 def can_be_compiled(fullpath):
     75     infile = _open(fullpath)
     76     if infile is None:
     77         return False
     78 
     79     code = infile.read()
     80     infile.close()
     81 
     82     try:
     83         compile(code, fullpath, "exec")
     84     except Exception, err:
     85         print_debug("%s: cannot compile: %s" % (fullpath, err))
     86         return False
     87 
     88     return True
     89 
     90 
     91 def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
     92     """\
     93     Recursively yield all Python source files below the given paths.
     94 
     95     paths: a list of files and/or directories to be checked.
     96     is_python: a function that takes a file name and checks whether it is a
     97                Python source file
     98     exclude_dirs: a list of directory base names that should be excluded in
     99                   the search
    100     """
    101     if exclude_dirs is None:
    102         exclude_dirs=[]
    103 
    104     for path in paths:
    105         print_debug("testing: %s" % path)
    106         if os.path.isfile(path):
    107             if is_python(path):
    108                 yield path
    109         elif os.path.isdir(path):
    110             print_debug("    it is a directory")
    111             for dirpath, dirnames, filenames in os.walk(path):
    112                 for exclude in exclude_dirs:
    113                     if exclude in dirnames:
    114                         dirnames.remove(exclude)
    115                 for filename in filenames:
    116                     fullpath = os.path.join(dirpath, filename)
    117                     print_debug("testing: %s" % fullpath)
    118                     if is_python(fullpath):
    119                         yield fullpath
    120         else:
    121             print_debug("    unknown type")
    122 
    123 
    124 if __name__ == "__main__":
    125     # Two simple examples/tests
    126     for fullpath in walk_python_files(['.']):
    127         print fullpath
    128     print "----------"
    129     for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
    130         print fullpath
    131