Home | History | Annotate | Download | only in Lib
      1 """Parse a Python module and describe its classes and methods.
      2 
      3 Parse enough of a Python file to recognize imports and class and
      4 method definitions, and to find out the superclasses of a class.
      5 
      6 The interface consists of a single function:
      7         readmodule_ex(module [, path])
      8 where module is the name of a Python module, and path is an optional
      9 list of directories where the module is to be searched.  If present,
     10 path is prepended to the system search path sys.path.  The return
     11 value is a dictionary.  The keys of the dictionary are the names of
     12 the classes defined in the module (including classes that are defined
     13 via the from XXX import YYY construct).  The values are class
     14 instances of the class Class defined here.  One special key/value pair
     15 is present for packages: the key '__path__' has a list as its value
     16 which contains the package search path.
     17 
     18 A class is described by the class Class in this module.  Instances
     19 of this class have the following instance variables:
     20         module -- the module name
     21         name -- the name of the class
     22         super -- a list of super classes (Class instances)
     23         methods -- a dictionary of methods
     24         file -- the file in which the class was defined
     25         lineno -- the line in the file on which the class statement occurred
     26 The dictionary of methods uses the method names as keys and the line
     27 numbers on which the method was defined as values.
     28 If the name of a super class is not recognized, the corresponding
     29 entry in the list of super classes is not a class instance but a
     30 string giving the name of the super class.  Since import statements
     31 are recognized and imported modules are scanned as well, this
     32 shouldn't happen often.
     33 
     34 A function is described by the class Function in this module.
     35 Instances of this class have the following instance variables:
     36         module -- the module name
     37         name -- the name of the function
     38         file -- the file in which the function was defined
     39         lineno -- the line in the file on which the def statement occurred
     40 """
     41 
     42 import sys
     43 import imp
     44 import tokenize
     45 from token import NAME, DEDENT, OP
     46 from operator import itemgetter
     47 
# Public names of this module; the underscore-prefixed helpers below are
# deliberately left out.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a full (dotted) module name to the dictionary built for it by
# _readmodule(), so each module is parsed at most once per process.
_modules = {}                           # cache of modules we've seen

     51 
     52 # each Python class is represented by an instance of this class

     53 class Class:
     54     '''Class to represent a Python class.'''
     55     def __init__(self, module, name, super, file, lineno):
     56         self.module = module
     57         self.name = name
     58         if super is None:
     59             super = []
     60         self.super = super
     61         self.methods = {}
     62         self.file = file
     63         self.lineno = lineno
     64 
     65     def _addmethod(self, name, lineno):
     66         self.methods[name] = lineno
     67 
     68 class Function:
     69     '''Class to represent a top-level Python function'''
     70     def __init__(self, module, name, file, lineno):
     71         self.module = module
     72         self.name = name
     73         self.file = file
     74         self.lineno = lineno
     75 
     76 def readmodule(module, path=None):
     77     '''Backwards compatible interface.
     78 
     79     Call readmodule_ex() and then only keep Class objects from the
     80     resulting dictionary.'''
     81 
     82     res = {}
     83     for key, value in _readmodule(module, path or []).items():
     84         if isinstance(value, Class):
     85             res[key] = value
     86     return res
     87 
     88 def readmodule_ex(module, path=None):
     89     '''Read a module file and return a dictionary of classes.
     90 
     91     Search for MODULE in PATH and sys.path, read and parse the
     92     module and return a dictionary with one entry for each class
     93     found in the module.
     94     '''
     95     return _readmodule(module, path or [])
     96 
     97 def _readmodule(module, path, inpackage=None):
     98     '''Do the hard work for readmodule[_ex].
     99 
    100     If INPACKAGE is given, it must be the dotted name of the package in
    101     which we are searching for a submodule, and then PATH must be the
    102     package search path; otherwise, we are searching for a top-level
    103     module, and PATH is combined with sys.path.
    104     '''
    105     # Compute the full module name (prepending inpackage if set)

    106     if inpackage is not None:
    107         fullmodule = "%s.%s" % (inpackage, module)
    108     else:
    109         fullmodule = module
    110 
    111     # Check in the cache

    112     if fullmodule in _modules:
    113         return _modules[fullmodule]
    114 
    115     # Initialize the dict for this module's contents

    116     dict = {}
    117 
    118     # Check if it is a built-in module; we don't do much for these

    119     if module in sys.builtin_module_names and inpackage is None:
    120         _modules[module] = dict
    121         return dict
    122 
    123     # Check for a dotted module name

    124     i = module.rfind('.')
    125     if i >= 0:
    126         package = module[:i]
    127         submodule = module[i+1:]
    128         parent = _readmodule(package, path, inpackage)
    129         if inpackage is not None:
    130             package = "%s.%s" % (inpackage, package)
    131         return _readmodule(submodule, parent['__path__'], package)
    132 
    133     # Search the path for the module

    134     f = None
    135     if inpackage is not None:
    136         f, fname, (_s, _m, ty) = imp.find_module(module, path)
    137     else:
    138         f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    139     if ty == imp.PKG_DIRECTORY:
    140         dict['__path__'] = [fname]
    141         path = [fname] + path
    142         f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    143     _modules[fullmodule] = dict
    144     if ty != imp.PY_SOURCE:
    145         # not Python source, can't do anything with this module

    146         f.close()
    147         return dict
    148 
    149     stack = [] # stack of (class, indent) pairs

    150 
    151     g = tokenize.generate_tokens(f.readline)
    152     try:
    153         for tokentype, token, start, _end, _line in g:
    154             if tokentype == DEDENT:
    155                 lineno, thisindent = start
    156                 # close nested classes and defs

    157                 while stack and stack[-1][1] >= thisindent:
    158                     del stack[-1]
    159             elif token == 'def':
    160                 lineno, thisindent = start
    161                 # close previous nested classes and defs

    162                 while stack and stack[-1][1] >= thisindent:
    163                     del stack[-1]
    164                 tokentype, meth_name, start = g.next()[0:3]
    165                 if tokentype != NAME:
    166                     continue # Syntax error

    167                 if stack:
    168                     cur_class = stack[-1][0]
    169                     if isinstance(cur_class, Class):
    170                         # it's a method

    171                         cur_class._addmethod(meth_name, lineno)
    172                     # else it's a nested def

    173                 else:
    174                     # it's a function

    175                     dict[meth_name] = Function(fullmodule, meth_name,
    176                                                fname, lineno)
    177                 stack.append((None, thisindent)) # Marker for nested fns

    178             elif token == 'class':
    179                 lineno, thisindent = start
    180                 # close previous nested classes and defs

    181                 while stack and stack[-1][1] >= thisindent:
    182                     del stack[-1]
    183                 tokentype, class_name, start = g.next()[0:3]
    184                 if tokentype != NAME:
    185                     continue # Syntax error

    186                 # parse what follows the class name

    187                 tokentype, token, start = g.next()[0:3]
    188                 inherit = None
    189                 if token == '(':
    190                     names = [] # List of superclasses

    191                     # there's a list of superclasses

    192                     level = 1
    193                     super = [] # Tokens making up current superclass

    194                     while True:
    195                         tokentype, token, start = g.next()[0:3]
    196                         if token in (')', ',') and level == 1:
    197                             n = "".join(super)
    198                             if n in dict:
    199                                 # we know this super class

    200                                 n = dict[n]
    201                             else:
    202                                 c = n.split('.')
    203                                 if len(c) > 1:
    204                                     # super class is of the form

    205                                     # module.class: look in module for

    206                                     # class

    207                                     m = c[-2]
    208                                     c = c[-1]
    209                                     if m in _modules:
    210                                         d = _modules[m]
    211                                         if c in d:
    212                                             n = d[c]
    213                             names.append(n)
    214                             super = []
    215                         if token == '(':
    216                             level += 1
    217                         elif token == ')':
    218                             level -= 1
    219                             if level == 0:
    220                                 break
    221                         elif token == ',' and level == 1:
    222                             pass
    223                         # only use NAME and OP (== dot) tokens for type name

    224                         elif tokentype in (NAME, OP) and level == 1:
    225                             super.append(token)
    226                         # expressions in the base list are not supported

    227                     inherit = names
    228                 cur_class = Class(fullmodule, class_name, inherit,
    229                                   fname, lineno)
    230                 if not stack:
    231                     dict[class_name] = cur_class
    232                 stack.append((cur_class, thisindent))
    233             elif token == 'import' and start[1] == 0:
    234                 modules = _getnamelist(g)
    235                 for mod, _mod2 in modules:
    236                     try:
    237                         # Recursively read the imported module

    238                         if inpackage is None:
    239                             _readmodule(mod, path)
    240                         else:
    241                             try:
    242                                 _readmodule(mod, path, inpackage)
    243                             except ImportError:
    244                                 _readmodule(mod, [])
    245                     except:
    246                         # If we can't find or parse the imported module,

    247                         # too bad -- don't die here.

    248                         pass
    249             elif token == 'from' and start[1] == 0:
    250                 mod, token = _getname(g)
    251                 if not mod or token != "import":
    252                     continue
    253                 names = _getnamelist(g)
    254                 try:
    255                     # Recursively read the imported module

    256                     d = _readmodule(mod, path, inpackage)
    257                 except:
    258                     # If we can't find or parse the imported module,

    259                     # too bad -- don't die here.

    260                     continue
    261                 # add any classes that were defined in the imported module

    262                 # to our name space if they were mentioned in the list

    263                 for n, n2 in names:
    264                     if n in d:
    265                         dict[n2 or n] = d[n]
    266                     elif n == '*':
    267                         # don't add names that start with _

    268                         for n in d:
    269                             if n[0] != '_':
    270                                 dict[n] = d[n]
    271     except StopIteration:
    272         pass
    273 
    274     f.close()
    275     return dict
    276 
    277 def _getnamelist(g):
    278     # Helper to get a comma-separated list of dotted names plus 'as'

    279     # clauses.  Return a list of pairs (name, name2) where name2 is

    280     # the 'as' name, or None if there is no 'as' clause.

    281     names = []
    282     while True:
    283         name, token = _getname(g)
    284         if not name:
    285             break
    286         if token == 'as':
    287             name2, token = _getname(g)
    288         else:
    289             name2 = None
    290         names.append((name, name2))
    291         while token != "," and "\n" not in token:
    292             token = g.next()[1]
    293         if token != ",":
    294             break
    295     return names
    296 
    297 def _getname(g):
    298     # Helper to get a dotted name, return a pair (name, token) where

    299     # name is the dotted name, or None if there was no dotted name,

    300     # and token is the next input token.

    301     parts = []
    302     tokentype, token = g.next()[0:2]
    303     if tokentype != NAME and token != '*':
    304         return (None, token)
    305     parts.append(token)
    306     while True:
    307         tokentype, token = g.next()[0:2]
    308         if token != '.':
    309             break
    310         tokentype, token = g.next()[0:2]
    311         if tokentype != NAME:
    312             break
    313         parts.append(token)
    314     return (".".join(parts), token)
    315 
    316 def _main():
    317     # Main program for testing.

    318     import os
    319     mod = sys.argv[1]
    320     if os.path.exists(mod):
    321         path = [os.path.dirname(mod)]
    322         mod = os.path.basename(mod)
    323         if mod.lower().endswith(".py"):
    324             mod = mod[:-3]
    325     else:
    326         path = []
    327     dict = readmodule_ex(mod, path)
    328     objs = dict.values()
    329     objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
    330                                getattr(b, 'lineno', 0)))
    331     for obj in objs:
    332         if isinstance(obj, Class):
    333             print "class", obj.name, obj.super, obj.lineno
    334             methods = sorted(obj.methods.iteritems(), key=itemgetter(1))
    335             for name, lineno in methods:
    336                 if name != "__path__":
    337                     print "  def", name, lineno
    338         elif isinstance(obj, Function):
    339             print "def", obj.name, obj.lineno
    340 
    341 if __name__ == "__main__":
    342     _main()
    343