Home | History | Annotate | Download | only in Lib
      1 """Parse a Python module and describe its classes and methods.
      2 
      3 Parse enough of a Python file to recognize imports and class and
      4 method definitions, and to find out the superclasses of a class.
      5 
The main entry point is the function
        readmodule_ex(module [, path])
where module is the name of a Python module, and path is an optional
list of directories in which to search for the module.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
names that are brought in via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair is present for packages: the key '__path__'
has a list as its value which contains the package search path.
The older function readmodule(module [, path]) takes the same
arguments but returns only the entries that are Class instances.
     17 
     18 A class is described by the class Class in this module.  Instances
     19 of this class have the following instance variables:
     20         module -- the module name
     21         name -- the name of the class
     22         super -- a list of super classes (Class instances)
     23         methods -- a dictionary of methods
     24         file -- the file in which the class was defined
     25         lineno -- the line in the file on which the class statement occurred
     26 The dictionary of methods uses the method names as keys and the line
     27 numbers on which the method was defined as values.
     28 If the name of a super class is not recognized, the corresponding
     29 entry in the list of super classes is not a class instance but a
     30 string giving the name of the super class.  Since import statements
     31 are recognized and imported modules are scanned as well, this
     32 shouldn't happen often.
     33 
A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
     40 """
     41 
     42 import sys
     43 import imp
     44 import tokenize
     45 from token import NAME, DEDENT, OP
     46 from operator import itemgetter
     47 
# Names exported by "from <this module> import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen, keyed by full dotted module name; each value
# is the dictionary that _readmodule() built for that module.
_modules = {}
     51 
     52 # each Python class is represented by an instance of this class
     53 class Class:
     54     '''Class to represent a Python class.'''
     55     def __init__(self, module, name, super, file, lineno):
     56         self.module = module
     57         self.name = name
     58         if super is None:
     59             super = []
     60         self.super = super
     61         self.methods = {}
     62         self.file = file
     63         self.lineno = lineno
     64 
     65     def _addmethod(self, name, lineno):
     66         self.methods[name] = lineno
     67 
     68 class Function:
     69     '''Class to represent a top-level Python function'''
     70     def __init__(self, module, name, file, lineno):
     71         self.module = module
     72         self.name = name
     73         self.file = file
     74         self.lineno = lineno
     75 
     76 def readmodule(module, path=None):
     77     '''Backwards compatible interface.
     78 
     79     Call readmodule_ex() and then only keep Class objects from the
     80     resulting dictionary.'''
     81 
     82     res = {}
     83     for key, value in _readmodule(module, path or []).items():
     84         if isinstance(value, Class):
     85             res[key] = value
     86     return res
     87 
     88 def readmodule_ex(module, path=None):
     89     '''Read a module file and return a dictionary of classes.
     90 
     91     Search for MODULE in PATH and sys.path, read and parse the
     92     module and return a dictionary with one entry for each class
     93     found in the module.
     94     '''
     95     return _readmodule(module, path or [])
     96 
     97 def _readmodule(module, path, inpackage=None):
     98     '''Do the hard work for readmodule[_ex].
     99 
    100     If INPACKAGE is given, it must be the dotted name of the package in
    101     which we are searching for a submodule, and then PATH must be the
    102     package search path; otherwise, we are searching for a top-level
    103     module, and PATH is combined with sys.path.
    104     '''
    105     # Compute the full module name (prepending inpackage if set)
    106     if inpackage is not None:
    107         fullmodule = "%s.%s" % (inpackage, module)
    108     else:
    109         fullmodule = module
    110 
    111     # Check in the cache
    112     if fullmodule in _modules:
    113         return _modules[fullmodule]
    114 
    115     # Initialize the dict for this module's contents
    116     dict = {}
    117 
    118     # Check if it is a built-in module; we don't do much for these
    119     if module in sys.builtin_module_names and inpackage is None:
    120         _modules[module] = dict
    121         return dict
    122 
    123     # Check for a dotted module name
    124     i = module.rfind('.')
    125     if i >= 0:
    126         package = module[:i]
    127         submodule = module[i+1:]
    128         parent = _readmodule(package, path, inpackage)
    129         if inpackage is not None:
    130             package = "%s.%s" % (inpackage, package)
    131         if not '__path__' in parent:
    132             raise ImportError('No package named {}'.format(package))
    133         return _readmodule(submodule, parent['__path__'], package)
    134 
    135     # Search the path for the module
    136     f = None
    137     if inpackage is not None:
    138         f, fname, (_s, _m, ty) = imp.find_module(module, path)
    139     else:
    140         f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    141     if ty == imp.PKG_DIRECTORY:
    142         dict['__path__'] = [fname]
    143         path = [fname] + path
    144         f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    145     _modules[fullmodule] = dict
    146     if ty != imp.PY_SOURCE:
    147         # not Python source, can't do anything with this module
    148         f.close()
    149         return dict
    150 
    151     stack = [] # stack of (class, indent) pairs
    152 
    153     g = tokenize.generate_tokens(f.readline)
    154     try:
    155         for tokentype, token, start, _end, _line in g:
    156             if tokentype == DEDENT:
    157                 lineno, thisindent = start
    158                 # close nested classes and defs
    159                 while stack and stack[-1][1] >= thisindent:
    160                     del stack[-1]
    161             elif token == 'def':
    162                 lineno, thisindent = start
    163                 # close previous nested classes and defs
    164                 while stack and stack[-1][1] >= thisindent:
    165                     del stack[-1]
    166                 tokentype, meth_name, start = g.next()[0:3]
    167                 if tokentype != NAME:
    168                     continue # Syntax error
    169                 if stack:
    170                     cur_class = stack[-1][0]
    171                     if isinstance(cur_class, Class):
    172                         # it's a method
    173                         cur_class._addmethod(meth_name, lineno)
    174                     # else it's a nested def
    175                 else:
    176                     # it's a function
    177                     dict[meth_name] = Function(fullmodule, meth_name,
    178                                                fname, lineno)
    179                 stack.append((None, thisindent)) # Marker for nested fns
    180             elif token == 'class':
    181                 lineno, thisindent = start
    182                 # close previous nested classes and defs
    183                 while stack and stack[-1][1] >= thisindent:
    184                     del stack[-1]
    185                 tokentype, class_name, start = g.next()[0:3]
    186                 if tokentype != NAME:
    187                     continue # Syntax error
    188                 # parse what follows the class name
    189                 tokentype, token, start = g.next()[0:3]
    190                 inherit = None
    191                 if token == '(':
    192                     names = [] # List of superclasses
    193                     # there's a list of superclasses
    194                     level = 1
    195                     super = [] # Tokens making up current superclass
    196                     while True:
    197                         tokentype, token, start = g.next()[0:3]
    198                         if token in (')', ',') and level == 1:
    199                             n = "".join(super)
    200                             if n in dict:
    201                                 # we know this super class
    202                                 n = dict[n]
    203                             else:
    204                                 c = n.split('.')
    205                                 if len(c) > 1:
    206                                     # super class is of the form
    207                                     # module.class: look in module for
    208                                     # class
    209                                     m = c[-2]
    210                                     c = c[-1]
    211                                     if m in _modules:
    212                                         d = _modules[m]
    213                                         if c in d:
    214                                             n = d[c]
    215                             names.append(n)
    216                             super = []
    217                         if token == '(':
    218                             level += 1
    219                         elif token == ')':
    220                             level -= 1
    221                             if level == 0:
    222                                 break
    223                         elif token == ',' and level == 1:
    224                             pass
    225                         # only use NAME and OP (== dot) tokens for type name
    226                         elif tokentype in (NAME, OP) and level == 1:
    227                             super.append(token)
    228                         # expressions in the base list are not supported
    229                     inherit = names
    230                 cur_class = Class(fullmodule, class_name, inherit,
    231                                   fname, lineno)
    232                 if not stack:
    233                     dict[class_name] = cur_class
    234                 stack.append((cur_class, thisindent))
    235             elif token == 'import' and start[1] == 0:
    236                 modules = _getnamelist(g)
    237                 for mod, _mod2 in modules:
    238                     try:
    239                         # Recursively read the imported module
    240                         if inpackage is None:
    241                             _readmodule(mod, path)
    242                         else:
    243                             try:
    244                                 _readmodule(mod, path, inpackage)
    245                             except ImportError:
    246                                 _readmodule(mod, [])
    247                     except:
    248                         # If we can't find or parse the imported module,
    249                         # too bad -- don't die here.
    250                         pass
    251             elif token == 'from' and start[1] == 0:
    252                 mod, token = _getname(g)
    253                 if not mod or token != "import":
    254                     continue
    255                 names = _getnamelist(g)
    256                 try:
    257                     # Recursively read the imported module
    258                     d = _readmodule(mod, path, inpackage)
    259                 except:
    260                     # If we can't find or parse the imported module,
    261                     # too bad -- don't die here.
    262                     continue
    263                 # add any classes that were defined in the imported module
    264                 # to our name space if they were mentioned in the list
    265                 for n, n2 in names:
    266                     if n in d:
    267                         dict[n2 or n] = d[n]
    268                     elif n == '*':
    269                         # don't add names that start with _
    270                         for n in d:
    271                             if n[0] != '_':
    272                                 dict[n] = d[n]
    273     except StopIteration:
    274         pass
    275 
    276     f.close()
    277     return dict
    278 
    279 def _getnamelist(g):
    280     # Helper to get a comma-separated list of dotted names plus 'as'
    281     # clauses.  Return a list of pairs (name, name2) where name2 is
    282     # the 'as' name, or None if there is no 'as' clause.
    283     names = []
    284     while True:
    285         name, token = _getname(g)
    286         if not name:
    287             break
    288         if token == 'as':
    289             name2, token = _getname(g)
    290         else:
    291             name2 = None
    292         names.append((name, name2))
    293         while token != "," and "\n" not in token:
    294             token = g.next()[1]
    295         if token != ",":
    296             break
    297     return names
    298 
    299 def _getname(g):
    300     # Helper to get a dotted name, return a pair (name, token) where
    301     # name is the dotted name, or None if there was no dotted name,
    302     # and token is the next input token.
    303     parts = []
    304     tokentype, token = g.next()[0:2]
    305     if tokentype != NAME and token != '*':
    306         return (None, token)
    307     parts.append(token)
    308     while True:
    309         tokentype, token = g.next()[0:2]
    310         if token != '.':
    311             break
    312         tokentype, token = g.next()[0:2]
    313         if tokentype != NAME:
    314             break
    315         parts.append(token)
    316     return (".".join(parts), token)
    317 
    318 def _main():
    319     # Main program for testing.
    320     import os
    321     mod = sys.argv[1]
    322     if os.path.exists(mod):
    323         path = [os.path.dirname(mod)]
    324         mod = os.path.basename(mod)
    325         if mod.lower().endswith(".py"):
    326             mod = mod[:-3]
    327     else:
    328         path = []
    329     dict = readmodule_ex(mod, path)
    330     objs = dict.values()
    331     objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
    332                                getattr(b, 'lineno', 0)))
    333     for obj in objs:
    334         if isinstance(obj, Class):
    335             print "class", obj.name, obj.super, obj.lineno
    336             methods = sorted(obj.methods.iteritems(), key=itemgetter(1))
    337             for name, lineno in methods:
    338                 if name != "__path__":
    339                     print "  def", name, lineno
    340         elif isinstance(obj, Function):
    341             print "def", obj.name, obj.lineno
    342 
# Run the test driver only when this file is executed as a script.
if __name__ == "__main__":
    _main()
    345