Home | History | Annotate | Download | only in Lib
      1 """Parse a Python module and describe its classes and methods.
      2 
      3 Parse enough of a Python file to recognize imports and class and
      4 method definitions, and to find out the superclasses of a class.
      5 
      6 The interface consists of a single function:
      7         readmodule_ex(module [, path])
      8 where module is the name of a Python module, and path is an optional
      9 list of directories where the module is to be searched.  If present,
     10 path is prepended to the system search path sys.path.  The return
     11 value is a dictionary.  The keys of the dictionary are the names of
     12 the classes and top-level functions defined in the module (including
     13 names brought in via the from XXX import YYY construct).  The values
     14 are instances of the classes Class and Function defined here.  One
     15 special key/value pair is present for packages: the key '__path__' has
     16 a list as its value which contains the package search path.
     17 
     18 A class is described by the class Class in this module.  Instances
     19 of this class have the following instance variables:
     20         module -- the module name
     21         name -- the name of the class
     22         super -- a list of super classes (Class instances)
     23         methods -- a dictionary of methods
     24         file -- the file in which the class was defined
     25         lineno -- the line in the file on which the class statement occurred
     26 The dictionary of methods uses the method names as keys and the line
     27 numbers on which the method was defined as values.
     28 If the name of a super class is not recognized, the corresponding
     29 entry in the list of super classes is not a class instance but a
     30 string giving the name of the super class.  Since import statements
     31 are recognized and imported modules are scanned as well, this
     32 shouldn't happen often.
     33 
     34 A function is described by the class Function in this module.
     35 Instances of this class have the following instance variables:
     36         module -- the module name
     37         name -- the name of the function
     38         file -- the file in which the function was defined
     39         lineno -- the line in the file on which the def statement occurred
     40 """
     41 
     42 import io
     43 import sys
     44 import importlib.util
     45 import tokenize
     46 from token import NAME, DEDENT, OP
     47 
     48 __all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
     49 
     50 _modules = {}                           # cache of modules we've seen
     51 
     52 # each Python class is represented by an instance of this class
     53 class Class:
     54     '''Class to represent a Python class.'''
     55     def __init__(self, module, name, super, file, lineno):
     56         self.module = module
     57         self.name = name
     58         if super is None:
     59             super = []
     60         self.super = super
     61         self.methods = {}
     62         self.file = file
     63         self.lineno = lineno
     64 
     65     def _addmethod(self, name, lineno):
     66         self.methods[name] = lineno
     67 
     68 class Function:
     69     '''Class to represent a top-level Python function'''
     70     def __init__(self, module, name, file, lineno):
     71         self.module = module
     72         self.name = name
     73         self.file = file
     74         self.lineno = lineno
     75 
     76 def readmodule(module, path=None):
     77     '''Backwards compatible interface.
     78 
     79     Call readmodule_ex() and then only keep Class objects from the
     80     resulting dictionary.'''
     81 
     82     res = {}
     83     for key, value in _readmodule(module, path or []).items():
     84         if isinstance(value, Class):
     85             res[key] = value
     86     return res
     87 
     88 def readmodule_ex(module, path=None):
     89     '''Read a module file and return a dictionary of classes.
     90 
     91     Search for MODULE in PATH and sys.path, read and parse the
     92     module and return a dictionary with one entry for each class
     93     found in the module.
     94     '''
     95     return _readmodule(module, path or [])
     96 
     97 def _readmodule(module, path, inpackage=None):
     98     '''Do the hard work for readmodule[_ex].
     99 
    100     If INPACKAGE is given, it must be the dotted name of the package in
    101     which we are searching for a submodule, and then PATH must be the
    102     package search path; otherwise, we are searching for a top-level
    103     module, and PATH is combined with sys.path.
    104     '''
    105     # Compute the full module name (prepending inpackage if set)
    106     if inpackage is not None:
    107         fullmodule = "%s.%s" % (inpackage, module)
    108     else:
    109         fullmodule = module
    110 
    111     # Check in the cache
    112     if fullmodule in _modules:
    113         return _modules[fullmodule]
    114 
    115     # Initialize the dict for this module's contents
    116     dict = {}
    117 
    118     # Check if it is a built-in module; we don't do much for these
    119     if module in sys.builtin_module_names and inpackage is None:
    120         _modules[module] = dict
    121         return dict
    122 
    123     # Check for a dotted module name
    124     i = module.rfind('.')
    125     if i >= 0:
    126         package = module[:i]
    127         submodule = module[i+1:]
    128         parent = _readmodule(package, path, inpackage)
    129         if inpackage is not None:
    130             package = "%s.%s" % (inpackage, package)
    131         if not '__path__' in parent:
    132             raise ImportError('No package named {}'.format(package))
    133         return _readmodule(submodule, parent['__path__'], package)
    134 
    135     # Search the path for the module
    136     f = None
    137     if inpackage is not None:
    138         search_path = path
    139     else:
    140         search_path = path + sys.path
    141     # XXX This will change once issue19944 lands.
    142     spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    143     _modules[fullmodule] = dict
    144     # is module a package?
    145     if spec.submodule_search_locations is not None:
    146         dict['__path__'] = spec.submodule_search_locations
    147     try:
    148         source = spec.loader.get_source(fullmodule)
    149         if source is None:
    150             return dict
    151     except (AttributeError, ImportError):
    152         # not Python source, can't do anything with this module
    153         return dict
    154 
    155     fname = spec.loader.get_filename(fullmodule)
    156 
    157     f = io.StringIO(source)
    158 
    159     stack = [] # stack of (class, indent) pairs
    160 
    161     g = tokenize.generate_tokens(f.readline)
    162     try:
    163         for tokentype, token, start, _end, _line in g:
    164             if tokentype == DEDENT:
    165                 lineno, thisindent = start
    166                 # close nested classes and defs
    167                 while stack and stack[-1][1] >= thisindent:
    168                     del stack[-1]
    169             elif token == 'def':
    170                 lineno, thisindent = start
    171                 # close previous nested classes and defs
    172                 while stack and stack[-1][1] >= thisindent:
    173                     del stack[-1]
    174                 tokentype, meth_name, start = next(g)[0:3]
    175                 if tokentype != NAME:
    176                     continue # Syntax error
    177                 if stack:
    178                     cur_class = stack[-1][0]
    179                     if isinstance(cur_class, Class):
    180                         # it's a method
    181                         cur_class._addmethod(meth_name, lineno)
    182                     # else it's a nested def
    183                 else:
    184                     # it's a function
    185                     dict[meth_name] = Function(fullmodule, meth_name,
    186                                                fname, lineno)
    187                 stack.append((None, thisindent)) # Marker for nested fns
    188             elif token == 'class':
    189                 lineno, thisindent = start
    190                 # close previous nested classes and defs
    191                 while stack and stack[-1][1] >= thisindent:
    192                     del stack[-1]
    193                 tokentype, class_name, start = next(g)[0:3]
    194                 if tokentype != NAME:
    195                     continue # Syntax error
    196                 # parse what follows the class name
    197                 tokentype, token, start = next(g)[0:3]
    198                 inherit = None
    199                 if token == '(':
    200                     names = [] # List of superclasses
    201                     # there's a list of superclasses
    202                     level = 1
    203                     super = [] # Tokens making up current superclass
    204                     while True:
    205                         tokentype, token, start = next(g)[0:3]
    206                         if token in (')', ',') and level == 1:
    207                             n = "".join(super)
    208                             if n in dict:
    209                                 # we know this super class
    210                                 n = dict[n]
    211                             else:
    212                                 c = n.split('.')
    213                                 if len(c) > 1:
    214                                     # super class is of the form
    215                                     # module.class: look in module for
    216                                     # class
    217                                     m = c[-2]
    218                                     c = c[-1]
    219                                     if m in _modules:
    220                                         d = _modules[m]
    221                                         if c in d:
    222                                             n = d[c]
    223                             names.append(n)
    224                             super = []
    225                         if token == '(':
    226                             level += 1
    227                         elif token == ')':
    228                             level -= 1
    229                             if level == 0:
    230                                 break
    231                         elif token == ',' and level == 1:
    232                             pass
    233                         # only use NAME and OP (== dot) tokens for type name
    234                         elif tokentype in (NAME, OP) and level == 1:
    235                             super.append(token)
    236                         # expressions in the base list are not supported
    237                     inherit = names
    238                 cur_class = Class(fullmodule, class_name, inherit,
    239                                   fname, lineno)
    240                 if not stack:
    241                     dict[class_name] = cur_class
    242                 stack.append((cur_class, thisindent))
    243             elif token == 'import' and start[1] == 0:
    244                 modules = _getnamelist(g)
    245                 for mod, _mod2 in modules:
    246                     try:
    247                         # Recursively read the imported module
    248                         if inpackage is None:
    249                             _readmodule(mod, path)
    250                         else:
    251                             try:
    252                                 _readmodule(mod, path, inpackage)
    253                             except ImportError:
    254                                 _readmodule(mod, [])
    255                     except:
    256                         # If we can't find or parse the imported module,
    257                         # too bad -- don't die here.
    258                         pass
    259             elif token == 'from' and start[1] == 0:
    260                 mod, token = _getname(g)
    261                 if not mod or token != "import":
    262                     continue
    263                 names = _getnamelist(g)
    264                 try:
    265                     # Recursively read the imported module
    266                     d = _readmodule(mod, path, inpackage)
    267                 except:
    268                     # If we can't find or parse the imported module,
    269                     # too bad -- don't die here.
    270                     continue
    271                 # add any classes that were defined in the imported module
    272                 # to our name space if they were mentioned in the list
    273                 for n, n2 in names:
    274                     if n in d:
    275                         dict[n2 or n] = d[n]
    276                     elif n == '*':
    277                         # don't add names that start with _
    278                         for n in d:
    279                             if n[0] != '_':
    280                                 dict[n] = d[n]
    281     except StopIteration:
    282         pass
    283 
    284     f.close()
    285     return dict
    286 
    287 def _getnamelist(g):
    288     # Helper to get a comma-separated list of dotted names plus 'as'
    289     # clauses.  Return a list of pairs (name, name2) where name2 is
    290     # the 'as' name, or None if there is no 'as' clause.
    291     names = []
    292     while True:
    293         name, token = _getname(g)
    294         if not name:
    295             break
    296         if token == 'as':
    297             name2, token = _getname(g)
    298         else:
    299             name2 = None
    300         names.append((name, name2))
    301         while token != "," and "\n" not in token:
    302             token = next(g)[1]
    303         if token != ",":
    304             break
    305     return names
    306 
    307 def _getname(g):
    308     # Helper to get a dotted name, return a pair (name, token) where
    309     # name is the dotted name, or None if there was no dotted name,
    310     # and token is the next input token.
    311     parts = []
    312     tokentype, token = next(g)[0:2]
    313     if tokentype != NAME and token != '*':
    314         return (None, token)
    315     parts.append(token)
    316     while True:
    317         tokentype, token = next(g)[0:2]
    318         if token != '.':
    319             break
    320         tokentype, token = next(g)[0:2]
    321         if tokentype != NAME:
    322             break
    323         parts.append(token)
    324     return (".".join(parts), token)
    325 
    326 def _main():
    327     # Main program for testing.
    328     import os
    329     from operator import itemgetter
    330     mod = sys.argv[1]
    331     if os.path.exists(mod):
    332         path = [os.path.dirname(mod)]
    333         mod = os.path.basename(mod)
    334         if mod.lower().endswith(".py"):
    335             mod = mod[:-3]
    336     else:
    337         path = []
    338     dict = readmodule_ex(mod, path)
    339     objs = list(dict.values())
    340     objs.sort(key=lambda a: getattr(a, 'lineno', 0))
    341     for obj in objs:
    342         if isinstance(obj, Class):
    343             print("class", obj.name, obj.super, obj.lineno)
    344             methods = sorted(obj.methods.items(), key=itemgetter(1))
    345             for name, lineno in methods:
    346                 if name != "__path__":
    347                     print("  def", name, lineno)
    348         elif isinstance(obj, Function):
    349             print("def", obj.name, obj.lineno)
    350 
    351 if __name__ == "__main__":
    352     _main()
    353