Home | History | Annotate | Download | only in parser
      1 """Simple code to extract class & function docstrings from a module.
      2 
      3 This code is used as an example in the library reference manual in the
      4 section on using the parser module.  Refer to the manual for a thorough
      5 discussion of the operation of this code.
      6 """
      7 
      8 import os
      9 import parser
     10 import symbol
     11 import token
     12 import types
     13 
     14 from types import ListType, TupleType
     15 
     16 
     17 def get_docs(fileName):
     18     """Retrieve information from the parse tree of a source file.
     19 
     20     fileName
     21         Name of the file to read Python source code from.
     22     """
     23     source = open(fileName).read()
     24     basename = os.path.basename(os.path.splitext(fileName)[0])
     25     ast = parser.suite(source)
     26     return ModuleInfo(ast.totuple(), basename)
     27 
     28 
     29 class SuiteInfoBase:
     30     _docstring = ''
     31     _name = ''
     32 
     33     def __init__(self, tree = None):
     34         self._class_info = {}
     35         self._function_info = {}
     36         if tree:
     37             self._extract_info(tree)
     38 
     39     def _extract_info(self, tree):
     40         # extract docstring
     41         if len(tree) == 2:
     42             found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
     43         else:
     44             found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
     45         if found:
     46             self._docstring = eval(vars['docstring'])
     47         # discover inner definitions
     48         for node in tree[1:]:
     49             found, vars = match(COMPOUND_STMT_PATTERN, node)
     50             if found:
     51                 cstmt = vars['compound']
     52                 if cstmt[0] == symbol.funcdef:
     53                     name = cstmt[2][1]
     54                     self._function_info[name] = FunctionInfo(cstmt)
     55                 elif cstmt[0] == symbol.classdef:
     56                     name = cstmt[2][1]
     57                     self._class_info[name] = ClassInfo(cstmt)
     58 
     59     def get_docstring(self):
     60         return self._docstring
     61 
     62     def get_name(self):
     63         return self._name
     64 
     65     def get_class_names(self):
     66         return self._class_info.keys()
     67 
     68     def get_class_info(self, name):
     69         return self._class_info[name]
     70 
     71     def __getitem__(self, name):
     72         try:
     73             return self._class_info[name]
     74         except KeyError:
     75             return self._function_info[name]
     76 
     77 
     78 class SuiteFuncInfo:
     79     #  Mixin class providing access to function names and info.
     80 
     81     def get_function_names(self):
     82         return self._function_info.keys()
     83 
     84     def get_function_info(self, name):
     85         return self._function_info[name]
     86 
     87 
     88 class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
     89     def __init__(self, tree = None):
     90         self._name = tree[2][1]
     91         SuiteInfoBase.__init__(self, tree and tree[-1] or None)
     92 
     93 
     94 class ClassInfo(SuiteInfoBase):
     95     def __init__(self, tree = None):
     96         self._name = tree[2][1]
     97         SuiteInfoBase.__init__(self, tree and tree[-1] or None)
     98 
     99     def get_method_names(self):
    100         return self._function_info.keys()
    101 
    102     def get_method_info(self, name):
    103         return self._function_info[name]
    104 
    105 
    106 class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    107     def __init__(self, tree = None, name = "<string>"):
    108         self._name = name
    109         SuiteInfoBase.__init__(self, tree)
    110         if tree:
    111             found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
    112             if found:
    113                 self._docstring = vars["docstring"]
    114 
    115 
    116 def match(pattern, data, vars=None):
    117     """Match `data' to `pattern', with variable extraction.
    118 
    119     pattern
    120         Pattern to match against, possibly containing variables.
    121 
    122     data
    123         Data to be checked and against which variables are extracted.
    124 
    125     vars
    126         Dictionary of variables which have already been found.  If not
    127         provided, an empty dictionary is created.
    128 
    129     The `pattern' value may contain variables of the form ['varname'] which
    130     are allowed to match anything.  The value that is matched is returned as
    131     part of a dictionary which maps 'varname' to the matched value.  'varname'
    132     is not required to be a string object, but using strings makes patterns
    133     and the code which uses them more readable.
    134 
    135     This function returns two values: a boolean indicating whether a match
    136     was found and a dictionary mapping variable names to their associated
    137     values.
    138     """
    139     if vars is None:
    140         vars = {}
    141     if type(pattern) is ListType:       # 'variables' are ['varname']
    142         vars[pattern[0]] = data
    143         return 1, vars
    144     if type(pattern) is not TupleType:
    145         return (pattern == data), vars
    146     if len(data) != len(pattern):
    147         return 0, vars
    148     for pattern, data in map(None, pattern, data):
    149         same, vars = match(pattern, data, vars)
    150         if not same:
    151             break
    152     return same, vars
    153 
    154 
    155 #  This pattern identifies compound statements, allowing them to be readily
    156 #  differentiated from simple statements.
    157 #
    158 COMPOUND_STMT_PATTERN = (
    159     symbol.stmt,
    160     (symbol.compound_stmt, ['compound'])
    161     )
    162 
    163 
#  This pattern will match a 'stmt' node which *might* represent a docstring;
#  docstrings require that the statement which provides the docstring be the
#  first statement in the class or function, which this pattern does not check.
#
#  The statement must consist of exactly one STRING token, reached through a
#  chain of single-child nodes, followed by a NEWLINE token whose text is ''.
#  The text of the STRING token is captured under the variable 'docstring'.
#
#  NOTE(review): the chain of nonterminals is hard-coded, so the pattern only
#  matches trees produced by a grammar with exactly this nesting -- confirm
#  it against the grammar of the interpreter in use.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))
    190      ))
    191