"""Generate ast module from specification

This script generates the ast module from a simple specification,
which makes it easy to accommodate changes in the grammar.  This
approach would be quite reasonable if the grammar changed often.
Instead, it is rather complex to generate the appropriate code.  And
the Node interface has changed more often than the grammar.
"""
      9 
     10 import fileinput
     11 import re
     12 import sys
     13 from StringIO import StringIO
     14 
     15 SPEC = "ast.txt"
     16 COMMA = ", "
     17 
     18 def load_boilerplate(file):
     19     f = open(file)
     20     buf = f.read()
     21     f.close()
     22     i = buf.find('### ''PROLOGUE')
     23     j = buf.find('### ''EPILOGUE')
     24     pro = buf[i+12:j].strip()
     25     epi = buf[j+12:].strip()
     26     return pro, epi
     27 
     28 def strip_default(arg):
     29     """Return the argname from an 'arg = default' string"""
     30     i = arg.find('=')
     31     if i == -1:
     32         return arg
     33     t = arg[:i].strip()
     34     return t
     35 
     36 P_NODE = 1
     37 P_OTHER = 2
     38 P_NESTED = 3
     39 P_NONE = 4
     40 
     41 class NodeInfo:
     42     """Each instance describes a specific AST node"""
     43     def __init__(self, name, args):
     44         self.name = name
     45         self.args = args.strip()
     46         self.argnames = self.get_argnames()
     47         self.argprops = self.get_argprops()
     48         self.nargs = len(self.argnames)
     49         self.init = []
     50 
     51     def get_argnames(self):
     52         if '(' in self.args:
     53             i = self.args.find('(')
     54             j = self.args.rfind(')')
     55             args = self.args[i+1:j]
     56         else:
     57             args = self.args
     58         return [strip_default(arg.strip())
     59                 for arg in args.split(',') if arg]
     60 
     61     def get_argprops(self):
     62         """Each argument can have a property like '*' or '!'
     63 
     64         XXX This method modifies the argnames in place!
     65         """
     66         d = {}
     67         hardest_arg = P_NODE
     68         for i in range(len(self.argnames)):
     69             arg = self.argnames[i]
     70             if arg.endswith('*'):
     71                 arg = self.argnames[i] = arg[:-1]
     72                 d[arg] = P_OTHER
     73                 hardest_arg = max(hardest_arg, P_OTHER)
     74             elif arg.endswith('!'):
     75                 arg = self.argnames[i] = arg[:-1]
     76                 d[arg] = P_NESTED
     77                 hardest_arg = max(hardest_arg, P_NESTED)
     78             elif arg.endswith('&'):
     79                 arg = self.argnames[i] = arg[:-1]
     80                 d[arg] = P_NONE
     81                 hardest_arg = max(hardest_arg, P_NONE)
     82             else:
     83                 d[arg] = P_NODE
     84         self.hardest_arg = hardest_arg
     85 
     86         if hardest_arg > P_NODE:
     87             self.args = self.args.replace('*', '')
     88             self.args = self.args.replace('!', '')
     89             self.args = self.args.replace('&', '')
     90 
     91         return d
     92 
     93     def gen_source(self):
     94         buf = StringIO()
     95         print >> buf, "class %s(Node):" % self.name
     96         self._gen_init(buf)
     97         print >> buf
     98         self._gen_getChildren(buf)
     99         print >> buf
    100         self._gen_getChildNodes(buf)
    101         print >> buf
    102         self._gen_repr(buf)
    103         buf.seek(0, 0)
    104         return buf.read()
    105 
    106     def _gen_init(self, buf):
    107         if self.args:
    108             argtuple = '(' in self.args
    109             args = self.args if not argtuple else ''.join(self.argnames)
    110             print >> buf, "    def __init__(self, %s, lineno=None):" % args
    111         else:
    112             print >> buf, "    def __init__(self, lineno=None):"
    113         if self.argnames:
    114             if argtuple:
    115                 for idx, name in enumerate(self.argnames):
    116                     print >> buf, "        self.%s = %s[%s]" % (name, args, idx)
    117             else:
    118                 for name in self.argnames:
    119                     print >> buf, "        self.%s = %s" % (name, name)
    120         print >> buf, "        self.lineno = lineno"
    121         # Copy the lines in self.init, indented four spaces.  The rstrip()

    122         # business is to get rid of the four spaces if line happens to be

    123         # empty, so that reindent.py is happy with the output.

    124         for line in self.init:
    125             print >> buf, ("    " + line).rstrip()
    126 
    127     def _gen_getChildren(self, buf):
    128         print >> buf, "    def getChildren(self):"
    129         if len(self.argnames) == 0:
    130             print >> buf, "        return ()"
    131         else:
    132             if self.hardest_arg < P_NESTED:
    133                 clist = COMMA.join(["self.%s" % c
    134                                     for c in self.argnames])
    135                 if self.nargs == 1:
    136                     print >> buf, "        return %s," % clist
    137                 else:
    138                     print >> buf, "        return %s" % clist
    139             else:
    140                 if len(self.argnames) == 1:
    141                     print >> buf, "        return tuple(flatten(self.%s))" % self.argnames[0]
    142                 else:
    143                     print >> buf, "        children = []"
    144                     template = "        children.%s(%sself.%s%s)"
    145                     for name in self.argnames:
    146                         if self.argprops[name] == P_NESTED:
    147                             print >> buf, template % ("extend", "flatten(",
    148                                                       name, ")")
    149                         else:
    150                             print >> buf, template % ("append", "", name, "")
    151                     print >> buf, "        return tuple(children)"
    152 
    153     def _gen_getChildNodes(self, buf):
    154         print >> buf, "    def getChildNodes(self):"
    155         if len(self.argnames) == 0:
    156             print >> buf, "        return ()"
    157         else:
    158             if self.hardest_arg < P_NESTED:
    159                 clist = ["self.%s" % c
    160                          for c in self.argnames
    161                          if self.argprops[c] == P_NODE]
    162                 if len(clist) == 0:
    163                     print >> buf, "        return ()"
    164                 elif len(clist) == 1:
    165                     print >> buf, "        return %s," % clist[0]
    166                 else:
    167                     print >> buf, "        return %s" % COMMA.join(clist)
    168             else:
    169                 print >> buf, "        nodelist = []"
    170                 template = "        nodelist.%s(%sself.%s%s)"
    171                 for name in self.argnames:
    172                     if self.argprops[name] == P_NONE:
    173                         tmp = ("        if self.%s is not None:\n"
    174                                "            nodelist.append(self.%s)")
    175                         print >> buf, tmp % (name, name)
    176                     elif self.argprops[name] == P_NESTED:
    177                         print >> buf, template % ("extend", "flatten_nodes(",
    178                                                   name, ")")
    179                     elif self.argprops[name] == P_NODE:
    180                         print >> buf, template % ("append", "", name, "")
    181                 print >> buf, "        return tuple(nodelist)"
    182 
    183     def _gen_repr(self, buf):
    184         print >> buf, "    def __repr__(self):"
    185         if self.argnames:
    186             fmt = COMMA.join(["%s"] * self.nargs)
    187             if '(' in self.args:
    188                 fmt = '(%s)' % fmt
    189             vals = ["repr(self.%s)" % name for name in self.argnames]
    190             vals = COMMA.join(vals)
    191             if self.nargs == 1:
    192                 vals = vals + ","
    193             print >> buf, '        return "%s(%s)" %% (%s)' % \
    194                   (self.name, fmt, vals)
    195         else:
    196             print >> buf, '        return "%s()"' % self.name
    197 
    198 rx_init = re.compile('init\((.*)\):')
    199 
    200 def parse_spec(file):
    201     classes = {}
    202     cur = None
    203     for line in fileinput.input(file):
    204         if line.strip().startswith('#'):
    205             continue
    206         mo = rx_init.search(line)
    207         if mo is None:
    208             if cur is None:
    209                 # a normal entry

    210                 try:
    211                     name, args = line.split(':')
    212                 except ValueError:
    213                     continue
    214                 classes[name] = NodeInfo(name, args)
    215                 cur = None
    216             else:
    217                 # some code for the __init__ method

    218                 cur.init.append(line)
    219         else:
    220             # some extra code for a Node's __init__ method

    221             name = mo.group(1)
    222             cur = classes[name]
    223     return sorted(classes.values(), key=lambda n: n.name)
    224 
    225 def main():
    226     prologue, epilogue = load_boilerplate(sys.argv[-1])
    227     print prologue
    228     print
    229     classes = parse_spec(SPEC)
    230     for info in classes:
    231         print info.gen_source()
    232     print epilogue
    233 
    234 if __name__ == "__main__":
    235     main()
    236     sys.exit(0)
    237 
### PROLOGUE

"""Python abstract syntax node definitions

This file is automatically generated by Tools/compiler/astgen.py
"""
    243 from consts import CO_VARARGS, CO_VARKEYWORDS
    244 
    245 def flatten(seq):
    246     l = []
    247     for elt in seq:
    248         t = type(elt)
    249         if t is tuple or t is list:
    250             for elt2 in flatten(elt):
    251                 l.append(elt2)
    252         else:
    253             l.append(elt)
    254     return l
    255 
    256 def flatten_nodes(seq):
    257     return [n for n in flatten(seq) if isinstance(n, Node)]
    258 
    259 nodes = {}
    260 
    261 class Node:
    262     """Abstract base class for ast nodes."""
    263     def getChildren(self):
    264         pass # implemented by subclasses

    265     def __iter__(self):
    266         for n in self.getChildren():
    267             yield n
    268     def asList(self): # for backwards compatibility

    269         return self.getChildren()
    270     def getChildNodes(self):
    271         pass # implemented by subclasses

    272 
    273 class EmptyNode(Node):
    274     pass
    275 
    276 class Expression(Node):
    277     # Expression is an artificial node class to support "eval"

    278     nodes["expression"] = "Expression"
    279     def __init__(self, node):
    280         self.node = node
    281 
    282     def getChildren(self):
    283         return self.node,
    284 
    285     def getChildNodes(self):
    286         return self.node,
    287 
    288     def __repr__(self):
    289         return "Expression(%s)" % (repr(self.node))
    290 
### EPILOGUE

    292 for name, obj in globals().items():
    293     if isinstance(obj, type) and issubclass(obj, Node):
    294         nodes[name.lower()] = obj
    295