#  Copyright (c) 1998-2002 John Aycock
#
#  Permission is hereby granted, free of charge, to any person obtaining
#  a copy of this software and associated documentation files (the
#  "Software"), to deal in the Software without restriction, including
#  without limitation the rights to use, copy, modify, merge, publish,
#  distribute, sublicense, and/or sell copies of the Software, and to
#  permit persons to whom the Software is furnished to do so, subject to
#  the following conditions:
#
#  The above copyright notice and this permission notice shall be
#  included in all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
#  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

__version__ = 'SPARK-0.7 (pre-alpha-5)'

import re
import string

def _namelist(instance):
    namelist, namedict, classlist = [], {}, [instance.__class__]
    for c in classlist:
        for b in c.__bases__:
            classlist.append(b)
        for name in c.__dict__.keys():
            if not namedict.has_key(name):
                namelist.append(name)
                namedict[name] = 1
    return namelist

class GenericScanner:
    def __init__(self, flags=0):
        pattern = self.reflect()
        self.re = re.compile(pattern, re.VERBOSE|flags)

        self.index2func = {}
        for name, number in self.re.groupindex.items():
            self.index2func[number-1] = getattr(self, 't_' + name)

    def makeRE(self, name):
        doc = getattr(self, name).__doc__
        rv = '(?P<%s>%s)' % (name[2:], doc)
        return rv

    def reflect(self):
        rv = []
        for name in _namelist(self):
            if name[:2] == 't_' and name != 't_default':
                rv.append(self.makeRE(name))

        rv.append(self.makeRE('t_default'))
        return string.join(rv, '|')

    def error(self, s, pos):
        print "Lexical error at position %s" % pos
        raise SystemExit

    def tokenize(self, s):
        pos = 0
        n = len(s)
        while pos < n:
            m = self.re.match(s, pos)
            if m is None:
                self.error(s, pos)

            groups = m.groups()
            for i in range(len(groups)):
                if groups[i] and self.index2func.has_key(i):
                    self.index2func[i](groups[i])
            pos = m.end()

    def t_default(self, s):
        r'( . | \n )+'
        print "Specification error: unmatched input"
        raise SystemExit

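#
#  Illustrative sketch (not part of the original module): a minimal
#  GenericScanner subclass.  Each t_ method's docstring is the regular
#  expression (compiled with re.VERBOSE) for that token class, and the
#  method body decides what to do with the matched text; t_default above
#  catches anything else.  The SimpleScanner, t_whitespace and t_number
#  names are hypothetical.
#
#      class SimpleScanner(GenericScanner):
#          def tokenize(self, input):
#              self.rv = []
#              GenericScanner.tokenize(self, input)
#              return self.rv
#
#          def t_whitespace(self, s):
#              r' \s+ '
#              pass
#
#          def t_number(self, s):
#              r' \d+ '
#              self.rv.append(('number', s))
#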
#
#  Extracted from GenericParser and made global so that [un]pickling works.
#
class _State:
    def __init__(self, stateno, items):
        self.T, self.complete, self.items = [], [], items
        self.stateno = stateno

class GenericParser:
    #
    #  An Earley parser, as per J. Earley, "An Efficient Context-Free
    #  Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley,
    #  "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
    #  Carnegie-Mellon University, August 1968.  New formulation of
    #  the parser according to J. Aycock, "Practical Earley Parsing
    #  and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
    #  2001, and J. Aycock and R. N. Horspool, "Practical Earley
    #  Parsing", unpublished paper, 2001.
    #

    def __init__(self, start):
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        self.augment(start)
        self.ruleschanged = 1

    _NULLABLE = '\e_'
    _START = 'START'
    _BOF = '|-'

    #
    #  When pickling, take the time to generate the full state machine;
    #  some information is then extraneous, too.  Unfortunately we
    #  can't save the rule2func map.
    #
    def __getstate__(self):
        if self.ruleschanged:
            #
            #  XXX - duplicated from parse()
            #
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)
        #
        #  XXX - should find a better way to do this..
        #
        changes = 1
        while changes:
            changes = 0
            for k, v in self.edges.items():
                if v is None:
                    state, sym = k
                    if self.states.has_key(state):
                        self.goto(state, sym)
                        changes = 1
        rv = self.__dict__.copy()
        for s in self.states.values():
            del s.items
        del rv['rule2func']
        del rv['nullable']
        del rv['cores']
        return rv

    def __setstate__(self, D):
        self.rules = {}
        self.rule2func = {}
        self.rule2name = {}
        self.collectRules()
        start = D['rules'][self._START][0][1][1]        # Blech.
        self.augment(start)
        D['rule2func'] = self.rule2func
        D['makeSet'] = self.makeSet_fast
        self.__dict__ = D

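    #
    #  Illustrative sketch (not part of the original module): thanks to
    #  __getstate__/__setstate__ above, a parser instance can be pickled
    #  once and later restored without re-deriving the state machine
    #  (the restored instance uses makeSet_fast, per __setstate__).
    #  "MyParser" is a hypothetical GenericParser subclass.
    #
    #      import pickle
    #      blob = pickle.dumps(MyParser())
    #      parser = pickle.loads(blob)
    #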
    #
    #  A hook for GenericASTBuilder and GenericASTMatcher.  Mess
    #  thee not with this; nor shall thee toucheth the _preprocess
    #  argument to addRule.
    #
    def preprocess(self, rule, func):       return rule, func

    def addRule(self, doc, func, _preprocess=1):
        fn = func
        rules = string.split(doc)

        index = []
        for i in range(len(rules)):
            if rules[i] == '::=':
                index.append(i-1)
        index.append(len(rules))

        for i in range(len(index)-1):
            lhs = rules[index[i]]
            rhs = rules[index[i]+2:index[i+1]]
            rule = (lhs, tuple(rhs))

            if _preprocess:
                rule, fn = self.preprocess(rule, func)

            if self.rules.has_key(lhs):
                self.rules[lhs].append(rule)
            else:
                self.rules[lhs] = [ rule ]
            self.rule2func[rule] = fn
            self.rule2name[rule] = func.__name__[2:]
        self.ruleschanged = 1

    def collectRules(self):
        for name in _namelist(self):
            if name[:2] == 'p_':
                func = getattr(self, name)
                doc = func.__doc__
                self.addRule(doc, func)

    def augment(self, start):
        rule = '%s ::= %s %s' % (self._START, self._BOF, start)
        self.addRule(rule, lambda args: args[1], 0)

    def computeNull(self):
        self.nullable = {}
        tbd = []

        for rulelist in self.rules.values():
            lhs = rulelist[0][0]
            self.nullable[lhs] = 0
            for rule in rulelist:
                rhs = rule[1]
                if len(rhs) == 0:
                    self.nullable[lhs] = 1
                    continue
                #
                #  We only need to consider rules which
                #  consist entirely of nonterminal symbols.
                #  This should be a savings on typical
                #  grammars.
                #
                for sym in rhs:
                    if not self.rules.has_key(sym):
                        break
                else:
                    tbd.append(rule)
        changes = 1
        while changes:
            changes = 0
            for lhs, rhs in tbd:
                if self.nullable[lhs]:
                    continue
                for sym in rhs:
                    if not self.nullable[sym]:
                        break
                else:
                    self.nullable[lhs] = 1
                    changes = 1

    def makeState0(self):
        s0 = _State(0, [])
        for rule in self.newrules[self._START]:
            s0.items.append((rule, 0))
        return s0

    def finalState(self, tokens):
        #
        #  Yuck.
        #
        if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
            return 1
        start = self.rules[self._START][0][1][1]
        return self.goto(1, start)

    def makeNewRules(self):
        worklist = []
        for rulelist in self.rules.values():
            for rule in rulelist:
                worklist.append((rule, 0, 1, rule))

        for rule, i, candidate, oldrule in worklist:
            lhs, rhs = rule
            n = len(rhs)
            while i < n:
                sym = rhs[i]
                if not self.rules.has_key(sym) or \
                   not self.nullable[sym]:
                    candidate = 0
                    i = i + 1
                    continue

                newrhs = list(rhs)
                newrhs[i] = self._NULLABLE+sym
                newrule = (lhs, tuple(newrhs))
                worklist.append((newrule, i+1,
                                 candidate, oldrule))
                candidate = 0
                i = i + 1
            else:
                if candidate:
                    lhs = self._NULLABLE+lhs
                    rule = (lhs, rhs)
                if self.newrules.has_key(lhs):
                    self.newrules[lhs].append(rule)
                else:
                    self.newrules[lhs] = [ rule ]
                self.new2old[rule] = oldrule

    def typestring(self, token):
        return None

    def error(self, token):
        print "Syntax error at or near `%s' token" % token
        raise SystemExit

    def parse(self, tokens):
        sets = [ [(1,0), (2,0)] ]
        self.links = {}

        if self.ruleschanged:
            self.computeNull()
            self.newrules = {}
            self.new2old = {}
            self.makeNewRules()
            self.ruleschanged = 0
            self.edges, self.cores = {}, {}
            self.states = { 0: self.makeState0() }
            self.makeState(0, self._BOF)

        for i in xrange(len(tokens)):
            sets.append([])

            if sets[i] == []:
                break
            self.makeSet(tokens[i], sets, i)
        else:
            sets.append([])
            self.makeSet(None, sets, len(tokens))

        #_dump(tokens, sets, self.states)

        finalitem = (self.finalState(tokens), 0)
        if finalitem not in sets[-2]:
            if len(tokens) > 0:
                self.error(tokens[i-1])
            else:
                self.error(None)

        return self.buildTree(self._START, finalitem,
                              tokens, len(sets)-2)

    def isnullable(self, sym):
        #
        #  For symbols in G_e only.  If we weren't supporting 1.5,
        #  could just use sym.startswith().
        #
        return self._NULLABLE == sym[0:len(self._NULLABLE)]

    def skip(self, (lhs, rhs), pos=0):
        n = len(rhs)
        while pos < n:
            if not self.isnullable(rhs[pos]):
                break
            pos = pos + 1
        return pos

    def makeState(self, state, sym):
        assert sym is not None
        #
        #  Compute \epsilon-kernel state's core and see if
        #  it exists already.
        #
        kitems = []
        for rule, pos in self.states[state].items:
            lhs, rhs = rule
            if rhs[pos:pos+1] == (sym,):
                kitems.append((rule, self.skip(rule, pos+1)))
        core = kitems

        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            return self.cores[tcore]
        #
        #  Nope, doesn't exist.  Compute it and the associated
        #  \epsilon-nonkernel state together; we'll need it right away.
        #
        k = self.cores[tcore] = len(self.states)
        K, NK = _State(k, kitems), _State(k+1, [])
        self.states[k] = K
        predicted = {}

        edges = self.edges
        rules = self.newrules
        for X in K, NK:
            worklist = X.items
            for item in worklist:
                rule, pos = item
                lhs, rhs = rule
                if pos == len(rhs):
                    X.complete.append(rule)
                    continue

                nextSym = rhs[pos]
                key = (X.stateno, nextSym)
                if not rules.has_key(nextSym):
                    if not edges.has_key(key):
                        edges[key] = None
                        X.T.append(nextSym)
                else:
                    edges[key] = None
                    if not predicted.has_key(nextSym):
                        predicted[nextSym] = 1
                        for prule in rules[nextSym]:
                            ppos = self.skip(prule)
                            new = (prule, ppos)
                            NK.items.append(new)
            #
            #  Problem: we know K needs generating, but we
            #  don't yet know about NK.  Can't commit anything
            #  regarding NK to self.edges until we're sure.  Should
            #  we delay committing on both K and NK to avoid this
            #  hacky code?  This creates other problems..
            #
            if X is K:
                edges = {}

        if NK.items == []:
            return k

        #
        #  Check for \epsilon-nonkernel's core.  Unfortunately we
        #  need to know the entire set of predicted nonterminals
        #  to do this without accidentally duplicating states.
        #
        core = predicted.keys()
        core.sort()
        tcore = tuple(core)
        if self.cores.has_key(tcore):
            self.edges[(k, None)] = self.cores[tcore]
            return k

        nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
        self.edges.update(edges)
        self.states[nk] = NK
        return k

    def goto(self, state, sym):
        key = (state, sym)
        if not self.edges.has_key(key):
            #
            #  No transitions from state on sym.
            #
            return None

        rv = self.edges[key]
        if rv is None:
            #
            #  Target state isn't generated yet.  Remedy this.
            #
            rv = self.makeState(state, sym)
            self.edges[key] = rv
        return rv

    def gotoT(self, state, t):
        return [self.goto(state, t)]

    def gotoST(self, state, st):
        rv = []
        for t in self.states[state].T:
            if st == t:
                rv.append(self.goto(state, t))
        return rv

    def add(self, set, item, i=None, predecessor=None, causal=None):
        if predecessor is None:
            if item not in set:
                set.append(item)
        else:
            key = (item, i)
            if item not in set:
                self.links[key] = []
                set.append(item)
            self.links[key].append((predecessor, causal))

    def makeSet(self, token, sets, i):
        cur, next = sets[i], sets[i+1]

        ttype = token is not None and self.typestring(token) or None
        if ttype is not None:
            fn, arg = self.gotoT, ttype
        else:
            fn, arg = self.gotoST, token

        for item in cur:
            ptr = (item, i)
            state, parent = item
            add = fn(state, arg)
            for k in add:
                if k is not None:
                    self.add(next, (k, parent), i+1, ptr)
                    nk = self.goto(k, None)
                    if nk is not None:
                        self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    k = self.goto(pstate, lhs)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        self.add(cur, (k, pparent),
                                 i, pptr, why)
                        nk = self.goto(k, None)
                        if nk is not None:
                            self.add(cur, (nk, i))

    def makeSet_fast(self, token, sets, i):
        #
        #  Call *only* when the entire state machine has been built!
        #  It relies on self.edges being filled in completely, and
        #  then duplicates and inlines code to boost speed at the
        #  cost of extreme ugliness.
        #
        cur, next = sets[i], sets[i+1]
        ttype = token is not None and self.typestring(token) or None

        for item in cur:
            ptr = (item, i)
            state, parent = item
            if ttype is not None:
                k = self.edges.get((state, ttype), None)
                if k is not None:
                    #self.add(next, (k, parent), i+1, ptr)
                    #INLINED --v
                    new = (k, parent)
                    key = (new, i+1)
                    if new not in next:
                        self.links[key] = []
                        next.append(new)
                    self.links[key].append((ptr, None))
                    #INLINED --^
                    #nk = self.goto(k, None)
                    nk = self.edges.get((k, None), None)
                    if nk is not None:
                        #self.add(next, (nk, i+1))
                        #INLINED --v
                        new = (nk, i+1)
                        if new not in next:
                            next.append(new)
                        #INLINED --^
            else:
                add = self.gotoST(state, token)
                for k in add:
                    if k is not None:
                        self.add(next, (k, parent), i+1, ptr)
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            self.add(next, (nk, i+1))

            if parent == i:
                continue

            for rule in self.states[state].complete:
                lhs, rhs = rule
                for pitem in sets[parent]:
                    pstate, pparent = pitem
                    #k = self.goto(pstate, lhs)
                    k = self.edges.get((pstate, lhs), None)
                    if k is not None:
                        why = (item, i, rule)
                        pptr = (pitem, parent)
                        #self.add(cur, (k, pparent),
                        #        i, pptr, why)
                        #INLINED --v
                        new = (k, pparent)
                        key = (new, i)
                        if new not in cur:
                            self.links[key] = []
                            cur.append(new)
                        self.links[key].append((pptr, why))
                        #INLINED --^
                        #nk = self.goto(k, None)
                        nk = self.edges.get((k, None), None)
                        if nk is not None:
                            #self.add(cur, (nk, i))
                            #INLINED --v
                            new = (nk, i)
                            if new not in cur:
                                cur.append(new)
                            #INLINED --^

    def predecessor(self, key, causal):
        for p, c in self.links[key]:
            if c == causal:
                return p
        assert 0

    def causal(self, key):
        links = self.links[key]
        if len(links) == 1:
            return links[0][1]
        choices = []
        rule2cause = {}
        for p, c in links:
            rule = c[2]
            choices.append(rule)
            rule2cause[rule] = c
        return rule2cause[self.ambiguity(choices)]

    def deriveEpsilon(self, nt):
        if len(self.newrules[nt]) > 1:
            rule = self.ambiguity(self.newrules[nt])
        else:
            rule = self.newrules[nt][0]
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            attr[i] = self.deriveEpsilon(rhs[i])
        return self.rule2func[self.new2old[rule]](attr)

    def buildTree(self, nt, item, tokens, k):
        state, parent = item

        choices = []
        for rule in self.states[state].complete:
            if rule[0] == nt:
                choices.append(rule)
        rule = choices[0]
        if len(choices) > 1:
            rule = self.ambiguity(choices)
        #print rule

        rhs = rule[1]
        attr = [None] * len(rhs)

        for i in range(len(rhs)-1, -1, -1):
            sym = rhs[i]
            if not self.newrules.has_key(sym):
                if sym != self._BOF:
                    attr[i] = tokens[k-1]
                    key = (item, k)
                    item, k = self.predecessor(key, None)
            #elif self.isnullable(sym):
            elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
                attr[i] = self.deriveEpsilon(sym)
            else:
                key = (item, k)
                why = self.causal(key)
                attr[i] = self.buildTree(sym, why[0],
                                         tokens, why[1])
                item, k = self.predecessor(key, why)
        return self.rule2func[self.new2old[rule]](attr)

    def ambiguity(self, rules):
        #
        #  XXX - problem here and in collectRules() if the same rule
        #        appears in >1 method.  Also undefined results if rules
        #        causing the ambiguity appear in the same method.
        #
        sortlist = []
        name2index = {}
        for i in range(len(rules)):
            lhs, rhs = rule = rules[i]
            name = self.rule2name[self.new2old[rule]]
            sortlist.append((len(rhs), name))
            name2index[name] = i
        sortlist.sort()
        list = map(lambda (a,b): b, sortlist)
        return rules[name2index[self.resolve(list)]]

    def resolve(self, list):
        #
        #  Resolve ambiguity in favor of the shortest RHS.
        #  Since we walk the tree from the top down, this
        #  should effectively resolve in favor of a "shift".
        #
        return list[0]

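#
#  Illustrative sketch (not part of the original module): a minimal
#  GenericParser subclass.  Each p_ method's docstring carries one or more
#  "lhs ::= rhs" productions (picked up by collectRules/addRule), and the
#  method is called with the list of right-hand-side attributes.  Token
#  types come from typestring(); grammar symbols with no productions of
#  their own are the terminals.  The Token and ExprParser names below are
#  hypothetical.
#
#      class Token:
#          def __init__(self, type, attr=None):
#              self.type, self.attr = type, attr
#
#      class ExprParser(GenericParser):
#          def __init__(self):
#              GenericParser.__init__(self, 'expr')
#
#          def typestring(self, token):
#              return token.type
#
#          def p_expr_add(self, args):
#              ' expr ::= expr + term '
#              return args[0] + args[2]
#
#          def p_expr_term(self, args):
#              ' expr ::= term '
#              return args[0]
#
#          def p_term(self, args):
#              ' term ::= number '
#              return int(args[0].attr)
#
#      # ExprParser().parse([Token('number', '1'), Token('+'),
#      #                     Token('number', '2')]) evaluates to 3
#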
#
#  GenericASTBuilder automagically constructs a concrete/abstract syntax tree
#  for a given input.  The extra argument is a class (not an instance!)
#  which supports the "__setslice__" and "__len__" methods.
#
#  XXX - silently overrides any user code in methods.
#

class GenericASTBuilder(GenericParser):
    def __init__(self, AST, start):
        GenericParser.__init__(self, start)
        self.AST = AST

    def preprocess(self, rule, func):
        rebind = lambda lhs, self=self: \
                        lambda args, lhs=lhs, self=self: \
                                self.buildASTNode(args, lhs)
        lhs, rhs = rule
        return rule, rebind(lhs)

    def buildASTNode(self, args, lhs):
        children = []
        for arg in args:
            if isinstance(arg, self.AST):
                children.append(arg)
            else:
                children.append(self.terminal(arg))
        return self.nonterminal(lhs, children)

    def terminal(self, token):      return token

    def nonterminal(self, type, args):
        rv = self.AST(type)
        rv[:len(args)] = args
        return rv

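#
#  Illustrative sketch (not part of the original module): a minimal AST
#  node class usable with GenericASTBuilder.  Per the comment above, the
#  tree class needs "__setslice__" and "__len__"; a list-backed node is
#  enough, and giving it a .type attribute and "__getitem__" also makes it
#  usable with GenericASTTraversal below.  The name SimpleAST is
#  hypothetical.
#
#      class SimpleAST:
#          def __init__(self, type):
#              self.type = type
#              self._kids = []
#          def __getitem__(self, i):
#              return self._kids[i]
#          def __len__(self):
#              return len(self._kids)
#          def __setslice__(self, low, high, seq):
#              self._kids[low:high] = seq
#
#  A GenericASTBuilder subclass carrying the p_ grammar methods, created as
#  MyBuilder(SimpleAST, start), then returns a tree of SimpleAST nodes whose
#  leaves are the original tokens.
#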
#
#  GenericASTTraversal is a Visitor pattern according to Design Patterns.  For
#  each node it attempts to invoke the method n_<node type>, falling
#  back onto the default() method if the n_* can't be found.  The preorder
#  traversal also looks for an exit hook named n_<node type>_exit (no default
#  routine is called if it's not found).  To prematurely halt traversal
#  of a subtree, call the prune() method -- this only makes sense for a
#  preorder traversal.  Node type is determined via the typestring() method.
#

class GenericASTTraversalPruningException:
    pass

class GenericASTTraversal:
    def __init__(self, ast):
        self.ast = ast

    def typestring(self, node):
        return node.type

    def prune(self):
        raise GenericASTTraversalPruningException

    def preorder(self, node=None):
        if node is None:
            node = self.ast

        try:
            name = 'n_' + self.typestring(node)
            if hasattr(self, name):
                func = getattr(self, name)
                func(node)
            else:
                self.default(node)
        except GenericASTTraversalPruningException:
            return

        for kid in node:
            self.preorder(kid)

        name = name + '_exit'
        if hasattr(self, name):
            func = getattr(self, name)
            func(node)

    def postorder(self, node=None):
        if node is None:
            node = self.ast

        for kid in node:
            self.postorder(kid)

        name = 'n_' + self.typestring(node)
        if hasattr(self, name):
            func = getattr(self, name)
            func(node)
        else:
            self.default(node)


    def default(self, node):
        pass

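#
#  Illustrative sketch (not part of the original module): a
#  GenericASTTraversal subclass.  Visit methods are named n_<node type>,
#  n_<node type>_exit hooks run after a node's children in preorder(),
#  and prune() skips the subtree below the current node.  The Printer and
#  n_number names are hypothetical and assume nodes carrying a .type
#  attribute, as typestring() above expects.
#
#      class Printer(GenericASTTraversal):
#          def __init__(self, ast):
#              GenericASTTraversal.__init__(self, ast)
#              self.preorder()
#
#          def n_number(self, node):
#              print node.type
#              self.prune()        # don't descend below this node
#
#          def default(self, node):
#              pass
#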
#
#  GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__"
#  implemented.
#
#  XXX - makes assumptions about how GenericParser walks the parse tree.
#

class GenericASTMatcher(GenericParser):
    def __init__(self, start, ast):
        GenericParser.__init__(self, start)
        self.ast = ast

    def preprocess(self, rule, func):
        rebind = lambda func, self=self: \
                        lambda args, func=func, self=self: \
                                self.foundMatch(args, func)
        lhs, rhs = rule
        rhslist = list(rhs)
        rhslist.reverse()

        return (lhs, tuple(rhslist)), rebind(func)

    def foundMatch(self, args, func):
        func(args[-1])
        return args[-1]

    def match_r(self, node):
        self.input.insert(0, node)
        children = 0

        for child in node:
            if children == 0:
                self.input.insert(0, '(')
            children = children + 1
            self.match_r(child)

        if children > 0:
            self.input.insert(0, ')')

    def match(self, ast=None):
        if ast is None:
            ast = self.ast
        self.input = []

        self.match_r(ast)
        self.parse(self.input)

    def resolve(self, list):
        #
        #  Resolve ambiguity in favor of the longest RHS.
        #
        return list[-1]

def _dump(tokens, sets, states):
    for i in range(len(sets)):
        print 'set', i
        for item in sets[i]:
            print '\t', item
            for (lhs, rhs), pos in states[item[0]].items:
                print '\t\t', lhs, '::=',
                print string.join(rhs[:pos]),
                print '.',
                print string.join(rhs[pos:])
        if i < len(tokens):
            print
            print 'token', str(tokens[i])
            print