# ply/yacc.py -- (code-viewer navigation header removed)
      1 # -----------------------------------------------------------------------------
      2 # ply: yacc.py
      3 #
      4 # Copyright (C) 2001-2011,
      5 # David M. Beazley (Dabeaz LLC)
      6 # All rights reserved.
      7 #
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are
     10 # met:
     11 # 
     12 # * Redistributions of source code must retain the above copyright notice,
     13 #   this list of conditions and the following disclaimer.  
     14 # * Redistributions in binary form must reproduce the above copyright notice, 
     15 #   this list of conditions and the following disclaimer in the documentation
     16 #   and/or other materials provided with the distribution.  
     17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
     18 #   endorse or promote products derived from this software without
     19 #  specific prior written permission. 
     20 #
     21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 # -----------------------------------------------------------------------------
     33 #
     34 # This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
     36 # Python documentation strings.  The inspiration for this technique was borrowed
     37 # from John Aycock's Spark parsing system.  PLY might be viewed as cross between
     38 # Spark and the GNU bison utility.
     39 #
     40 # The current implementation is only somewhat object-oriented. The
     41 # LR parser itself is defined in terms of an object (which allows multiple
     42 # parsers to co-exist).  However, most of the variables used during table
     43 # construction are defined in terms of global variables.  Users shouldn't
     44 # notice unless they are trying to define multiple parsers at the same
     45 # time using threads (in which case they should have their head examined).
     46 #
     47 # This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup (at) alumni.uchicago.edu),
     49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
     50 # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
     51 # by the more efficient DeRemer and Pennello algorithm.
     52 #
     53 # :::::::: WARNING :::::::
     54 #
     55 # Construction of LR parsing tables is fairly complicated and expensive.
     56 # To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."   Modify the code at your
     59 # own risk!
     60 # ----------------------------------------------------------------------------
     61 
     62 __version__    = "3.4"
     63 __tabversion__ = "3.2"       # Table version
     64 
     65 #-----------------------------------------------------------------------------
     66 #                     === User configurable parameters ===
     67 #
     68 # Change these to modify the default behavior of yacc (if you wish)
     69 #-----------------------------------------------------------------------------
     70 
     71 yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
     72                                # a 'parser.out' file in the current directory
     73 
     74 debug_file  = 'parser.out'     # Default name of the debugging file
     75 tab_module  = 'parsetab'       # Default name of the table module
     76 default_lr  = 'LALR'           # Default LR table generation method
     77 
     78 error_count = 3                # Number of symbols that must be shifted to leave recovery mode
     79 
     80 yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
     81                                # implementations of certain functions.
     82 
     83 resultlimit = 40               # Size limit of results when running in debug mode.
     84 
     85 pickle_protocol = 0            # Protocol to use when writing pickle files
     86 
     87 import re, types, sys, os.path
     88 
     89 # Compatibility function for python 2.6/3.0
     90 if sys.version_info[0] < 3:
     91     def func_code(f):
     92         return f.func_code
     93 else:
     94     def func_code(f):
     95         return f.__code__
     96 
     97 # Compatibility
     98 try:
     99     MAXINT = sys.maxint
    100 except AttributeError:
    101     MAXINT = sys.maxsize
    102 
    103 # Python 2.x/3.0 compatibility.
    104 def load_ply_lex():
    105     if sys.version_info[0] < 3:
    106         import lex
    107     else:
    108         import ply.lex as lex
    109     return lex
    110 
    111 # This object is a stand-in for a logging object created by the 
    112 # logging module.   PLY will use this by default to create things
    113 # such as the parser.out file.  If a user wants more detailed
    114 # information, they can create their own logging object and pass
    115 # it into PLY.
    116 
    117 class PlyLogger(object):
    118     def __init__(self,f):
    119         self.f = f
    120     def debug(self,msg,*args,**kwargs):
    121         self.f.write((msg % args) + "\n")
    122     info     = debug
    123 
    124     def warning(self,msg,*args,**kwargs):
    125         self.f.write("WARNING: "+ (msg % args) + "\n")
    126 
    127     def error(self,msg,*args,**kwargs):
    128         self.f.write("ERROR: " + (msg % args) + "\n")
    129 
    130     critical = debug
    131 
    132 # Null logger is used when no output is generated. Does nothing.
    133 class NullLogger(object):
    134     def __getattribute__(self,name):
    135         return self
    136     def __call__(self,*args,**kwargs):
    137         return self
    138         
    139 # Exception raised for yacc-related errors
    140 class YaccError(Exception):   pass
    141 
    142 # Format the result message that the parser produces when running in debug mode.
    143 def format_result(r):
    144     repr_str = repr(r)
    145     if '\n' in repr_str: repr_str = repr(repr_str)
    146     if len(repr_str) > resultlimit:
    147         repr_str = repr_str[:resultlimit]+" ..."
    148     result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
    149     return result
    150 
    151 
    152 # Format stack entries when the parser is running in debug mode
    153 def format_stack_entry(r):
    154     repr_str = repr(r)
    155     if '\n' in repr_str: repr_str = repr(repr_str)
    156     if len(repr_str) < 16:
    157         return repr_str
    158     else:
    159         return "<%s @ 0x%x>" % (type(r).__name__,id(r))
    160 
    161 #-----------------------------------------------------------------------------
    162 #                        ===  LR Parsing Engine ===
    163 #
    164 # The following classes are used for the LR parser itself.  These are not
    165 # used during table construction and are independent of the actual LR
    166 # table generation algorithm
    167 #-----------------------------------------------------------------------------
    168 
    169 # This class is used to hold non-terminal grammar symbols during parsing.
    170 # It normally has the following attributes set:
    171 #        .type       = Grammar symbol type
    172 #        .value      = Symbol value
    173 #        .lineno     = Starting line number
    174 #        .endlineno  = Ending line number (optional, set automatically)
    175 #        .lexpos     = Starting lex position
    176 #        .endlexpos  = Ending lex position (optional, set automatically)
    177 
    178 class YaccSymbol:
    179     def __str__(self):    return self.type
    180     def __repr__(self):   return str(self)
    181 
    182 # This class is a wrapper around the objects actually passed to each
    183 # grammar rule.   Index lookup and assignment actually assign the
    184 # .value attribute of the underlying YaccSymbol object.
    185 # The lineno() method returns the line number of a given
    186 # item (or 0 if not defined).   The linespan() method returns
    187 # a tuple of (startline,endline) representing the range of lines
    188 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
    189 # representing the range of positional information for a symbol.
    190 
    191 class YaccProduction:
    192     def __init__(self,s,stack=None):
    193         self.slice = s
    194         self.stack = stack
    195         self.lexer = None
    196         self.parser= None
    197     def __getitem__(self,n):
    198         if n >= 0: return self.slice[n].value
    199         else: return self.stack[n].value
    200 
    201     def __setitem__(self,n,v):
    202         self.slice[n].value = v
    203 
    204     def __getslice__(self,i,j):
    205         return [s.value for s in self.slice[i:j]]
    206 
    207     def __len__(self):
    208         return len(self.slice)
    209 
    210     def lineno(self,n):
    211         return getattr(self.slice[n],"lineno",0)
    212 
    213     def set_lineno(self,n,lineno):
    214         self.slice[n].lineno = lineno
    215 
    216     def linespan(self,n):
    217         startline = getattr(self.slice[n],"lineno",0)
    218         endline = getattr(self.slice[n],"endlineno",startline)
    219         return startline,endline
    220 
    221     def lexpos(self,n):
    222         return getattr(self.slice[n],"lexpos",0)
    223 
    224     def lexspan(self,n):
    225         startpos = getattr(self.slice[n],"lexpos",0)
    226         endpos = getattr(self.slice[n],"endlexpos",startpos)
    227         return startpos,endpos
    228 
    229     def error(self):
    230        raise SyntaxError
    231 
    232 
    233 # -----------------------------------------------------------------------------
    234 #                               == LRParser ==
    235 #
    236 # The LR Parsing engine.
    237 # -----------------------------------------------------------------------------
    238 
    239 class LRParser:
    240     def __init__(self,lrtab,errorf):
    241         self.productions = lrtab.lr_productions
    242         self.action      = lrtab.lr_action
    243         self.goto        = lrtab.lr_goto
    244         self.errorfunc   = errorf
    245 
    246     def errok(self):
    247         self.errorok     = 1
    248 
    249     def restart(self):
    250         del self.statestack[:]
    251         del self.symstack[:]
    252         sym = YaccSymbol()
    253         sym.type = '$end'
    254         self.symstack.append(sym)
    255         self.statestack.append(0)
    256 
    257     def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
    258         if debug or yaccdevel:
    259             if isinstance(debug,int):
    260                 debug = PlyLogger(sys.stderr)
    261             return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
    262         elif tracking:
    263             return self.parseopt(input,lexer,debug,tracking,tokenfunc)
    264         else:
    265             return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
    266         
    267 
    268     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    269     # parsedebug().
    270     #
    271     # This is the debugging enabled version of parse().  All changes made to the
    272     # parsing engine should be made here.   For the non-debugging version,
    273     # copy this code to a method parseopt() and delete all of the sections
    274     # enclosed in:
    275     #
    276     #      #--! DEBUG
    277     #      statements
    278     #      #--! DEBUG
    279     #
    280     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    281 
    282     def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
    283         lookahead = None                 # Current lookahead symbol
    284         lookaheadstack = [ ]             # Stack of lookahead symbols
    285         actions = self.action            # Local reference to action table (to avoid lookup on self.)
    286         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
    287         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
    288         pslice  = YaccProduction(None)   # Production object passed to grammar rules
    289         errorcount = 0                   # Used during error recovery 
    290 
    291         # --! DEBUG
    292         debug.info("PLY: PARSE DEBUG START")
    293         # --! DEBUG
    294 
    295         # If no lexer was given, we will try to use the lex module
    296         if not lexer:
    297             lex = load_ply_lex()
    298             lexer = lex.lexer
    299 
    300         # Set up the lexer and parser objects on pslice
    301         pslice.lexer = lexer
    302         pslice.parser = self
    303 
    304         # If input was supplied, pass to lexer
    305         if input is not None:
    306             lexer.input(input)
    307 
    308         if tokenfunc is None:
    309            # Tokenize function
    310            get_token = lexer.token
    311         else:
    312            get_token = tokenfunc
    313 
    314         # Set up the state and symbol stacks
    315 
    316         statestack = [ ]                # Stack of parsing states
    317         self.statestack = statestack
    318         symstack   = [ ]                # Stack of grammar symbols
    319         self.symstack = symstack
    320 
    321         pslice.stack = symstack         # Put in the production
    322         errtoken   = None               # Err token
    323 
    324         # The start state is assumed to be (0,$end)
    325 
    326         statestack.append(0)
    327         sym = YaccSymbol()
    328         sym.type = "$end"
    329         symstack.append(sym)
    330         state = 0
    331         while 1:
    332             # Get the next symbol on the input.  If a lookahead symbol
    333             # is already set, we just use that. Otherwise, we'll pull
    334             # the next token off of the lookaheadstack or from the lexer
    335 
    336             # --! DEBUG
    337             debug.debug('')
    338             debug.debug('State  : %s', state)
    339             # --! DEBUG
    340 
    341             if not lookahead:
    342                 if not lookaheadstack:
    343                     lookahead = get_token()     # Get the next token
    344                 else:
    345                     lookahead = lookaheadstack.pop()
    346                 if not lookahead:
    347                     lookahead = YaccSymbol()
    348                     lookahead.type = "$end"
    349 
    350             # --! DEBUG
    351             debug.debug('Stack  : %s',
    352                         ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
    353             # --! DEBUG
    354 
    355             # Check the action table
    356             ltype = lookahead.type
    357             t = actions[state].get(ltype)
    358 
    359             if t is not None:
    360                 if t > 0:
    361                     # shift a symbol on the stack
    362                     statestack.append(t)
    363                     state = t
    364                     
    365                     # --! DEBUG
    366                     debug.debug("Action : Shift and goto state %s", t)
    367                     # --! DEBUG
    368 
    369                     symstack.append(lookahead)
    370                     lookahead = None
    371 
    372                     # Decrease error count on successful shift
    373                     if errorcount: errorcount -=1
    374                     continue
    375 
    376                 if t < 0:
    377                     # reduce a symbol on the stack, emit a production
    378                     p = prod[-t]
    379                     pname = p.name
    380                     plen  = p.len
    381 
    382                     # Get production function
    383                     sym = YaccSymbol()
    384                     sym.type = pname       # Production name
    385                     sym.value = None
    386 
    387                     # --! DEBUG
    388                     if plen:
    389                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
    390                     else:
    391                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
    392                         
    393                     # --! DEBUG
    394 
    395                     if plen:
    396                         targ = symstack[-plen-1:]
    397                         targ[0] = sym
    398 
    399                         # --! TRACKING
    400                         if tracking:
    401                            t1 = targ[1]
    402                            sym.lineno = t1.lineno
    403                            sym.lexpos = t1.lexpos
    404                            t1 = targ[-1]
    405                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
    406                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
    407 
    408                         # --! TRACKING
    409 
    410                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    411                         # The code enclosed in this section is duplicated 
    412                         # below as a performance optimization.  Make sure
    413                         # changes get made in both locations.
    414 
    415                         pslice.slice = targ
    416                         
    417                         try:
    418                             # Call the grammar rule with our special slice object
    419                             del symstack[-plen:]
    420                             del statestack[-plen:]
    421                             p.callable(pslice)
    422                             # --! DEBUG
    423                             debug.info("Result : %s", format_result(pslice[0]))
    424                             # --! DEBUG
    425                             symstack.append(sym)
    426                             state = goto[statestack[-1]][pname]
    427                             statestack.append(state)
    428                         except SyntaxError:
    429                             # If an error was set. Enter error recovery state
    430                             lookaheadstack.append(lookahead)
    431                             symstack.pop()
    432                             statestack.pop()
    433                             state = statestack[-1]
    434                             sym.type = 'error'
    435                             lookahead = sym
    436                             errorcount = error_count
    437                             self.errorok = 0
    438                         continue
    439                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    440     
    441                     else:
    442 
    443                         # --! TRACKING
    444                         if tracking:
    445                            sym.lineno = lexer.lineno
    446                            sym.lexpos = lexer.lexpos
    447                         # --! TRACKING
    448 
    449                         targ = [ sym ]
    450 
    451                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    452                         # The code enclosed in this section is duplicated 
    453                         # above as a performance optimization.  Make sure
    454                         # changes get made in both locations.
    455 
    456                         pslice.slice = targ
    457 
    458                         try:
    459                             # Call the grammar rule with our special slice object
    460                             p.callable(pslice)
    461                             # --! DEBUG
    462                             debug.info("Result : %s", format_result(pslice[0]))
    463                             # --! DEBUG
    464                             symstack.append(sym)
    465                             state = goto[statestack[-1]][pname]
    466                             statestack.append(state)
    467                         except SyntaxError:
    468                             # If an error was set. Enter error recovery state
    469                             lookaheadstack.append(lookahead)
    470                             symstack.pop()
    471                             statestack.pop()
    472                             state = statestack[-1]
    473                             sym.type = 'error'
    474                             lookahead = sym
    475                             errorcount = error_count
    476                             self.errorok = 0
    477                         continue
    478                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    479 
    480                 if t == 0:
    481                     n = symstack[-1]
    482                     result = getattr(n,"value",None)
    483                     # --! DEBUG
    484                     debug.info("Done   : Returning %s", format_result(result))
    485                     debug.info("PLY: PARSE DEBUG END")
    486                     # --! DEBUG
    487                     return result
    488 
    489             if t == None:
    490 
    491                 # --! DEBUG
    492                 debug.error('Error  : %s',
    493                             ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
    494                 # --! DEBUG
    495 
    496                 # We have some kind of parsing error here.  To handle
    497                 # this, we are going to push the current token onto
    498                 # the tokenstack and replace it with an 'error' token.
    499                 # If there are any synchronization rules, they may
    500                 # catch it.
    501                 #
    502                 # In addition to pushing the error token, we call call
    503                 # the user defined p_error() function if this is the
    504                 # first syntax error.  This function is only called if
    505                 # errorcount == 0.
    506                 if errorcount == 0 or self.errorok:
    507                     errorcount = error_count
    508                     self.errorok = 0
    509                     errtoken = lookahead
    510                     if errtoken.type == "$end":
    511                         errtoken = None               # End of file!
    512                     if self.errorfunc:
    513                         global errok,token,restart
    514                         errok = self.errok        # Set some special functions available in error recovery
    515                         token = get_token
    516                         restart = self.restart
    517                         if errtoken and not hasattr(errtoken,'lexer'):
    518                             errtoken.lexer = lexer
    519                         tok = self.errorfunc(errtoken)
    520                         del errok, token, restart   # Delete special functions
    521 
    522                         if self.errorok:
    523                             # User must have done some kind of panic
    524                             # mode recovery on their own.  The
    525                             # returned token is the next lookahead
    526                             lookahead = tok
    527                             errtoken = None
    528                             continue
    529                     else:
    530                         if errtoken:
    531                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
    532                             else: lineno = 0
    533                             if lineno:
    534                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
    535                             else:
    536                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
    537                         else:
    538                             sys.stderr.write("yacc: Parse error in input. EOF\n")
    539                             return
    540 
    541                 else:
    542                     errorcount = error_count
    543 
    544                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
    545                 # entire parse has been rolled back and we're completely hosed.   The token is
    546                 # discarded and we just keep going.
    547 
    548                 if len(statestack) <= 1 and lookahead.type != "$end":
    549                     lookahead = None
    550                     errtoken = None
    551                     state = 0
    552                     # Nuke the pushback stack
    553                     del lookaheadstack[:]
    554                     continue
    555 
    556                 # case 2: the statestack has a couple of entries on it, but we're
    557                 # at the end of the file. nuke the top entry and generate an error token
    558 
    559                 # Start nuking entries on the stack
    560                 if lookahead.type == "$end":
    561                     # Whoa. We're really hosed here. Bail out
    562                     return
    563 
    564                 if lookahead.type != 'error':
    565                     sym = symstack[-1]
    566                     if sym.type == 'error':
    567                         # Hmmm. Error is on top of stack, we'll just nuke input
    568                         # symbol and continue
    569                         lookahead = None
    570                         continue
    571                     t = YaccSymbol()
    572                     t.type = 'error'
    573                     if hasattr(lookahead,"lineno"):
    574                         t.lineno = lookahead.lineno
    575                     t.value = lookahead
    576                     lookaheadstack.append(lookahead)
    577                     lookahead = t
    578                 else:
    579                     symstack.pop()
    580                     statestack.pop()
    581                     state = statestack[-1]       # Potential bug fix
    582 
    583                 continue
    584 
    585             # Call an error function here
    586             raise RuntimeError("yacc: internal parser error!!!\n")
    587 
    588     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    589     # parseopt().
    590     #
    591     # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    592     # Edit the debug version above, then copy any modifications to the method
    593     # below while removing #--! DEBUG sections.
    594     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    595 
    596 
    597     def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
    598         lookahead = None                 # Current lookahead symbol
    599         lookaheadstack = [ ]             # Stack of lookahead symbols
    600         actions = self.action            # Local reference to action table (to avoid lookup on self.)
    601         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
    602         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
    603         pslice  = YaccProduction(None)   # Production object passed to grammar rules
    604         errorcount = 0                   # Used during error recovery 
    605 
    606         # If no lexer was given, we will try to use the lex module
    607         if not lexer:
    608             lex = load_ply_lex()
    609             lexer = lex.lexer
    610         
    611         # Set up the lexer and parser objects on pslice
    612         pslice.lexer = lexer
    613         pslice.parser = self
    614 
    615         # If input was supplied, pass to lexer
    616         if input is not None:
    617             lexer.input(input)
    618 
    619         if tokenfunc is None:
    620            # Tokenize function
    621            get_token = lexer.token
    622         else:
    623            get_token = tokenfunc
    624 
    625         # Set up the state and symbol stacks
    626 
    627         statestack = [ ]                # Stack of parsing states
    628         self.statestack = statestack
    629         symstack   = [ ]                # Stack of grammar symbols
    630         self.symstack = symstack
    631 
    632         pslice.stack = symstack         # Put in the production
    633         errtoken   = None               # Err token
    634 
    635         # The start state is assumed to be (0,$end)
    636 
    637         statestack.append(0)
    638         sym = YaccSymbol()
    639         sym.type = '$end'
    640         symstack.append(sym)
    641         state = 0
    642         while 1:
    643             # Get the next symbol on the input.  If a lookahead symbol
    644             # is already set, we just use that. Otherwise, we'll pull
    645             # the next token off of the lookaheadstack or from the lexer
    646 
    647             if not lookahead:
    648                 if not lookaheadstack:
    649                     lookahead = get_token()     # Get the next token
    650                 else:
    651                     lookahead = lookaheadstack.pop()
    652                 if not lookahead:
    653                     lookahead = YaccSymbol()
    654                     lookahead.type = '$end'
    655 
    656             # Check the action table
    657             ltype = lookahead.type
    658             t = actions[state].get(ltype)
    659 
    660             if t is not None:
    661                 if t > 0:
    662                     # shift a symbol on the stack
    663                     statestack.append(t)
    664                     state = t
    665 
    666                     symstack.append(lookahead)
    667                     lookahead = None
    668 
    669                     # Decrease error count on successful shift
    670                     if errorcount: errorcount -=1
    671                     continue
    672 
    673                 if t < 0:
    674                     # reduce a symbol on the stack, emit a production
    675                     p = prod[-t]
    676                     pname = p.name
    677                     plen  = p.len
    678 
    679                     # Get production function
    680                     sym = YaccSymbol()
    681                     sym.type = pname       # Production name
    682                     sym.value = None
    683 
    684                     if plen:
    685                         targ = symstack[-plen-1:]
    686                         targ[0] = sym
    687 
    688                         # --! TRACKING
    689                         if tracking:
    690                            t1 = targ[1]
    691                            sym.lineno = t1.lineno
    692                            sym.lexpos = t1.lexpos
    693                            t1 = targ[-1]
    694                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
    695                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
    696 
    697                         # --! TRACKING
    698 
    699                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    700                         # The code enclosed in this section is duplicated 
    701                         # below as a performance optimization.  Make sure
    702                         # changes get made in both locations.
    703 
    704                         pslice.slice = targ
    705                         
    706                         try:
    707                             # Call the grammar rule with our special slice object
    708                             del symstack[-plen:]
    709                             del statestack[-plen:]
    710                             p.callable(pslice)
    711                             symstack.append(sym)
    712                             state = goto[statestack[-1]][pname]
    713                             statestack.append(state)
    714                         except SyntaxError:
    715                             # If an error was set. Enter error recovery state
    716                             lookaheadstack.append(lookahead)
    717                             symstack.pop()
    718                             statestack.pop()
    719                             state = statestack[-1]
    720                             sym.type = 'error'
    721                             lookahead = sym
    722                             errorcount = error_count
    723                             self.errorok = 0
    724                         continue
    725                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    726     
    727                     else:
    728 
    729                         # --! TRACKING
    730                         if tracking:
    731                            sym.lineno = lexer.lineno
    732                            sym.lexpos = lexer.lexpos
    733                         # --! TRACKING
    734 
    735                         targ = [ sym ]
    736 
    737                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    738                         # The code enclosed in this section is duplicated 
    739                         # above as a performance optimization.  Make sure
    740                         # changes get made in both locations.
    741 
    742                         pslice.slice = targ
    743 
    744                         try:
    745                             # Call the grammar rule with our special slice object
    746                             p.callable(pslice)
    747                             symstack.append(sym)
    748                             state = goto[statestack[-1]][pname]
    749                             statestack.append(state)
    750                         except SyntaxError:
    751                             # If an error was set. Enter error recovery state
    752                             lookaheadstack.append(lookahead)
    753                             symstack.pop()
    754                             statestack.pop()
    755                             state = statestack[-1]
    756                             sym.type = 'error'
    757                             lookahead = sym
    758                             errorcount = error_count
    759                             self.errorok = 0
    760                         continue
    761                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    762 
    763                 if t == 0:
    764                     n = symstack[-1]
    765                     return getattr(n,"value",None)
    766 
    767             if t == None:
    768 
    769                 # We have some kind of parsing error here.  To handle
    770                 # this, we are going to push the current token onto
    771                 # the tokenstack and replace it with an 'error' token.
    772                 # If there are any synchronization rules, they may
    773                 # catch it.
    774                 #
    775                 # In addition to pushing the error token, we call call
    776                 # the user defined p_error() function if this is the
    777                 # first syntax error.  This function is only called if
    778                 # errorcount == 0.
    779                 if errorcount == 0 or self.errorok:
    780                     errorcount = error_count
    781                     self.errorok = 0
    782                     errtoken = lookahead
    783                     if errtoken.type == '$end':
    784                         errtoken = None               # End of file!
    785                     if self.errorfunc:
    786                         global errok,token,restart
    787                         errok = self.errok        # Set some special functions available in error recovery
    788                         token = get_token
    789                         restart = self.restart
    790                         if errtoken and not hasattr(errtoken,'lexer'):
    791                             errtoken.lexer = lexer
    792                         tok = self.errorfunc(errtoken)
    793                         del errok, token, restart   # Delete special functions
    794 
    795                         if self.errorok:
    796                             # User must have done some kind of panic
    797                             # mode recovery on their own.  The
    798                             # returned token is the next lookahead
    799                             lookahead = tok
    800                             errtoken = None
    801                             continue
    802                     else:
    803                         if errtoken:
    804                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
    805                             else: lineno = 0
    806                             if lineno:
    807                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
    808                             else:
    809                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
    810                         else:
    811                             sys.stderr.write("yacc: Parse error in input. EOF\n")
    812                             return
    813 
    814                 else:
    815                     errorcount = error_count
    816 
    817                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
    818                 # entire parse has been rolled back and we're completely hosed.   The token is
    819                 # discarded and we just keep going.
    820 
    821                 if len(statestack) <= 1 and lookahead.type != '$end':
    822                     lookahead = None
    823                     errtoken = None
    824                     state = 0
    825                     # Nuke the pushback stack
    826                     del lookaheadstack[:]
    827                     continue
    828 
    829                 # case 2: the statestack has a couple of entries on it, but we're
    830                 # at the end of the file. nuke the top entry and generate an error token
    831 
    832                 # Start nuking entries on the stack
    833                 if lookahead.type == '$end':
    834                     # Whoa. We're really hosed here. Bail out
    835                     return
    836 
    837                 if lookahead.type != 'error':
    838                     sym = symstack[-1]
    839                     if sym.type == 'error':
    840                         # Hmmm. Error is on top of stack, we'll just nuke input
    841                         # symbol and continue
    842                         lookahead = None
    843                         continue
    844                     t = YaccSymbol()
    845                     t.type = 'error'
    846                     if hasattr(lookahead,"lineno"):
    847                         t.lineno = lookahead.lineno
    848                     t.value = lookahead
    849                     lookaheadstack.append(lookahead)
    850                     lookahead = t
    851                 else:
    852                     symstack.pop()
    853                     statestack.pop()
    854                     state = statestack[-1]       # Potential bug fix
    855 
    856                 continue
    857 
    858             # Call an error function here
    859             raise RuntimeError("yacc: internal parser error!!!\n")
    860 
    861     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    862     # parseopt_notrack().
    863     #
    864     # Optimized version of parseopt() with line number tracking removed. 
    865     # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
    866     # code in the #--! TRACKING sections
    867     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    868 
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized LR parsing engine with line-number tracking removed.

        This is an auto-generated copy of parseopt() with the '--! TRACKING'
        sections stripped out (see the banner comment above) -- do not edit
        it directly.  The 'debug' and 'tracking' parameters are accepted only
        for signature compatibility with the other parse methods; they are
        never used in this body.

        input     -- optional source text that is fed to the lexer
        lexer     -- lexer object; if omitted, the ply.lex module's lexer is used
        tokenfunc -- optional callable used instead of lexer.token to fetch tokens

        Returns the semantic value of the start symbol on a successful parse,
        or None when an unrecoverable syntax error is encountered.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
           # Tokenize function
           get_token = lexer.token
        else:
           get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # A false/None token means end of input
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Non-empty production: targ holds the handle plus the
                        # symbol beneath it; targ[0] becomes the result symbol
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # The rule raised SyntaxError: enter error recovery.
                            # Push the lookahead back and substitute an 'error'
                            # symbol for the failed reduction result.
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: nothing is popped off the stacks
                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # The rule raised SyntaxError: enter error recovery
                            # (same handling as the non-empty case above)
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: return the semantic value of the start symbol
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in an 'error' symbol and push the
                    # real token back so synchronization rules can resume after it
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
   1115 
   1116 # -----------------------------------------------------------------------------
   1117 #                          === Grammar Representation ===
   1118 #
   1119 # The following functions, classes, and variables are used to represent and
   1120 # manipulate the rules that make up a grammar. 
   1121 # -----------------------------------------------------------------------------
   1122 
   1123 import re
   1124 
# Regex matching valid grammar-symbol identifiers.  Note: unlike Python
# identifiers, hyphens (and leading digits) are accepted here.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
   1127 
   1128 # -----------------------------------------------------------------------------
   1129 # class Production:
   1130 #
   1131 # This class stores the raw information about a single production or grammar rule.
   1132 # A grammar rule refers to a specification such as this:
   1133 #
   1134 #       expr : expr PLUS term 
   1135 #
   1136 # Here are the basic attributes defined on all productions
   1137 #
   1138 #       name     - Name of the production.  For example 'expr'
   1139 #       prod     - A list of symbols on the right side ['expr','PLUS','term']
   1140 #       prec     - Production precedence level
   1141 #       number   - Production number.
   1142 #       func     - Function that executes on reduce
   1143 #       file     - File where production function is defined
   1144 #       lineno   - Line number where production function is defined
   1145 #
   1146 # The following attributes are defined or optional.
   1147 #
   1148 #       len       - Length of the production (number of symbols on right hand side)
   1149 #       usyms     - Set of unique symbols found in the production
   1150 # -----------------------------------------------------------------------------
   1151 
   1152 class Production(object):
   1153     reduced = 0
   1154     def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
   1155         self.name     = name
   1156         self.prod     = tuple(prod)
   1157         self.number   = number
   1158         self.func     = func
   1159         self.callable = None
   1160         self.file     = file
   1161         self.line     = line
   1162         self.prec     = precedence
   1163 
   1164         # Internal settings used during table construction
   1165         
   1166         self.len  = len(self.prod)   # Length of the production
   1167 
   1168         # Create a list of unique production symbols used in the production
   1169         self.usyms = [ ]             
   1170         for s in self.prod:
   1171             if s not in self.usyms:
   1172                 self.usyms.append(s)
   1173 
   1174         # List of all LR items for the production
   1175         self.lr_items = []
   1176         self.lr_next = None
   1177 
   1178         # Create a string representation
   1179         if self.prod:
   1180             self.str = "%s -> %s" % (self.name," ".join(self.prod))
   1181         else:
   1182             self.str = "%s -> <empty>" % self.name
   1183 
   1184     def __str__(self):
   1185         return self.str
   1186 
   1187     def __repr__(self):
   1188         return "Production("+str(self)+")"
   1189 
   1190     def __len__(self):
   1191         return len(self.prod)
   1192 
   1193     def __nonzero__(self):
   1194         return 1
   1195 
   1196     def __getitem__(self,index):
   1197         return self.prod[index]
   1198             
   1199     # Return the nth lr_item from the production (or None if at the end)
   1200     def lr_item(self,n):
   1201         if n > len(self.prod): return None
   1202         p = LRItem(self,n)
   1203 
   1204         # Precompute the list of productions immediately following.  Hack. Remove later
   1205         try:
   1206             p.lr_after = Prodnames[p.prod[n+1]]
   1207         except (IndexError,KeyError):
   1208             p.lr_after = []
   1209         try:
   1210             p.lr_before = p.prod[n-1]
   1211         except IndexError:
   1212             p.lr_before = None
   1213 
   1214         return p
   1215     
   1216     # Bind the production function name to a callable
   1217     def bind(self,pdict):
   1218         if self.func:
   1219             self.callable = pdict[self.func]
   1220 
   1221 # This class serves as a minimal standin for Production objects when
   1222 # reading table data from files.   It only contains information
   1223 # actually used by the LR parsing engine, plus some additional
   1224 # debugging information.
   1225 class MiniProduction(object):
   1226     def __init__(self,str,name,len,func,file,line):
   1227         self.name     = name
   1228         self.len      = len
   1229         self.func     = func
   1230         self.callable = None
   1231         self.file     = file
   1232         self.line     = line
   1233         self.str      = str
   1234     def __str__(self):
   1235         return self.str
   1236     def __repr__(self):
   1237         return "MiniProduction(%s)" % self.str
   1238 
   1239     # Bind the production function name to a callable
   1240     def bind(self,pdict):
   1241         if self.func:
   1242             self.callable = pdict[self.func]
   1243 
   1244 
   1245 # -----------------------------------------------------------------------------
   1246 # class LRItem
   1247 #
   1248 # This class represents a specific stage of parsing a production rule.  For
   1249 # example: 
   1250 #
   1251 #       expr : expr . PLUS term 
   1252 #
   1253 # In the above, the "." represents the current location of the parse.  Here
   1254 # basic attributes:
   1255 #
   1256 #       name       - Name of the production.  For example 'expr'
   1257 #       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
   1258 #       number     - Production number.
   1259 #
#       lr_next    - Next LR item. For example, if we are at ' expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
   1262 #       lr_index   - LR item index (location of the ".") in the prod list.
   1263 #       lookaheads - LALR lookahead symbols for this item
   1264 #       len        - Length of the production (number of symbols on right hand side)
   1265 #       lr_after    - List of all productions that immediately follow
   1266 #       lr_before   - Grammar symbol immediately before
   1267 # -----------------------------------------------------------------------------
   1268 
   1269 class LRItem(object):
   1270     def __init__(self,p,n):
   1271         self.name       = p.name
   1272         self.prod       = list(p.prod)
   1273         self.number     = p.number
   1274         self.lr_index   = n
   1275         self.lookaheads = { }
   1276         self.prod.insert(n,".")
   1277         self.prod       = tuple(self.prod)
   1278         self.len        = len(self.prod)
   1279         self.usyms      = p.usyms
   1280 
   1281     def __str__(self):
   1282         if self.prod:
   1283             s = "%s -> %s" % (self.name," ".join(self.prod))
   1284         else:
   1285             s = "%s -> <empty>" % self.name
   1286         return s
   1287 
   1288     def __repr__(self):
   1289         return "LRItem("+str(self)+")"
   1290 
   1291 # -----------------------------------------------------------------------------
   1292 # rightmost_terminal()
   1293 #
   1294 # Return the rightmost terminal from a list of symbols.  Used in add_production()
   1295 # -----------------------------------------------------------------------------
   1296 def rightmost_terminal(symbols, terminals):
   1297     i = len(symbols) - 1
   1298     while i >= 0:
   1299         if symbols[i] in terminals:
   1300             return symbols[i]
   1301         i -= 1
   1302     return None
   1303 
   1304 # -----------------------------------------------------------------------------
   1305 #                           === GRAMMAR CLASS ===
   1306 #
   1307 # The following class represents the contents of the specified grammar along
   1308 # with various computed properties such as first sets, follow sets, LR items, etc.
   1309 # This data is used for critical parts of the table generation process later.
   1310 # -----------------------------------------------------------------------------
   1311 
   1312 class GrammarError(YaccError): pass
   1313 
class Grammar(object):
    """Complete representation of a context-free grammar.

    Holds the productions, terminals, nonterminals, and precedence rules
    supplied by the user, along with properties computed from them (FIRST
    sets, FOLLOW sets, LR items).  This object is the input to the LR
    table generation process.
    """
    def __init__(self,terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        # 'error' is always a valid terminal (used in error-recovery rules)
        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar


    def __len__(self):
        """Number of entries in the production list (slot 0 included)."""
        return len(self.Productions)

    def __getitem__(self,index):
        """Return production number *index*."""
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # Raises GrammarError on a duplicate terminal or an unknown associativity.
    # Must be called before any call to add_production().
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
        assert self.Productions == [None],"Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ['left','right','nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc,level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens such as 'x' or "+" and register them as
        # single-character terminals.  NOTE: eval() is used only to decode
        # the quoted literal; grammar specifications are trusted input.
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                 try:
                     c = eval(s)
                     if (len(c) > 1):
                          raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                     if not c in self.Terminals:
                          self.Terminals[c] = []
                     syms[n] = c
                     continue
                 except SyntaxError:
                     pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production 
    # rule 0 is S' -> start where start is the start symbol.  If no start symbol
    # is given, the name of the first rule added is used.
    # -----------------------------------------------------------------------------

    def set_start(self,start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        # (depth-first walk over the production right-hand sides)
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s,[]):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable   = { }
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        # Productions[0] is the augmented rule S' -> start, so prod[0]
        # is the grammar's start symbol.
        mark_reachable_from( self.Productions[0].prod[0] )

        return [s for s in list(self.Nonterminals)
                        if not reachable[s]]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).  Returns a list of offending symbols.
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals: trivially terminate
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change (fixed-point iteration):
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite


    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
    # and prod is the production where the symbol was used. 
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p: continue

            for s in p.prod:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    result.append((s,p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s,v in self.Terminals.items():
            # v is the list of rule numbers using terminal s; empty means unused.
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined,  but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s,v in self.Nonterminals.items():
            # v is the list of rule numbers referencing nonterminal s.
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence. 
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols.  The result is cached
    # in self.First and returned.
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change (fixed-point iteration):
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.  The result is
    # cached in self.Follow and returned.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize every follow set to empty
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        # '$end' always follows the start symbol
        self.Follow[start] = [ '$end' ]

        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    # Past the final dot position: terminate the linked list
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    # the dot (empty if the next symbol is a terminal or absent)
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
   1803 
   1804 # -----------------------------------------------------------------------------
   1805 #                            == Class LRTable ==
   1806 #
   1807 # This basic class represents a basic table of LR parsing information.  
   1808 # Methods for generating the tables are not defined here.  They are defined
   1809 # in the derived class LRGeneratedTable.
   1810 # -----------------------------------------------------------------------------
   1811 
   1812 class VersionError(YaccError): pass
   1813 
   1814 class LRTable(object):
   1815     def __init__(self):
   1816         self.lr_action = None
   1817         self.lr_goto = None
   1818         self.lr_productions = None
   1819         self.lr_method = None
   1820 
   1821     def read_table(self,module):
   1822         if isinstance(module,types.ModuleType):
   1823             parsetab = module
   1824         else:
   1825             if sys.version_info[0] < 3:
   1826                 exec("import %s as parsetab" % module)
   1827             else:
   1828                 env = { }
   1829                 exec("import %s as parsetab" % module, env, env)
   1830                 parsetab = env['parsetab']
   1831 
   1832         if parsetab._tabversion != __tabversion__:
   1833             raise VersionError("yacc table file version is out of date")
   1834 
   1835         self.lr_action = parsetab._lr_action
   1836         self.lr_goto = parsetab._lr_goto
   1837 
   1838         self.lr_productions = []
   1839         for p in parsetab._lr_productions:
   1840             self.lr_productions.append(MiniProduction(*p))
   1841 
   1842         self.lr_method = parsetab._lr_method
   1843         return parsetab._lr_signature
   1844 
   1845     def read_pickle(self,filename):
   1846         try:
   1847             import cPickle as pickle
   1848         except ImportError:
   1849             import pickle
   1850 
   1851         in_f = open(filename,"rb")
   1852 
   1853         tabversion = pickle.load(in_f)
   1854         if tabversion != __tabversion__:
   1855             raise VersionError("yacc table file version is out of date")
   1856         self.lr_method = pickle.load(in_f)
   1857         signature      = pickle.load(in_f)
   1858         self.lr_action = pickle.load(in_f)
   1859         self.lr_goto   = pickle.load(in_f)
   1860         productions    = pickle.load(in_f)
   1861 
   1862         self.lr_productions = []
   1863         for p in productions:
   1864             self.lr_productions.append(MiniProduction(*p))
   1865 
   1866         in_f.close()
   1867         return signature
   1868 
   1869     # Bind all production function names to callable objects in pdict
   1870     def bind_callables(self,pdict):
   1871         for p in self.lr_productions:
   1872             p.bind(pdict)
   1873     
   1874 # -----------------------------------------------------------------------------
   1875 #                           === LR Generator ===
   1876 #
   1877 # The following classes and functions are used to generate LR parsing tables on 
   1878 # a grammar.
   1879 # -----------------------------------------------------------------------------
   1880 
   1881 # -----------------------------------------------------------------------------
   1882 # digraph()
   1883 # traverse()
   1884 #
   1885 # The following two functions are used to compute set valued functions
   1886 # of the form:
   1887 #
   1888 #     F(x) = F'(x) U U{F(y) | x R y}
   1889 #
   1890 # This is used to compute the values of Read() sets as well as FOLLOW sets
   1891 # in LALR(1) generation.
   1892 #
   1893 # Inputs:  X    - An input set
   1894 #          R    - A relation
   1895 #          FP   - Set-valued function
   1896 # ------------------------------------------------------------------------------
   1897 
   1898 def digraph(X,R,FP):
   1899     N = { }
   1900     for x in X:
   1901        N[x] = 0
   1902     stack = []
   1903     F = { }
   1904     for x in X:
   1905         if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
   1906     return F
   1907 
def traverse(x,N,stack,F,X,R,FP):
    # Depth-first traversal step of digraph() (DeRemer-Pennello style).
    # N[x] holds x's traversal number while x is on the stack and MAXINT
    # once its strongly connected component has been finalized.
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
             traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
       # x is the root of a strongly connected component.  Every element
       # still on the stack down to x is in the same component and is
       # assigned the *same* result list object as x (deliberate aliasing).
       N[stack[-1]] = MAXINT
       F[stack[-1]] = F[x]
       element = stack.pop()
       while element != x:
           N[stack[-1]] = MAXINT
           F[stack[-1]] = F[x]
           element = stack.pop()
   1929 
   1930 class LALRError(YaccError): pass
   1931 
   1932 # -----------------------------------------------------------------------------
   1933 #                             == LRGeneratedTable ==
   1934 #
   1935 # This class implements the LR table generation algorithm.  There are no
   1936 # public methods except for write()
   1937 # -----------------------------------------------------------------------------
   1938 
   1939 class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Build LR parsing tables for *grammar* using 'SLR' or 'LALR'.

        Raises LALRError for an unsupported method.  Conflict information
        is collected in sr_conflicts/rr_conflicts; messages go to *log*
        (a NullLogger is used when none is supplied).
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Cache of closures

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0
        self.rr_conflict   = 0
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
   1974 
   1975     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
   1976 
   1977     def lr0_closure(self,I):
   1978         self._add_count += 1
   1979 
   1980         # Add everything in I to J
   1981         J = I[:]
   1982         didadd = 1
   1983         while didadd:
   1984             didadd = 0
   1985             for j in J:
   1986                 for x in j.lr_after:
   1987                     if getattr(x,"lr0_added",0) == self._add_count: continue
   1988                     # Add B --> .G to J
   1989                     J.append(x.lr_next)
   1990                     x.lr0_added = self._add_count
   1991                     didadd = 1
   1992 
   1993         return J
   1994 
   1995     # Compute the LR(0) goto function goto(I,X) where I is a set
   1996     # of LR(0) items and X is a grammar symbol.   This function is written
   1997     # in a way that guarantees uniqueness of the generated goto sets
   1998     # (i.e. the same goto set will never be returned as two different Python
   1999     # objects).  With uniqueness, we can later do fast set comparisons using
   2000     # id(obj) instead of element-wise comparison.
   2001 
    def lr0_goto(self,I,x):
        # Compute goto(I,x) for item set I and grammar symbol x, returning
        # the identical object for identical goto sets so that later
        # comparisons can use id() instead of element-wise equality.

        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  The nested dictionaries rooted at
        # lr_goto_cache[x] form a trie keyed by the ids of the kernel
        # items; the '$end' slot of the final node holds the closed set.

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # Empty goto set: cache the empty list itself
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
   2034 
    # Compute the collection of LR(0) item sets (the sets-of-items construction)
    def lr0_items(self):
        """Compute the collection C = {I0, I1, ..., IN} of LR(0) item sets.

        Each set receives a state number recorded in self.lr0_cidhash,
        keyed by the id() of the set object (unique per set thanks to
        lr0_goto's caching).
        """

        # I0 is the closure of the augmented start production
        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows while we scan it, so an explicit index is used)
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g:  continue
                if id(g) in self.lr0_cidhash: continue
                # A goto set not seen before becomes a new state
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
   2064 
   2065     # -----------------------------------------------------------------------------
   2066     #                       ==== LALR(1) Parsing ====
   2067     #
   2068     # LALR(1) parsing is almost exactly the same as SLR except that instead of
   2069     # relying upon Follow() sets when performing reductions, a more selective
   2070     # lookahead set that incorporates the state of the LR(0) machine is utilized.
   2071     # Thus, we mainly just have to focus on calculating the lookahead sets.
   2072     #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
   2076     #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
   2077     #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
   2078     #
   2079     # Further details can also be found in:
   2080     #
   2081     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
   2082     #      McGraw-Hill Book Company, (1985).
   2083     #
   2084     # -----------------------------------------------------------------------------
   2085 
   2086     # -----------------------------------------------------------------------------
   2087     # compute_nullable_nonterminals()
   2088     #
   2089     # Creates a dictionary containing all of the non-terminals that might produce
   2090     # an empty production.
   2091     # -----------------------------------------------------------------------------
   2092 
   2093     def compute_nullable_nonterminals(self):
   2094         nullable = {}
   2095         num_nullable = 0
   2096         while 1:
   2097            for p in self.grammar.Productions[1:]:
   2098                if p.len == 0:
   2099                     nullable[p.name] = 1
   2100                     continue
   2101                for t in p.prod:
   2102                     if not t in nullable: break
   2103                else:
   2104                     nullable[p.name] = 1
   2105            if len(nullable) == num_nullable: break
   2106            num_nullable = len(nullable)
   2107         return nullable
   2108 
   2109     # -----------------------------------------------------------------------------
   2110     # find_nonterminal_trans(C)
   2111     #
    # Given a set of LR(0) items, this function finds all of the non-terminal
   2113     # transitions.    These are transitions in which a dot appears immediately before
   2114     # a non-terminal.   Returns a list of tuples of the form (state,N) where state
   2115     # is the state number and N is the nonterminal symbol.
   2116     #
   2117     # The input C is the set of LR(0) items.
   2118     # -----------------------------------------------------------------------------
   2119 
   2120     def find_nonterminal_transitions(self,C):
   2121          trans = []
   2122          for state in range(len(C)):
   2123              for p in C[state]:
   2124                  if p.lr_index < p.len - 1:
   2125                       t = (state,p.prod[p.lr_index+1])
   2126                       if t[1] in self.grammar.Nonterminals:
   2127                             if t not in trans: trans.append(t)
   2128              state = state + 1
   2129          return trans
   2130 
   2131     # -----------------------------------------------------------------------------
   2132     # dr_relation()
   2133     #
   2134     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
   2135     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
   2136     #
   2137     # Returns a list of terminals.
   2138     # -----------------------------------------------------------------------------
   2139 
   2140     def dr_relation(self,C,trans,nullable):
   2141         dr_set = { }
   2142         state,N = trans
   2143         terms = []
   2144 
   2145         g = self.lr0_goto(C[state],N)
   2146         for p in g:
   2147            if p.lr_index < p.len - 1:
   2148                a = p.prod[p.lr_index+1]
   2149                if a in self.grammar.Terminals:
   2150                    if a not in terms: terms.append(a)
   2151 
   2152         # This extra bit is to handle the start state
   2153         if state == 0 and N == self.grammar.Productions[0].prod[0]:
   2154            terms.append('$end')
   2155 
   2156         return terms
   2157 
   2158     # -----------------------------------------------------------------------------
   2159     # reads_relation()
   2160     #
   2161     # Computes the READS() relation (p,A) READS (t,C).
   2162     # -----------------------------------------------------------------------------
   2163 
   2164     def reads_relation(self,C, trans, empty):
   2165         # Look for empty transitions
   2166         rel = []
   2167         state, N = trans
   2168 
   2169         g = self.lr0_goto(C[state],N)
   2170         j = self.lr0_cidhash.get(id(g),-1)
   2171         for p in g:
   2172             if p.lr_index < p.len - 1:
   2173                  a = p.prod[p.lr_index + 1]
   2174                  if a in empty:
   2175                       rel.append((j,a))
   2176 
   2177         return rel
   2178 
   2179     # -----------------------------------------------------------------------------
   2180     # compute_lookback_includes()
   2181     #
   2182     # Determines the lookback and includes relations
   2183     #
   2184     # LOOKBACK:
   2185     #
   2186     # This relation is determined by running the LR(0) state machine forward.
   2187     # For example, starting with a production "N : . A B C", we run it forward
   2188     # to obtain "N : A B C ."   We then build a relationship between this final
   2189     # state and the starting state.   These relationships are stored in a dictionary
   2190     # lookdict.
   2191     #
   2192     # INCLUDES:
   2193     #
   2194     # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
   2195     #
   2196     # This relation is used to determine non-terminal transitions that occur
   2197     # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
   2198     # if the following holds:
   2199     #
   2200     #       B -> LAT, where T -> epsilon and p' -L-> p
   2201     #
   2202     # L is essentially a prefix (which may be empty), T is a suffix that must be
   2203     # able to derive an empty string.  State p' must lead to state p with the string L.
   2204     #
   2205     # -----------------------------------------------------------------------------
   2206 
    def compute_lookback_includes(self,C,trans,nullable):
        """Compute the LOOKBACK and INCLUDES relations.

        C        : collection of LR(0) item sets
        trans    : list of nonterminal transitions (state, N)
        nullable : dict of nullable nonterminals

        Returns (lookdict, includedict) where
          lookdict[(state, N)]  is a list of (j, r) pairs: reducing by
                                production r in state j looks back to
                                transition (state, N);
          includedict[(j, t)]   is a list of (state, N) transitions that
                                (j, t) is included in.
        """

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions for O(1) membership tests
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                     lr_index = lr_index + 1
                     t = p.prod[lr_index]

                     # Check to see if this symbol and state are a non-terminal transition
                     if (j,t) in dtrans:
                           # Yes.  Okay, there is some chance that this is an includes relation
                           # the only way to know for certain is whether the rest of the
                           # production derives empty

                           li = lr_index + 1
                           while li < p.len:
                                if p.prod[li] in self.grammar.Terminals: break      # No forget it
                                if not p.prod[li] in nullable: break
                                li = li + 1
                           else:
                                # Appears to be a relation between (j,t) and (state,N)
                                includes.append((j,t))

                     g = self.lr0_goto(C[j],t)               # Go to next set
                     j = self.lr0_cidhash.get(id(g),-1)     # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                     if r.name != p.name: continue
                     if r.len != p.len:   continue
                     i = 0
                     # This loop compares a production ". A B C" with "A B C ."
                     while i < r.lr_index:
                          if r.prod[i] != p.prod[i+1]: break
                          i = i + 1
                     else:
                          lookb.append((j,r))
            for i in includes:
                 if not i in includedict: includedict[i] = []
                 includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
   2268 
   2269     # -----------------------------------------------------------------------------
   2270     # compute_read_sets()
   2271     #
   2272     # Given a set of LR(0) items, this function computes the read sets.
   2273     #
   2274     # Inputs:  C        =  Set of LR(0) items
   2275     #          ntrans   = Set of nonterminal transitions
   2276     #          nullable = Set of empty transitions
   2277     #
   2278     # Returns a set containing the read sets
   2279     # -----------------------------------------------------------------------------
   2280 
   2281     def compute_read_sets(self,C, ntrans, nullable):
   2282         FP = lambda x: self.dr_relation(C,x,nullable)
   2283         R =  lambda x: self.reads_relation(C,x,nullable)
   2284         F = digraph(ntrans,R,FP)
   2285         return F
   2286 
   2287     # -----------------------------------------------------------------------------
   2288     # compute_follow_sets()
   2289     #
   2290     # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
   2291     # and an include set, this function computes the follow sets
   2292     #
   2293     # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
   2294     #
   2295     # Inputs:
   2296     #            ntrans     = Set of nonterminal transitions
   2297     #            readsets   = Readset (previously computed)
   2298     #            inclsets   = Include sets (previously computed)
   2299     #
   2300     # Returns a set containing the follow sets
   2301     # -----------------------------------------------------------------------------
   2302 
   2303     def compute_follow_sets(self,ntrans,readsets,inclsets):
   2304          FP = lambda x: readsets[x]
   2305          R  = lambda x: inclsets.get(x,[])
   2306          F = digraph(ntrans,R,FP)
   2307          return F
   2308 
   2309     # -----------------------------------------------------------------------------
   2310     # add_lookaheads()
   2311     #
   2312     # Attaches the lookahead symbols to grammar rules.
   2313     #
   2314     # Inputs:    lookbacks         -  Set of lookback relations
   2315     #            followset         -  Computed follow set
   2316     #
   2317     # This function directly attaches the lookaheads to productions contained
   2318     # in the lookbacks set
   2319     # -----------------------------------------------------------------------------
   2320 
   2321     def add_lookaheads(self,lookbacks,followset):
   2322         for trans,lb in lookbacks.items():
   2323             # Loop over productions in lookback
   2324             for state,p in lb:
   2325                  if not state in p.lookaheads:
   2326                       p.lookaheads[state] = []
   2327                  f = followset.get(trans,[])
   2328                  for a in f:
   2329                       if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
   2330 
   2331     # -----------------------------------------------------------------------------
   2332     # add_lalr_lookaheads()
   2333     #
   2334     # This function does all of the work of adding lookahead information for use
   2335     # with LALR parsing
   2336     # -----------------------------------------------------------------------------
   2337 
    def add_lalr_lookaheads(self,C):
        """Compute LALR(1) lookaheads for all reductions and attach them
        to the productions.

        C is the collection of LR(0) item sets.  The steps follow the
        DeRemer-Pennello algorithm: nullable set, nonterminal
        transitions, read sets, lookback/includes relations, FOLLOW
        sets, and finally the lookahead attachment.
        """
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)
   2356 
   2357     # -----------------------------------------------------------------------------
   2358     # lr_parse_table()
   2359     #
   2360     # This function constructs the parse tables for SLR or LALR
   2361     # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Construct the SLR or LALR parse tables (self.lr_action and
        self.lr_goto), logging the states and any conflicts.

        Action encoding used in st_action: a positive value j means
        "shift and go to state j", a negative value -n means "reduce by
        rule n", 0 means "accept", and None marks an action disabled by
        a nonassoc precedence declaration.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action["$end"] = 0
                            st_actionp["$end"] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            # For LALR, use the lookaheads computed for this
                            # state; for SLR, fall back to the Follow set.
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.
                                        sprec,slevel = Productions[st_actionp[a].number].prec
                                        rprec,rlevel = Precedence.get(a,('right',0))
                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            # Nonassoc at equal precedence: error entry
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp,rejectp = pp,oldp
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp,rejectp = oldp,pp
                                        self.rr_conflicts.append((st,chosenp,rejectp))
                                        log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    # No conflict: plain reduce action
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I,a)
                            j = self.lr0_cidhash.get(id(g),-1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a,p,"shift and go to state %d" % j))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        if r != j:
                                            raise LALRError("Shift/shift conflict in state %d" % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift
                                        rprec,rlevel = Productions[st_actionp[a].number].prec
                                        sprec,slevel = Precedence.get(a,('right',0))
                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            # Nonassoc at equal precedence: error entry
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))

                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    # No conflict: plain shift action
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
   2537 
   2538 
   2539     # -----------------------------------------------------------------------------
   2540     # write()
   2541     #
   2542     # This function writes the LR parsing tables to a file
   2543     # -----------------------------------------------------------------------------
   2544 
    def write_table(self,modulename,outputdir='',signature=""):
        """Write the LR parsing tables to a generated Python module.

        modulename : dotted module name; only the last component is used
                     as the output file name (with a .py extension)
        outputdir  : directory the file is written into
        signature  : grammar signature stored in the file so a stale
                     table can be detected on reload

        On IOError (e.g. the file cannot be created) a message is
        printed to stderr and the method returns without raising.

        NOTE(review): f is only closed on the success path; if a write
        fails mid-way the handle leaks -- consider a with-statement.
        """
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            # (items maps each terminal to parallel lists of states and actions)
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                # Emit code that rebuilds the (state, terminal) table on import
                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            if smaller:
                # Factor out names to try and make smaller
                items = { }

                for s,nd in self.lr_goto.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                # Emit code that rebuilds the goto table on import
                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return
   2658 
   2659 
   2660     # -----------------------------------------------------------------------------
   2661     # pickle_table()
   2662     #
   2663     # This function pickles the LR parsing tables to a supplied file object
   2664     # -----------------------------------------------------------------------------
   2665 
   2666     def pickle_table(self,filename,signature=""):
   2667         try:
   2668             import cPickle as pickle
   2669         except ImportError:
   2670             import pickle
   2671         outf = open(filename,"wb")
   2672         pickle.dump(__tabversion__,outf,pickle_protocol)
   2673         pickle.dump(self.lr_method,outf,pickle_protocol)
   2674         pickle.dump(signature,outf,pickle_protocol)
   2675         pickle.dump(self.lr_action,outf,pickle_protocol)
   2676         pickle.dump(self.lr_goto,outf,pickle_protocol)
   2677 
   2678         outp = []
   2679         for p in self.lr_productions:
   2680             if p.func:
   2681                 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
   2682             else:
   2683                 outp.append((str(p),p.name,p.len,None,None,None))
   2684         pickle.dump(outp,outf,pickle_protocol)
   2685         outf.close()
   2686 
   2687 # -----------------------------------------------------------------------------
   2688 #                            === INTROSPECTION ===
   2689 #
   2690 # The following functions and classes are used to implement the PLY
   2691 # introspection features followed by the yacc() function itself.
   2692 # -----------------------------------------------------------------------------
   2693 
   2694 # -----------------------------------------------------------------------------
   2695 # get_caller_module_dict()
   2696 #
   2697 # This function returns a dictionary containing all of the symbols defined within
   2698 # a caller further down the call stack.  This is used to get the environment
   2699 # associated with the yacc() call if none was provided.
   2700 # -----------------------------------------------------------------------------
   2701 
   2702 def get_caller_module_dict(levels):
   2703     try:
   2704         raise RuntimeError
   2705     except RuntimeError:
   2706         e,b,t = sys.exc_info()
   2707         f = t.tb_frame
   2708         while levels > 0:
   2709             f = f.f_back                   
   2710             levels -= 1
   2711         ldict = f.f_globals.copy()
   2712         if f.f_globals != f.f_locals:
   2713             ldict.update(f.f_locals)
   2714 
   2715         return ldict
   2716 
   2717 # -----------------------------------------------------------------------------
   2718 # parse_grammar()
   2719 #
   2720 # This takes a raw grammar rule string and parses it into production data
   2721 # -----------------------------------------------------------------------------
   2722 def parse_grammar(doc,file,line):
   2723     grammar = []
   2724     # Split the doc string into lines
   2725     pstrings = doc.splitlines()
   2726     lastp = None
   2727     dline = line
   2728     for ps in pstrings:
   2729         dline += 1
   2730         p = ps.split()
   2731         if not p: continue
   2732         try:
   2733             if p[0] == '|':
   2734                 # This is a continuation of a previous rule
   2735                 if not lastp:
   2736                     raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
   2737                 prodname = lastp
   2738                 syms = p[1:]
   2739             else:
   2740                 prodname = p[0]
   2741                 lastp = prodname
   2742                 syms   = p[2:]
   2743                 assign = p[1]
   2744                 if assign != ':' and assign != '::=':
   2745                     raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
   2746 
   2747             grammar.append((file,dline,prodname,syms))
   2748         except SyntaxError:
   2749             raise
   2750         except Exception:
   2751             raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
   2752 
   2753     return grammar
   2754 
   2755 # -----------------------------------------------------------------------------
   2756 # ParserReflect()
   2757 #
   2758 # This class represents information extracted for building a parser including
   2759 # start symbol, error function, tokens, precedence list, action functions,
   2760 # etc.
   2761 # -----------------------------------------------------------------------------
   2762 class ParserReflect(object):
   2763     def __init__(self,pdict,log=None):
   2764         self.pdict      = pdict
   2765         self.start      = None
   2766         self.error_func = None
   2767         self.tokens     = None
   2768         self.files      = {}
   2769         self.grammar    = []
   2770         self.error      = 0
   2771 
   2772         if log is None:
   2773             self.log = PlyLogger(sys.stderr)
   2774         else:
   2775             self.log = log
   2776 
   2777     # Get all of the basic information
   2778     def get_all(self):
   2779         self.get_start()
   2780         self.get_error_func()
   2781         self.get_tokens()
   2782         self.get_precedence()
   2783         self.get_pfunctions()
   2784         
   2785     # Validate all of the information
   2786     def validate_all(self):
   2787         self.validate_start()
   2788         self.validate_error_func()
   2789         self.validate_tokens()
   2790         self.validate_precedence()
   2791         self.validate_pfunctions()
   2792         self.validate_files()
   2793         return self.error
   2794 
   2795     # Compute a signature over the grammar
   2796     def signature(self):
   2797         try:
   2798             from hashlib import md5
   2799         except ImportError:
   2800             from md5 import md5
   2801         try:
   2802             sig = md5()
   2803             if self.start:
   2804                 sig.update(self.start.encode('latin-1'))
   2805             if self.prec:
   2806                 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
   2807             if self.tokens:
   2808                 sig.update(" ".join(self.tokens).encode('latin-1'))
   2809             for f in self.pfuncs:
   2810                 if f[3]:
   2811                     sig.update(f[3].encode('latin-1'))
   2812         except (TypeError,ValueError):
   2813             pass
   2814         return sig.digest()
   2815 
   2816     # -----------------------------------------------------------------------------
   2817     # validate_file()
   2818     #
   2819     # This method checks to see if there are duplicated p_rulename() functions
   2820     # in the parser module file.  Without this function, it is really easy for
   2821     # users to make mistakes by cutting and pasting code fragments (and it's a real
   2822     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
   2823     # we just do a little regular expression pattern matching of def statements
   2824     # to try and detect duplicates.
   2825     # -----------------------------------------------------------------------------
   2826 
   2827     def validate_files(self):
   2828         # Match def p_funcname(
   2829         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
   2830 
   2831         for filename in self.files.keys():
   2832             base,ext = os.path.splitext(filename)
   2833             if ext != '.py': return 1          # No idea. Assume it's okay.
   2834 
   2835             try:
   2836                 f = open(filename)
   2837                 lines = f.readlines()
   2838                 f.close()
   2839             except IOError:
   2840                 continue
   2841 
   2842             counthash = { }
   2843             for linen,l in enumerate(lines):
   2844                 linen += 1
   2845                 m = fre.match(l)
   2846                 if m:
   2847                     name = m.group(1)
   2848                     prev = counthash.get(name)
   2849                     if not prev:
   2850                         counthash[name] = linen
   2851                     else:
   2852                         self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
   2853 
   2854     # Get the start symbol
   2855     def get_start(self):
   2856         self.start = self.pdict.get('start')
   2857 
   2858     # Validate the start symbol
   2859     def validate_start(self):
   2860         if self.start is not None:
   2861             if not isinstance(self.start,str):
   2862                 self.log.error("'start' must be a string")
   2863 
   2864     # Look for error handler
   2865     def get_error_func(self):
   2866         self.error_func = self.pdict.get('p_error')
   2867 
   2868     # Validate the error function
   2869     def validate_error_func(self):
   2870         if self.error_func:
   2871             if isinstance(self.error_func,types.FunctionType):
   2872                 ismethod = 0
   2873             elif isinstance(self.error_func, types.MethodType):
   2874                 ismethod = 1
   2875             else:
   2876                 self.log.error("'p_error' defined, but is not a function or method")
   2877                 self.error = 1
   2878                 return
   2879 
   2880             eline = func_code(self.error_func).co_firstlineno
   2881             efile = func_code(self.error_func).co_filename
   2882             self.files[efile] = 1
   2883 
   2884             if (func_code(self.error_func).co_argcount != 1+ismethod):
   2885                 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
   2886                 self.error = 1
   2887 
   2888     # Get the tokens map
   2889     def get_tokens(self):
   2890         tokens = self.pdict.get("tokens",None)
   2891         if not tokens:
   2892             self.log.error("No token list is defined")
   2893             self.error = 1
   2894             return
   2895 
   2896         if not isinstance(tokens,(list, tuple)):
   2897             self.log.error("tokens must be a list or tuple")
   2898             self.error = 1
   2899             return
   2900         
   2901         if not tokens:
   2902             self.log.error("tokens is empty")
   2903             self.error = 1
   2904             return
   2905 
   2906         self.tokens = tokens
   2907 
   2908     # Validate the tokens
   2909     def validate_tokens(self):
   2910         # Validate the tokens.
   2911         if 'error' in self.tokens:
   2912             self.log.error("Illegal token name 'error'. Is a reserved word")
   2913             self.error = 1
   2914             return
   2915 
   2916         terminals = {}
   2917         for n in self.tokens:
   2918             if n in terminals:
   2919                 self.log.warning("Token '%s' multiply defined", n)
   2920             terminals[n] = 1
   2921 
   2922     # Get the precedence map (if any)
   2923     def get_precedence(self):
   2924         self.prec = self.pdict.get("precedence",None)
   2925 
   2926     # Validate and parse the precedence map
   2927     def validate_precedence(self):
   2928         preclist = []
   2929         if self.prec:
   2930             if not isinstance(self.prec,(list,tuple)):
   2931                 self.log.error("precedence must be a list or tuple")
   2932                 self.error = 1
   2933                 return
   2934             for level,p in enumerate(self.prec):
   2935                 if not isinstance(p,(list,tuple)):
   2936                     self.log.error("Bad precedence table")
   2937                     self.error = 1
   2938                     return
   2939 
   2940                 if len(p) < 2:
   2941                     self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
   2942                     self.error = 1
   2943                     return
   2944                 assoc = p[0]
   2945                 if not isinstance(assoc,str):
   2946                     self.log.error("precedence associativity must be a string")
   2947                     self.error = 1
   2948                     return
   2949                 for term in p[1:]:
   2950                     if not isinstance(term,str):
   2951                         self.log.error("precedence items must be strings")
   2952                         self.error = 1
   2953                         return
   2954                     preclist.append((term,assoc,level+1))
   2955         self.preclist = preclist
   2956 
   2957     # Get all p_functions from the grammar
   2958     def get_pfunctions(self):
   2959         p_functions = []
   2960         for name, item in self.pdict.items():
   2961             if name[:2] != 'p_': continue
   2962             if name == 'p_error': continue
   2963             if isinstance(item,(types.FunctionType,types.MethodType)):
   2964                 line = func_code(item).co_firstlineno
   2965                 file = func_code(item).co_filename
   2966                 p_functions.append((line,file,name,item.__doc__))
   2967 
   2968         # Sort all of the actions by line number
   2969         p_functions.sort()
   2970         self.pfuncs = p_functions
   2971 
   2972 
   2973     # Validate all of the p_functions
   2974     def validate_pfunctions(self):
   2975         grammar = []
   2976         # Check for non-empty symbols
   2977         if len(self.pfuncs) == 0:
   2978             self.log.error("no rules of the form p_rulename are defined")
   2979             self.error = 1
   2980             return 
   2981         
   2982         for line, file, name, doc in self.pfuncs:
   2983             func = self.pdict[name]
   2984             if isinstance(func, types.MethodType):
   2985                 reqargs = 2
   2986             else:
   2987                 reqargs = 1
   2988             if func_code(func).co_argcount > reqargs:
   2989                 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
   2990                 self.error = 1
   2991             elif func_code(func).co_argcount < reqargs:
   2992                 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
   2993                 self.error = 1
   2994             elif not func.__doc__:
   2995                 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
   2996             else:
   2997                 try:
   2998                     parsed_g = parse_grammar(doc,file,line)
   2999                     for g in parsed_g:
   3000                         grammar.append((name, g))
   3001                 except SyntaxError:
   3002                     e = sys.exc_info()[1]
   3003                     self.log.error(str(e))
   3004                     self.error = 1
   3005 
   3006                 # Looks like a valid grammar rule
   3007                 # Mark the file in which defined.
   3008                 self.files[file] = 1
   3009 
   3010         # Secondary validation step that looks for p_ definitions that are not functions
   3011         # or functions that look like they might be grammar rules.
   3012 
   3013         for n,v in self.pdict.items():
   3014             if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
   3015             if n[0:2] == 't_': continue
   3016             if n[0:2] == 'p_' and n != 'p_error':
   3017                 self.log.warning("'%s' not defined as a function", n)
   3018             if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
   3019                 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
   3020                 try:
   3021                     doc = v.__doc__.split(" ")
   3022                     if doc[1] == ':':
   3023                         self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
   3024                                          func_code(v).co_filename, func_code(v).co_firstlineno,n)
   3025                 except Exception:
   3026                     pass
   3027 
   3028         self.grammar = grammar
   3029 
   3030 # -----------------------------------------------------------------------------
   3031 # yacc(module)
   3032 #
   3033 # Build a parser
   3034 # -----------------------------------------------------------------------------
   3035 
   3036 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 
   3037          check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
   3038          debuglog=None, errorlog = None, picklefile=None):
   3039 
   3040     global parse                 # Reference to the parsing method of the last built parser
   3041 
   3042     # If pickling is enabled, table files are not created
   3043 
   3044     if picklefile:
   3045         write_tables = 0
   3046 
   3047     if errorlog is None:
   3048         errorlog = PlyLogger(sys.stderr)
   3049 
   3050     # Get the module dictionary used for the parser
   3051     if module:
   3052         _items = [(k,getattr(module,k)) for k in dir(module)]
   3053         pdict = dict(_items)
   3054     else:
   3055         pdict = get_caller_module_dict(2)
   3056 
   3057     # Collect parser information from the dictionary
   3058     pinfo = ParserReflect(pdict,log=errorlog)
   3059     pinfo.get_all()
   3060 
   3061     if pinfo.error:
   3062         raise YaccError("Unable to build parser")
   3063 
   3064     # Check signature against table files (if any)
   3065     signature = pinfo.signature()
   3066 
   3067     # Read the tables
   3068     try:
   3069         lr = LRTable()
   3070         if picklefile:
   3071             read_signature = lr.read_pickle(picklefile)
   3072         else:
   3073             read_signature = lr.read_table(tabmodule)
   3074         if optimize or (read_signature == signature):
   3075             try:
   3076                 lr.bind_callables(pinfo.pdict)
   3077                 parser = LRParser(lr,pinfo.error_func)
   3078                 parse = parser.parse
   3079                 return parser
   3080             except Exception:
   3081                 e = sys.exc_info()[1]
   3082                 errorlog.warning("There was a problem loading the table file: %s", repr(e))
   3083     except VersionError:
   3084         e = sys.exc_info()
   3085         errorlog.warning(str(e))
   3086     except Exception:
   3087         pass
   3088 
   3089     if debuglog is None:
   3090         if debug:
   3091             debuglog = PlyLogger(open(debugfile,"w"))
   3092         else:
   3093             debuglog = NullLogger()
   3094 
   3095     debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
   3096 
   3097 
   3098     errors = 0
   3099 
   3100     # Validate the parser information
   3101     if pinfo.validate_all():
   3102         raise YaccError("Unable to build parser")
   3103     
   3104     if not pinfo.error_func:
   3105         errorlog.warning("no p_error() function is defined")
   3106 
   3107     # Create a grammar object
   3108     grammar = Grammar(pinfo.tokens)
   3109 
   3110     # Set precedence level for terminals
   3111     for term, assoc, level in pinfo.preclist:
   3112         try:
   3113             grammar.set_precedence(term,assoc,level)
   3114         except GrammarError:
   3115             e = sys.exc_info()[1]
   3116             errorlog.warning("%s",str(e))
   3117 
   3118     # Add productions to the grammar
   3119     for funcname, gram in pinfo.grammar:
   3120         file, line, prodname, syms = gram
   3121         try:
   3122             grammar.add_production(prodname,syms,funcname,file,line)
   3123         except GrammarError:
   3124             e = sys.exc_info()[1]
   3125             errorlog.error("%s",str(e))
   3126             errors = 1
   3127 
   3128     # Set the grammar start symbols
   3129     try:
   3130         if start is None:
   3131             grammar.set_start(pinfo.start)
   3132         else:
   3133             grammar.set_start(start)
   3134     except GrammarError:
   3135         e = sys.exc_info()[1]
   3136         errorlog.error(str(e))
   3137         errors = 1
   3138 
   3139     if errors:
   3140         raise YaccError("Unable to build parser")
   3141 
   3142     # Verify the grammar structure
   3143     undefined_symbols = grammar.undefined_symbols()
   3144     for sym, prod in undefined_symbols:
   3145         errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
   3146         errors = 1
   3147 
   3148     unused_terminals = grammar.unused_terminals()
   3149     if unused_terminals:
   3150         debuglog.info("")
   3151         debuglog.info("Unused terminals:")
   3152         debuglog.info("")
   3153         for term in unused_terminals:
   3154             errorlog.warning("Token '%s' defined, but not used", term)
   3155             debuglog.info("    %s", term)
   3156 
   3157     # Print out all productions to the debug log
   3158     if debug:
   3159         debuglog.info("")
   3160         debuglog.info("Grammar")
   3161         debuglog.info("")
   3162         for n,p in enumerate(grammar.Productions):
   3163             debuglog.info("Rule %-5d %s", n, p)
   3164 
   3165     # Find unused non-terminals
   3166     unused_rules = grammar.unused_rules()
   3167     for prod in unused_rules:
   3168         errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
   3169 
   3170     if len(unused_terminals) == 1:
   3171         errorlog.warning("There is 1 unused token")
   3172     if len(unused_terminals) > 1:
   3173         errorlog.warning("There are %d unused tokens", len(unused_terminals))
   3174 
   3175     if len(unused_rules) == 1:
   3176         errorlog.warning("There is 1 unused rule")
   3177     if len(unused_rules) > 1:
   3178         errorlog.warning("There are %d unused rules", len(unused_rules))
   3179 
   3180     if debug:
   3181         debuglog.info("")
   3182         debuglog.info("Terminals, with rules where they appear")
   3183         debuglog.info("")
   3184         terms = list(grammar.Terminals)
   3185         terms.sort()
   3186         for term in terms:
   3187             debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
   3188         
   3189         debuglog.info("")
   3190         debuglog.info("Nonterminals, with rules where they appear")
   3191         debuglog.info("")
   3192         nonterms = list(grammar.Nonterminals)
   3193         nonterms.sort()
   3194         for nonterm in nonterms:
   3195             debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
   3196         debuglog.info("")
   3197 
   3198     if check_recursion:
   3199         unreachable = grammar.find_unreachable()
   3200         for u in unreachable:
   3201             errorlog.warning("Symbol '%s' is unreachable",u)
   3202 
   3203         infinite = grammar.infinite_cycles()
   3204         for inf in infinite:
   3205             errorlog.error("Infinite recursion detected for symbol '%s'", inf)
   3206             errors = 1
   3207         
   3208     unused_prec = grammar.unused_precedence()
   3209     for term, assoc in unused_prec:
   3210         errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
   3211         errors = 1
   3212 
   3213     if errors:
   3214         raise YaccError("Unable to build parser")
   3215     
   3216     # Run the LRGeneratedTable on the grammar
   3217     if debug:
   3218         errorlog.debug("Generating %s tables", method)
   3219             
   3220     lr = LRGeneratedTable(grammar,method,debuglog)
   3221 
   3222     if debug:
   3223         num_sr = len(lr.sr_conflicts)
   3224 
   3225         # Report shift/reduce and reduce/reduce conflicts
   3226         if num_sr == 1:
   3227             errorlog.warning("1 shift/reduce conflict")
   3228         elif num_sr > 1:
   3229             errorlog.warning("%d shift/reduce conflicts", num_sr)
   3230 
   3231         num_rr = len(lr.rr_conflicts)
   3232         if num_rr == 1:
   3233             errorlog.warning("1 reduce/reduce conflict")
   3234         elif num_rr > 1:
   3235             errorlog.warning("%d reduce/reduce conflicts", num_rr)
   3236 
   3237     # Write out conflicts to the output file
   3238     if debug and (lr.sr_conflicts or lr.rr_conflicts):
   3239         debuglog.warning("")
   3240         debuglog.warning("Conflicts:")
   3241         debuglog.warning("")
   3242 
   3243         for state, tok, resolution in lr.sr_conflicts:
   3244             debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)
   3245         
   3246         already_reported = {}
   3247         for state, rule, rejected in lr.rr_conflicts:
   3248             if (state,id(rule),id(rejected)) in already_reported:
   3249                 continue
   3250             debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
   3251             debuglog.warning("rejected rule (%s) in state %d", rejected,state)
   3252             errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
   3253             errorlog.warning("rejected rule (%s) in state %d", rejected, state)
   3254             already_reported[state,id(rule),id(rejected)] = 1
   3255         
   3256         warned_never = []
   3257         for state, rule, rejected in lr.rr_conflicts:
   3258             if not rejected.reduced and (rejected not in warned_never):
   3259                 debuglog.warning("Rule (%s) is never reduced", rejected)
   3260                 errorlog.warning("Rule (%s) is never reduced", rejected)
   3261                 warned_never.append(rejected)
   3262 
   3263     # Write the table file if requested
   3264     if write_tables:
   3265         lr.write_table(tabmodule,outputdir,signature)
   3266 
   3267     # Write a pickled version of the tables
   3268     if picklefile:
   3269         lr.pickle_table(picklefile,signature)
   3270 
   3271     # Build the parser
   3272     lr.bind_callables(pinfo.pdict)
   3273     parser = LRParser(lr,pinfo.error_func)
   3274 
   3275     parse = parser.parse
   3276     return parser
   3277