Home | History | Annotate | Download | only in sepolgen
      1 #-----------------------------------------------------------------------------
      2 # ply: yacc.py
      3 #
      4 # Author(s): David M. Beazley (dave (at] dabeaz.com)
      5 #
      6 # Copyright (C) 2001-2006, David M. Beazley
      7 #
      8 # This library is free software; you can redistribute it and/or
      9 # modify it under the terms of the GNU Lesser General Public
     10 # License as published by the Free Software Foundation; either
     11 # version 2.1 of the License, or (at your option) any later version.
     12 # 
     13 # This library is distributed in the hope that it will be useful,
     14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16 # Lesser General Public License for more details.
     17 # 
     18 # You should have received a copy of the GNU Lesser General Public
     19 # License along with this library; if not, write to the Free Software
     20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     21 # 
     22 # See the file COPYING for a complete copy of the LGPL.
     23 #
     24 #
     25 # This implements an LR parser that is constructed from grammar rules defined
     26 # as Python functions. The grammer is specified by supplying the BNF inside
     27 # Python documentation strings.  The inspiration for this technique was borrowed
     28 # from John Aycock's Spark parsing system.  PLY might be viewed as cross between
     29 # Spark and the GNU bison utility.
     30 #
     31 # The current implementation is only somewhat object-oriented. The
     32 # LR parser itself is defined in terms of an object (which allows multiple
     33 # parsers to co-exist).  However, most of the variables used during table
     34 # construction are defined in terms of global variables.  Users shouldn't
     35 # notice unless they are trying to define multiple parsers at the same
     36 # time using threads (in which case they should have their head examined).
     37 #
     38 # This implementation supports both SLR and LALR(1) parsing.  LALR(1)
     39 # support was originally implemented by Elias Ioup (ezioup (at] alumni.uchicago.edu),
     40 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
     41 # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
     42 # by the more efficient DeRemer and Pennello algorithm.
     43 #
     44 # :::::::: WARNING :::::::
     45 #
     46 # Construction of LR parsing tables is fairly complicated and expensive.
     47 # To make this module run fast, a *LOT* of work has been put into
     48 # optimization---often at the expensive of readability and what might
     49 # consider to be good Python "coding style."   Modify the code at your
     50 # own risk!
     51 # ----------------------------------------------------------------------------
     52 
__version__ = "2.2"

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates
                               # a 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode
     69 
     70 import re, types, sys, hashlib, os.path
     71 try:
     72     from cStringIO import StringIO
     73 except ImportError:
     74     from io import StringIO
     75 
     76 from . import util
     77 
# Exception raised for yacc-related errors (e.g. attempting to instantiate
# Parser directly instead of going through the yacc() factory).
class YaccError(Exception):   pass
     80 
     81 #-----------------------------------------------------------------------------
     82 #                        ===  LR Parsing Engine ===
     83 #
     84 # The following classes are used for the LR parser itself.  These are not
     85 # used during table construction and are independent of the actual LR
     86 # table generation algorithm
     87 #-----------------------------------------------------------------------------
     88 
     89 # This class is used to hold non-terminal grammar symbols during parsing.
     90 # It normally has the following attributes set:
     91 #        .type       = Grammar symbol type
     92 #        .value      = Symbol value
     93 #        .lineno     = Starting line number
     94 #        .endlineno  = Ending line number (optional, set automatically)
     95 #        .lexpos     = Starting lex position
     96 #        .endlexpos  = Ending lex position (optional, set automatically)
     97 
     98 class YaccSymbol:
     99     def __str__(self):    return self.type
    100     def __repr__(self):   return str(self)
    101 
    102 # This class is a wrapper around the objects actually passed to each
    103 # grammar rule.   Index lookup and assignment actually assign the
    104 # .value attribute of the underlying YaccSymbol object.
    105 # The lineno() method returns the line number of a given
    106 # item (or 0 if not defined).   The linespan() method returns
    107 # a tuple of (startline,endline) representing the range of lines
    108 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
    109 # representing the range of positional information for a symbol.
    110 
    111 class YaccProduction:
    112     def __init__(self,s,stack=None):
    113         self.slice = s
    114         self.pbstack = []
    115         self.stack = stack
    116 
    117     def __getitem__(self,n):
    118         if type(n) == int:
    119              if n >= 0: return self.slice[n].value
    120              else: return self.stack[n].value
    121         else:
    122              return [s.value for s in self.slice[n.start:n.stop:n.step]]
    123 
    124     def __setitem__(self,n,v):
    125         self.slice[n].value = v
    126 
    127     def __len__(self):
    128         return len(self.slice)
    129     
    130     def lineno(self,n):
    131         return getattr(self.slice[n],"lineno",0)
    132 
    133     def linespan(self,n):
    134         startline = getattr(self.slice[n],"lineno",0)
    135         endline = getattr(self.slice[n],"endlineno",startline)
    136         return startline,endline
    137 
    138     def lexpos(self,n):
    139         return getattr(self.slice[n],"lexpos",0)
    140 
    141     def lexspan(self,n):
    142         startpos = getattr(self.slice[n],"lexpos",0)
    143         endpos = getattr(self.slice[n],"endlexpos",startpos)
    144         return startpos,endpos
    145 
    146     def pushback(self,n):
    147         if n <= 0:
    148             raise ValueError("Expected a positive value")
    149         if n > (len(self.slice)-1):
    150             raise ValueError("Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1))
    151         for i in range(0,n):
    152             self.pbstack.append(self.slice[-i-1])
    153 
    154 # The LR Parsing engine.   This is defined as a class so that multiple parsers
    155 # can exist in the same process.  A user never instantiates this directly.
    156 # Instead, the global yacc() function should be used to create a suitable Parser
    157 # object. 
    158 
    159 class Parser:
    160     def __init__(self,magic=None):
    161 
    162         # This is a hack to keep users from trying to instantiate a Parser
    163         # object directly.
    164 
    165         if magic != "xyzzy":
    166             raise YaccError("Can't instantiate Parser. Use yacc() instead.")
    167 
    168         # Reset internal state
    169         self.productions = None          # List of productions
    170         self.errorfunc   = None          # Error handling function
    171         self.action      = { }           # LR Action table
    172         self.goto        = { }           # LR goto table
    173         self.require     = { }           # Attribute require table
    174         self.method      = "Unknown LR"  # Table construction method used
    175 
    176     def errok(self):
    177         self.errorcount = 0
    178 
    179     def restart(self):
    180         del self.statestack[:]
    181         del self.symstack[:]
    182         sym = YaccSymbol()
    183         sym.type = '$end'
    184         self.symstack.append(sym)
    185         self.statestack.append(0)
    186         
    187     def parse(self,input=None,lexer=None,debug=0):
    188         lookahead = None                 # Current lookahead symbol
    189         lookaheadstack = [ ]             # Stack of lookahead symbols
    190         actions = self.action            # Local reference to action table
    191         goto    = self.goto              # Local reference to goto table
    192         prod    = self.productions       # Local reference to production list
    193         pslice  = YaccProduction(None)   # Production object passed to grammar rules
    194         pslice.parser = self             # Parser object
    195         self.errorcount = 0              # Used during error recovery
    196 
    197         # If no lexer was given, we will try to use the lex module
    198         if not lexer:
    199             from . import lex
    200             lexer = lex.lexer
    201 
    202         pslice.lexer = lexer
    203         
    204         # If input was supplied, pass to lexer
    205         if input:
    206             lexer.input(input)
    207 
    208         # Tokenize function
    209         get_token = lexer.token
    210 
    211         statestack = [ ]                # Stack of parsing states
    212         self.statestack = statestack
    213         symstack   = [ ]                # Stack of grammar symbols
    214         self.symstack = symstack
    215 
    216         pslice.stack = symstack         # Put in the production
    217         errtoken   = None               # Err token
    218 
    219         # The start state is assumed to be (0,$end)
    220         statestack.append(0)
    221         sym = YaccSymbol()
    222         sym.type = '$end'
    223         symstack.append(sym)
    224         
    225         while 1:
    226             # Get the next symbol on the input.  If a lookahead symbol
    227             # is already set, we just use that. Otherwise, we'll pull
    228             # the next token off of the lookaheadstack or from the lexer
    229             if debug > 1:
    230                 print('state', statestack[-1])
    231             if not lookahead:
    232                 if not lookaheadstack:
    233                     lookahead = get_token()     # Get the next token
    234                 else:
    235                     lookahead = lookaheadstack.pop()
    236                 if not lookahead:
    237                     lookahead = YaccSymbol()
    238                     lookahead.type = '$end'
    239             if debug:
    240                 errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()
    241 
    242             # Check the action table
    243             s = statestack[-1]
    244             ltype = lookahead.type
    245             t = actions.get((s,ltype),None)
    246 
    247             if debug > 1:
    248                 print('action', t)
    249             if t is not None:
    250                 if t > 0:
    251                     # shift a symbol on the stack
    252                     if ltype == '$end':
    253                         # Error, end of input
    254                         sys.stderr.write("yacc: Parse error. EOF\n")
    255                         return
    256                     statestack.append(t)
    257                     if debug > 1:
    258                         sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
    259                     symstack.append(lookahead)
    260                     lookahead = None
    261 
    262                     # Decrease error count on successful shift
    263                     if self.errorcount > 0:
    264                         self.errorcount -= 1
    265                         
    266                     continue
    267                 
    268                 if t < 0:
    269                     # reduce a symbol on the stack, emit a production
    270                     p = prod[-t]
    271                     pname = p.name
    272                     plen  = p.len
    273 
    274                     # Get production function
    275                     sym = YaccSymbol()
    276                     sym.type = pname       # Production name
    277                     sym.value = None
    278                     if debug > 1:
    279                         sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))
    280 
    281                     if plen:
    282                         targ = symstack[-plen-1:]
    283                         targ[0] = sym
    284                         try:
    285                             sym.lineno = targ[1].lineno
    286                             sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
    287                             sym.lexpos = targ[1].lexpos
    288                             sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos)
    289                         except AttributeError:
    290                             sym.lineno = 0
    291                         del symstack[-plen:]
    292                         del statestack[-plen:]
    293                     else:
    294                         sym.lineno = 0
    295                         targ = [ sym ]
    296                     pslice.slice = targ
    297                     pslice.pbstack = []
    298                     # Call the grammar rule with our special slice object
    299                     p.func(pslice)
    300 
    301                     # If there was a pushback, put that on the stack
    302                     if pslice.pbstack:
    303                         lookaheadstack.append(lookahead)
    304                         for _t in pslice.pbstack:
    305                             lookaheadstack.append(_t)
    306                         lookahead = None
    307 
    308                     symstack.append(sym)
    309                     statestack.append(goto[statestack[-1],pname])
    310                     continue
    311 
    312                 if t == 0:
    313                     n = symstack[-1]
    314                     return getattr(n,"value",None)
    315                     sys.stderr.write(errorlead, "\n")
    316 
    317             if t == None:
    318                 if debug:
    319                     sys.stderr.write(errorlead + "\n")
    320                 # We have some kind of parsing error here.  To handle
    321                 # this, we are going to push the current token onto
    322                 # the tokenstack and replace it with an 'error' token.
    323                 # If there are any synchronization rules, they may
    324                 # catch it.
    325                 #
    326                 # In addition to pushing the error token, we call call
    327                 # the user defined p_error() function if this is the
    328                 # first syntax error.  This function is only called if
    329                 # errorcount == 0.
    330                 if not self.errorcount:
    331                     self.errorcount = error_count
    332                     errtoken = lookahead
    333                     if errtoken.type == '$end':
    334                         errtoken = None               # End of file!
    335                     if self.errorfunc:
    336                         global errok,token,restart
    337                         errok = self.errok        # Set some special functions available in error recovery
    338                         token = get_token
    339                         restart = self.restart
    340                         tok = self.errorfunc(errtoken)
    341                         del errok, token, restart   # Delete special functions
    342                         
    343                         if not self.errorcount:
    344                             # User must have done some kind of panic
    345                             # mode recovery on their own.  The
    346                             # returned token is the next lookahead
    347                             lookahead = tok
    348                             errtoken = None
    349                             continue
    350                     else:
    351                         if errtoken:
    352                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
    353                             else: lineno = 0
    354                             if lineno:
    355                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
    356                             else:
    357                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
    358                         else:
    359                             sys.stderr.write("yacc: Parse error in input. EOF\n")
    360                             return
    361 
    362                 else:
    363                     self.errorcount = error_count
    364                 
    365                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
    366                 # entire parse has been rolled back and we're completely hosed.   The token is
    367                 # discarded and we just keep going.
    368 
    369                 if len(statestack) <= 1 and lookahead.type != '$end':
    370                     lookahead = None
    371                     errtoken = None
    372                     # Nuke the pushback stack
    373                     del lookaheadstack[:]
    374                     continue
    375 
    376                 # case 2: the statestack has a couple of entries on it, but we're
    377                 # at the end of the file. nuke the top entry and generate an error token
    378 
    379                 # Start nuking entries on the stack
    380                 if lookahead.type == '$end':
    381                     # Whoa. We're really hosed here. Bail out
    382                     return 
    383 
    384                 if lookahead.type != 'error':
    385                     sym = symstack[-1]
    386                     if sym.type == 'error':
    387                         # Hmmm. Error is on top of stack, we'll just nuke input
    388                         # symbol and continue
    389                         lookahead = None
    390                         continue
    391                     t = YaccSymbol()
    392                     t.type = 'error'
    393                     if hasattr(lookahead,"lineno"):
    394                         t.lineno = lookahead.lineno
    395                     t.value = lookahead
    396                     lookaheadstack.append(lookahead)
    397                     lookahead = t
    398                 else:
    399                     symstack.pop()
    400                     statestack.pop()
    401 
    402                 continue
    403 
    404             # Call an error function here
    405             raise RuntimeError("yacc: internal parser error!!!\n")
    406 
    407 # -----------------------------------------------------------------------------
    408 #                          === Parser Construction ===
    409 #
    410 # The following functions and variables are used to implement the yacc() function
    411 # itself.   This is pretty hairy stuff involving lots of error checking,
    412 # construction of LR items, kernels, and so forth.   Although a lot of
    413 # this work is done using global variables, the resulting Parser object
    414 # is completely self contained--meaning that it is safe to repeatedly
    415 # call yacc() with different grammars in the same application.
    416 # -----------------------------------------------------------------------------
    417         
    418 # -----------------------------------------------------------------------------
    419 # validate_file()
    420 #
    421 # This function checks to see if there are duplicated p_rulename() functions
    422 # in the parser module file.  Without this function, it is really easy for
    423 # users to make mistakes by cutting and pasting code fragments (and it's a real
    424 # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    425 # we just do a little regular expression pattern matching of def statements
    426 # to try and detect duplicates.
    427 # -----------------------------------------------------------------------------
    428 
    429 def validate_file(filename):
    430     base,ext = os.path.splitext(filename)
    431     if ext != '.py': return 1          # No idea. Assume it's okay.
    432 
    433     try:
    434         f = open(filename)
    435         lines = f.readlines()
    436         f.close()
    437     except IOError:
    438         return 1                       # Oh well
    439 
    440     # Match def p_funcname(
    441     fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    442     counthash = { }
    443     linen = 1
    444     noerror = 1
    445     for l in lines:
    446         m = fre.match(l)
    447         if m:
    448             name = m.group(1)
    449             prev = counthash.get(name)
    450             if not prev:
    451                 counthash[name] = linen
    452             else:
    453                 sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev))
    454                 noerror = 0
    455         linen += 1
    456     return noerror
    457 
    458 # This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
    459 def validate_dict(d):
    460     for n,v in d.items(): 
    461         if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue
    462         if n[0:2] == 't_': continue
    463 
    464         if n[0:2] == 'p_':
    465             sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n)
    466         if 1 and isinstance(v,types.FunctionType) and v.__code__.co_argcount == 1:
    467             try:
    468                 doc = v.__doc__.split(" ")
    469                 if doc[1] == ':':
    470                     sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.__code__.co_filename, v.__code__.co_firstlineno,n))
    471             except Exception:
    472                 pass
    473 
    474 # -----------------------------------------------------------------------------
    475 #                           === GRAMMAR FUNCTIONS ===
    476 #
    477 # The following global variables and functions are used to store, manipulate,
    478 # and verify the grammar rules specified by the user.
    479 # -----------------------------------------------------------------------------
    480 
    481 # Initialize all of the global variables used during grammar construction
    482 def initialize_vars():
    483     global Productions, Prodnames, Prodmap, Terminals 
    484     global Nonterminals, First, Follow, Precedence, LRitems
    485     global Errorfunc, Signature, Requires
    486 
    487     Productions  = [None]  # A list of all of the productions.  The first
    488                            # entry is always reserved for the purpose of
    489                            # building an augmented grammar
    490                         
    491     Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
    492                            # productions of that nonterminal.
    493                         
    494     Prodmap      = { }     # A dictionary that is only used to detect duplicate
    495                            # productions.
    496 
    497     Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
    498                            # list of the rules where they are used.
    499 
    500     Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
    501                            # of rule numbers where they are used.
    502 
    503     First        = { }     # A dictionary of precomputed FIRST(x) symbols
    504     
    505     Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
    506 
    507     Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
    508                            # form ('right',level) or ('nonassoc', level) or ('left',level)
    509 
    510     LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
    511                            # productions with the "dot" like E -> E . PLUS E
    512 
    513     Errorfunc    = None    # User defined error handler
    514 
    515     Signature    = hashlib.sha256()   # Digital signature of the grammar rules, precedence
    516                                # and other information.  Used to determined when a
    517                                # parsing table needs to be regenerated.
    518 
    519     Requires     = { }     # Requires list
    520 
    521     # File objects used when creating the parser.out debugging file
    522     global _vf, _vfc
    523     _vf           = StringIO()
    524     _vfc          = StringIO()
    525 
    526 # -----------------------------------------------------------------------------
    527 # class Production:
    528 #
    529 # This class stores the raw information about a single production or grammar rule.
    530 # It has a few required attributes:
    531 #
    532 #       name     - Name of the production (nonterminal)
    533 #       prod     - A list of symbols making up its production
    534 #       number   - Production number.
    535 #
    536 # In addition, a few additional attributes are used to help with debugging or
    537 # optimization of table generation.
    538 #
    539 #       file     - File where production action is defined.
    540 #       lineno   - Line number where action is defined
    541 #       func     - Action function
    542 #       prec     - Precedence level
    543 #       lr_next  - Next LR item. Example, if we are ' E -> E . PLUS E'
    544 #                  then lr_next refers to 'E -> E PLUS . E'   
    545 #       lr_index - LR item index (location of the ".") in the prod list.
    546 #       lookaheads - LALR lookahead symbols for this item
    547 #       len      - Length of the production (number of symbols on right hand side)
    548 # -----------------------------------------------------------------------------
    549 
    550 class Production:
    551     def __init__(self,**kw):
    552         for k,v in kw.items():
    553             setattr(self,k,v)
    554         self.lr_index = -1
    555         self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
    556         self.lr1_added = 0    # Flag indicating whether or not added to LR1
    557         self.usyms = [ ]
    558         self.lookaheads = { }
    559         self.lk_added = { }
    560         self.setnumbers = [ ]
    561         
    562     def __str__(self):
    563         if self.prod:
    564             s = "%s -> %s" % (self.name," ".join(self.prod))
    565         else:
    566             s = "%s -> <empty>" % self.name
    567         return s
    568 
    569     def __repr__(self):
    570         return str(self)
    571 
    572     # Compute lr_items from the production
    573     def lr_item(self,n):
    574         if n > len(self.prod): return None
    575         p = Production()
    576         p.name = self.name
    577         p.prod = list(self.prod)
    578         p.number = self.number
    579         p.lr_index = n
    580         p.lookaheads = { }
    581         p.setnumbers = self.setnumbers
    582         p.prod.insert(n,".")
    583         p.prod = tuple(p.prod)
    584         p.len = len(p.prod)
    585         p.usyms = self.usyms
    586 
    587         # Precompute list of productions immediately following
    588         try:
    589             p.lrafter = Prodnames[p.prod[n+1]]
    590         except (IndexError,KeyError) as e:
    591             p.lrafter = []
    592         try:
    593             p.lrbefore = p.prod[n-1]
    594         except IndexError:
    595             p.lrbefore = None
    596 
    597         return p
    598 
# NOTE(review): MiniProduction carries no behavior of its own; instances
# presumably have their attributes assigned directly by table-loading code
# outside this chunk -- confirm against the rest of the module.
class MiniProduction:
    pass
    601 
# regex matching identifiers
# NOTE(review): inside the character class, '_-~' is parsed as the RANGE
# '_' (0x5F) through '~' (0x7E), which also admits '`', '{', '|', '}' and
# '~' -- and does NOT match a literal '-'.  If a literal hyphen was
# intended, the class should read [a-zA-Z0-9_~-] (or escape the '-');
# confirm intent before changing, as it would alter accepted rule names.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-~]+$')
    604 
    605 # -----------------------------------------------------------------------------
    606 # add_production()
    607 #
    608 # Given an action function, this function assembles a production rule.
    609 # The production rule is assumed to be found in the function's docstring.
    610 # This rule has the general syntax:
    611 #
    612 #              name1 ::= production1
    613 #                     |  production2
    614 #                     |  production3
    615 #                    ...
    616 #                     |  productionn
    617 #              name2 ::= production1
    618 #                     |  production2
    619 #                    ... 
    620 # -----------------------------------------------------------------------------
    621 
def add_production(f,file,line,prodname,syms):
    """Add a single production to the global grammar tables.

    f        -- rule function invoked on reduction
    file     -- filename where the rule appears (for error messages)
    line     -- line number of the rule (for error messages)
    prodname -- left-hand-side nonterminal name
    syms     -- list of right-hand-side symbol names; mutated in place
                (quoted character literals are replaced by the bare
                character, and any '%prec TOKEN' pair is removed)

    Returns 0 on success, -1 on error.  Errors are reported to stderr.
    """
    # The rule name must not collide with a declared token or the
    # reserved 'error' symbol, and must be a legal identifier.
    if prodname in Terminals:
        sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname))
        return -1
    if prodname == 'error':
        sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname))
        return -1
                
    if not _is_identifier.match(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))
        return -1

    # Validate right-hand-side symbols.  A quoted single character becomes
    # an implicitly declared terminal.
    for x in range(len(syms)):
        s = syms[x]
        if s[0] in "'\"":
             try:
                 c = eval(s)
                 if (len(c) > 1):
                      sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) 
                      return -1
                 if c not in Terminals:
                      Terminals[c] = []
                 syms[x] = c
                 continue
             except SyntaxError:
                 pass
        if not _is_identifier.match(s) and s != '%prec':
            sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))
            return -1

    # See if the rule is already in the rulemap
    map = "%s -> %s" % (prodname,syms)
    if map in Prodmap:
        m = Prodmap[map]
        sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m))
        sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line))
        return -1

    # Build the Production object and register it in the global tables.
    p = Production()
    p.name = prodname
    p.prod = syms
    p.file = file
    p.line = line
    p.func = f
    p.number = len(Productions)

            
    Productions.append(p)
    Prodmap[map] = p
    if prodname not in Nonterminals:
        Nonterminals[prodname] = [ ]
    
    # Add all terminals to Terminals
    i = 0
    while i < len(p.prod):
        t = p.prod[i]
        if t == '%prec':
            # '%prec TOKEN' overrides the production's precedence; both
            # symbols are deleted from the production itself.
            try:
                precname = p.prod[i+1]
            except IndexError:
                sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line))
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname))
                return -1
            else:
                p.prec = prec
            del p.prod[i]
            del p.prod[i]
            continue

        if t in Terminals:
            Terminals[t].append(p.number)
            # Is a terminal.  We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
        else:
            if t not in Nonterminals:
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
        i += 1

    if not hasattr(p,"prec"):
        # Default precedence when neither a terminal nor %prec supplied one.
        p.prec = ('right',0)
        
    # Set final length of productions
    p.len  = len(p.prod)
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production
    p.usyms = [ ]
    for s in p.prod:
        if s not in p.usyms:
            p.usyms.append(s)
    
    # Add to the global productions list
    try:
        Prodnames[p.name].append(p)
    except KeyError:
        Prodnames[p.name] = [ p ]
    return 0
    726 
    727 # Given a raw rule function, this function rips out its doc string
    728 # and adds rules to the grammar
    729 
def add_function(f):
    """Extract grammar rules from the doc string of rule function f and
    add each one to the grammar via add_production().

    Returns 0 on success, or a negative value counting the errors that
    were reported to stderr.
    """
    line = f.__code__.co_firstlineno
    file = f.__code__.co_filename
    error = 0

    # Bound methods take (self, t); plain functions take (t).
    if isinstance(f,types.MethodType):
        reqdargs = 2
    else:
        reqdargs = 1
        
    if f.__code__.co_argcount > reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__))
        return -1

    if f.__code__.co_argcount < reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__))
        return -1
          
    if f.__doc__:
        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()
        lastp = None    # name of most recent rule, for '|' continuations
        dline = line    # doc-string line number being parsed (for messages)
        for ps in pstrings:
            dline += 1
            p = ps.split()
            if not p: continue
            try:
                if p[0] == '|':
                    # This is a continuation of a previous rule
                    if not lastp:
                        sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline))
                        return -1
                    prodname = lastp
                    if len(p) > 1:
                        syms = p[1:]
                    else:
                        syms = [ ]
                else:
                    # New rule: "name : sym1 sym2 ..." or "name ::= ..."
                    prodname = p[0]
                    lastp = prodname
                    assign = p[1]
                    if len(p) > 2:
                        syms = p[2:]
                    else:
                        syms = [ ]
                    if assign != ':' and assign != '::=':
                        sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))
                        return -1
                         
 
                # add_production returns 0 or -1; accumulate failures.
                e = add_production(f,file,dline,prodname,syms)
                error += e

                
            except Exception:
                sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
                error -= 1
    else:
        sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__))
    return error
    791 
    792 
    793 # Cycle checking code (Michael Dyck)
    794 
    795 def compute_reachable():
    796     '''
    797     Find each symbol that can be reached from the start symbol.
    798     Print a warning for any nonterminals that can't be reached.
    799     (Unused terminals have already had their warning.)
    800     '''
    801     Reachable = { }
    802     for s in list(Terminals.keys()) + list(Nonterminals.keys()):
    803         Reachable[s] = 0
    804 
    805     mark_reachable_from( Productions[0].prod[0], Reachable )
    806 
    807     for s in Nonterminals.keys():
    808         if not Reachable[s]:
    809             sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)
    810 
    811 def mark_reachable_from(s, Reachable):
    812     '''
    813     Mark all symbols that are reachable from symbol s.
    814     '''
    815     if Reachable[s]:
    816         # We've already reached symbol s.
    817         return
    818     Reachable[s] = 1
    819     for p in Prodnames.get(s,[]):
    820         for r in p.prod:
    821             mark_reachable_from(r, Reachable)
    822 
    823 # -----------------------------------------------------------------------------
    824 # compute_terminates()
    825 #
    826 # This function looks at the various parsing rules and tries to detect
    827 # infinite recursion cycles (grammar rules where there is no possible way
    828 # to derive a string of only terminals).
    829 # -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Raise an error for any symbols that don't terminate.

    A symbol "terminates" when it can derive a string consisting only of
    terminals.  Nonterminals that never terminate indicate infinite
    recursion in the grammar.  Returns 1 if any error was reported to
    stderr, 0 otherwise.
    '''
    # Maps each grammar symbol to 1 (terminates) or 0 (not yet shown to).
    Terminates = {}

    # Terminals:
    for t in Terminals.keys():
        Terminates[t] = 1

    Terminates['$end'] = 1

    # Nonterminals:

    # Initialize to false:
    for n in Nonterminals.keys():
        Terminates[n] = 0

    # Then propagate termination until no change:
    while 1:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
            for p in pl:
                # Production p terminates iff all of its rhs symbols terminate.
                for s in p.prod:
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.
                        p_terminates = 0
                        break
                else:
                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.
                    p_terminates = 1

                if p_terminates:
                    # symbol n terminates!
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # Don't need to consider any more productions for this n.
                    break

        if not some_change:
            break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if not terminates:
            if s not in Prodnames and s not in Terminals and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                pass
            else:
                sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s)
                some_error = 1

    return some_error
    890 
    891 # -----------------------------------------------------------------------------
    892 # verify_productions()
    893 #
    894 # This function examines all of the supplied rules to see if they seem valid.
    895 # -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    """Sanity-check the assembled grammar.

    Reports undefined symbols, unused tokens, and unused rules to stderr
    (and, when yaccdebug is set, to the debugging file _vf).  When
    cycle_check is true, also checks reachability and termination.
    Returns a nonzero error count if problems were found.
    """
    error = 0
    for p in Productions:
        if not p: continue

        # Every right-hand-side symbol must be a known rule or token.
        for s in p.prod:
            if s not in Prodnames and s not in Terminals and s != 'error':
                sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s))
                error = 1
                continue

    unused_tok = 0 
    # Now verify all of the tokens
    if yaccdebug:
        _vf.write("Unused terminals:\n\n")
    # Terminals maps each token to the list of productions using it; an
    # empty list means the token is never used.
    for s,v in Terminals.items():
        if s != 'error' and not v:
            sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s)
            if yaccdebug: _vf.write("   %s\n"% s)
            unused_tok += 1

    # Print out all of the productions
    if yaccdebug:
        _vf.write("\nGrammar\n\n")
        for i in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
        
    unused_prod = 0
    # Verify the use of all productions
    for s,v in Nonterminals.items():
        if not v:
            p = Prodnames[s][0]
            sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s))
            unused_prod += 1

    
    if unused_tok == 1:
        sys.stderr.write("yacc: Warning. There is 1 unused token.\n")
    if unused_tok > 1:
        sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok)

    if unused_prod == 1:
        sys.stderr.write("yacc: Warning. There is 1 unused rule.\n")
    if unused_prod > 1:
        sys.stderr.write("yacc: Warning. There are %d unused rules.\n" % unused_prod)

    if yaccdebug:
        _vf.write("\nTerminals, with rules where they appear\n\n")
        ks = list(Terminals.keys())
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
        _vf.write("\nNonterminals, with rules where they appear\n\n")
        ks = list(Nonterminals.keys())
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))

    if (cycle_check):
        compute_reachable()
        error += compute_terminates()
#        error += check_cycles()
    return error
    959 
    960 # -----------------------------------------------------------------------------
    961 # build_lritems()
    962 #
    963 # This function walks the list of productions and builds a complete set of the
    964 # LR items.  The LR items are stored in two ways:  First, they are uniquely
    965 # numbered and placed in the list _lritems.  Second, a linked list of LR items
    966 # is built for each production.  For example:
    967 #
    968 #   E -> E PLUS E
    969 #
    970 # Creates the list
    971 #
    972 #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 
    973 # -----------------------------------------------------------------------------
    974 
    975 def build_lritems():
    976     for p in Productions:
    977         lastlri = p
    978         lri = p.lr_item(0)
    979         i = 0
    980         while 1:
    981             lri = p.lr_item(i)
    982             lastlri.lr_next = lri
    983             if not lri: break
    984             lri.lr_num = len(LRitems)
    985             LRitems.append(lri)
    986             lastlri = lri
    987             i += 1
    988 
    989     # In order for the rest of the parser generator to work, we need to
    990     # guarantee that no more lritems are generated.  Therefore, we nuke
    991     # the p.lr_item method.  (Only used in debugging)
    992     # Production.lr_item = None
    993 
    994 # -----------------------------------------------------------------------------
    995 # add_precedence()
    996 #
    997 # Given a list of precedence rules, add to the precedence table.
    998 # -----------------------------------------------------------------------------
    999 
   1000 def add_precedence(plist):
   1001     plevel = 0
   1002     error = 0
   1003     for p in plist:
   1004         plevel += 1
   1005         try:
   1006             prec = p[0]
   1007             terms = p[1:]
   1008             if prec != 'left' and prec != 'right' and prec != 'nonassoc':
   1009                 sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec)
   1010                 return -1
   1011             for t in terms:
   1012                 if t in Precedence:
   1013                     sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t)
   1014                     error += 1
   1015                     continue
   1016                 Precedence[t] = (prec,plevel)
   1017         except:
   1018             sys.stderr.write("yacc: Invalid precedence table.\n")
   1019             error += 1
   1020 
   1021     return error
   1022 
   1023 # -----------------------------------------------------------------------------
   1024 # augment_grammar()
   1025 #
   1026 # Compute the augmented grammar.  This is just a rule S' -> start where start
   1027 # is the starting symbol.
   1028 # -----------------------------------------------------------------------------
   1029 
   1030 def augment_grammar(start=None):
   1031     if not start:
   1032         start = Productions[1].name
   1033     Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
   1034     Productions[0].usyms = [ start ]
   1035     Nonterminals[start].append(0)
   1036 
   1037 
   1038 # -------------------------------------------------------------------------
   1039 # first()
   1040 #
   1041 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
   1042 #
   1043 # During execution of compute_first1, the result may be incomplete.
   1044 # Afterward (e.g., when called from compute_follow()), it will be complete.
   1045 # -------------------------------------------------------------------------
   1046 def first(beta):
   1047 
   1048     # We are computing First(x1,x2,x3,...,xn)
   1049     result = [ ]
   1050     for x in beta:
   1051         x_produces_empty = 0
   1052 
   1053         # Add all the non-<empty> symbols of First[x] to the result.
   1054         for f in First[x]:
   1055             if f == '<empty>':
   1056                 x_produces_empty = 1
   1057             else:
   1058                 if f not in result: result.append(f)
   1059 
   1060         if x_produces_empty:
   1061             # We have to consider the next x in beta,
   1062             # i.e. stay in the loop.
   1063             pass
   1064         else:
   1065             # We don't have to consider any further symbols in beta.
   1066             break
   1067     else:
   1068         # There was no 'break' from the loop,
   1069         # so x_produces_empty was true for all x in beta,
   1070         # so beta produces empty as well.
   1071         result.append('<empty>')
   1072 
   1073     return result
   1074 
   1075 
   1076 # FOLLOW(x)
   1077 # Given a non-terminal.  This function computes the set of all symbols
   1078 # that might follow it.  Dragon book, p. 189.
   1079 
def compute_follow(start=None):
    """Compute FOLLOW(B) for every nonterminal B (Dragon book, p. 189).

    Results are stored in the global Follow dictionary.  If no start
    symbol is given, the LHS of the first real production is used.
    """
    # Add '$end' to the follow list of the start symbol
    for k in Nonterminals.keys():
        Follow[k] = [ ]

    if not start:
        start = Productions[1].name
        
    Follow[start] = [ '$end' ]
        
    # Iterate to a fixed point: keep propagating symbols into the Follow
    # sets until a full pass over the productions makes no additions.
    while 1:
        didadd = 0
        for p in Productions[1:]:
            # Here is the production set
            for i in range(len(p.prod)):
                B = p.prod[i]
                if B in Nonterminals:
                    # Okay. We got a non-terminal in a production
                    # Everything in FIRST(rest-of-production) except <empty>
                    # belongs in FOLLOW(B).
                    fst = first(p.prod[i+1:])
                    hasempty = 0
                    for f in fst:
                        if f != '<empty>' and f not in Follow[B]:
                            Follow[B].append(f)
                            didadd = 1
                        if f == '<empty>':
                            hasempty = 1
                    if hasempty or i == (len(p.prod)-1):
                        # Add elements of follow(a) to follow(b)
                        for f in Follow[p.name]:
                            if f not in Follow[B]:
                                Follow[B].append(f)
                                didadd = 1
        if not didadd: break

    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
   1118 
   1119 # -------------------------------------------------------------------------
   1120 # compute_first1()
   1121 #
   1122 # Compute the value of FIRST1(X) for all symbols
   1123 # -------------------------------------------------------------------------
   1124 def compute_first1():
   1125 
   1126     # Terminals:
   1127     for t in Terminals.keys():
   1128         First[t] = [t]
   1129 
   1130     First['$end'] = ['$end']
   1131     First['#'] = ['#'] # what's this for?
   1132 
   1133     # Nonterminals:
   1134 
   1135     # Initialize to the empty set:
   1136     for n in Nonterminals.keys():
   1137         First[n] = []
   1138 
   1139     # Then propagate symbols until no change:
   1140     while 1:
   1141         some_change = 0
   1142         for n in Nonterminals.keys():
   1143             for p in Prodnames[n]:
   1144                 for f in first(p.prod):
   1145                     if f not in First[n]:
   1146                         First[n].append( f )
   1147                         some_change = 1
   1148         if not some_change:
   1149             break
   1150 
   1151     if 0 and yaccdebug:
   1152         _vf.write('\nFirst:\n')
   1153         for k in Nonterminals.keys():
   1154             _vf.write("%-20s : %s\n" %
   1155                 (k, " ".join([str(s) for s in First[k]])))
   1156 
   1157 # -----------------------------------------------------------------------------
   1158 #                           === SLR Generation ===
   1159 #
   1160 # The following functions are used to construct SLR (Simple LR) parsing tables
   1161 # as described on p.221-229 of the dragon book.
   1162 # -----------------------------------------------------------------------------
   1163 
   1164 # Global variables for the LR parsing engine
   1165 def lr_init_vars():
   1166     global _lr_action, _lr_goto, _lr_method
   1167     global _lr_goto_cache, _lr0_cidhash
   1168     
   1169     _lr_action       = { }        # Action table
   1170     _lr_goto         = { }        # Goto table
   1171     _lr_method       = "Unknown"  # LR method used
   1172     _lr_goto_cache   = { }
   1173     _lr0_cidhash     = { }
   1174 
   1175 
   1176 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
   1177 # prodlist is a list of productions.
   1178 
_add_count = 0       # Counter used to detect cycles

def lr0_closure(I):
    """Compute the LR(0) closure of I, where I is a list of LR(0) items.

    Returns a new list J containing I plus, for every item with the dot
    before a nonterminal, the initial items of that nonterminal's
    productions.  The _add_count stamp on each production prevents adding
    the same item twice without having to clear a visited set per call.
    """
    global _add_count
    
    _add_count += 1
    prodlist = Productions
    
    # Add everything in I to J        
    J = I[:]
    didadd = 1
    while didadd:
        didadd = 0
        # Appending to J inside this loop is intentional: the list
        # iterator also visits the newly added items.
        for j in J:
            for x in j.lrafter:
                if x.lr0_added == _add_count: continue
                # Add B --> .G to J
                J.append(x.lr_next)
                x.lr0_added = _add_count
                didadd = 1
               
    return J
   1201 
   1202 # Compute the LR(0) goto function goto(I,X) where I is a set
   1203 # of LR(0) items and X is a grammar symbol.   This function is written
   1204 # in a way that guarantees uniqueness of the generated goto sets
   1205 # (i.e. the same goto set will never be returned as two different Python
   1206 # objects).  With uniqueness, we can later do fast set comparisons using
   1207 # id(obj) instead of element-wise comparison.
   1208 
def lr0_goto(I,x):
    """Compute the LR(0) goto set goto(I,x).

    I is a list of LR(0) items and x is a grammar symbol.  Results are
    memoized in _lr_goto_cache so the same goto set is always returned
    as the same object; this lets later code compare sets by id().
    """
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result
    
    # s walks a trie of nested dicts keyed by the id() of each advanced
    # item; the leaf (under '$end') stores the closure for that sequence.
    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    g = s.get('$end',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$end'] = g
        else:
            # Empty goto: cache the empty list itself as the leaf.
            s['$end'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g
   1241 
   1242 _lr0_cidhash = { }
   1243 
   1244 # Compute the LR(0) sets of item function
   1245 def lr0_items():
   1246     
   1247     C = [ lr0_closure([Productions[0].lr_next]) ]
   1248     i = 0
   1249     for I in C:
   1250         _lr0_cidhash[id(I)] = i
   1251         i += 1
   1252 
   1253     # Loop over the items in C and each grammar symbols
   1254     i = 0
   1255     while i < len(C):
   1256         I = C[i]
   1257         i += 1
   1258 
   1259         # Collect all of the symbols that could possibly be in the goto(I,X) sets
   1260         asyms = { }
   1261         for ii in I:
   1262             for s in ii.usyms:
   1263                 asyms[s] = None
   1264 
   1265         for x in asyms.keys():
   1266             g = lr0_goto(I,x)
   1267             if not g:  continue
   1268             if id(g) in _lr0_cidhash: continue
   1269             _lr0_cidhash[id(g)] = len(C)            
   1270             C.append(g)
   1271             
   1272     return C
   1273 
   1274 # -----------------------------------------------------------------------------
   1275 #                       ==== LALR(1) Parsing ====
   1276 #
   1277 # LALR(1) parsing is almost exactly the same as SLR except that instead of
   1278 # relying upon Follow() sets when performing reductions, a more selective
   1279 # lookahead set that incorporates the state of the LR(0) machine is utilized.
   1280 # Thus, we mainly just have to focus on calculating the lookahead sets.
   1281 #
# The method used here is due to DeRemer and Pennello (1982).
#
# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
   1285 #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
   1286 #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
   1287 #
   1288 # Further details can also be found in:
   1289 #
   1290 #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
   1291 #      McGraw-Hill Book Company, (1985).
   1292 #
   1293 # Note:  This implementation is a complete replacement of the LALR(1) 
   1294 #        implementation in PLY-1.x releases.   That version was based on
   1295 #        a less efficient algorithm and it had bugs in its implementation.
   1296 # -----------------------------------------------------------------------------
   1297 
   1298 # -----------------------------------------------------------------------------
   1299 # compute_nullable_nonterminals()
   1300 #
   1301 # Creates a dictionary containing all of the non-terminals that might produce
   1302 # an empty production.   
   1303 # -----------------------------------------------------------------------------
   1304 
   1305 def compute_nullable_nonterminals():
   1306     nullable = {}
   1307     num_nullable = 0
   1308     while 1:
   1309        for p in Productions[1:]:
   1310            if p.len == 0:
   1311                 nullable[p.name] = 1
   1312                 continue
   1313            for t in p.prod:
   1314                 if t not in nullable: break
   1315            else:
   1316                 nullable[p.name] = 1
   1317        if len(nullable) == num_nullable: break
   1318        num_nullable = len(nullable)
   1319     return nullable
   1320 
   1321 # -----------------------------------------------------------------------------
   1322 # find_nonterminal_trans(C)
   1323 #
# Given a set of LR(0) items, this function finds all of the non-terminal
   1325 # transitions.    These are transitions in which a dot appears immediately before
   1326 # a non-terminal.   Returns a list of tuples of the form (state,N) where state
   1327 # is the state number and N is the nonterminal symbol.
   1328 #
   1329 # The input C is the set of LR(0) items.
   1330 # -----------------------------------------------------------------------------
   1331 
   1332 def find_nonterminal_transitions(C):
   1333      trans = []
   1334      for state in range(len(C)):
   1335          for p in C[state]:
   1336              if p.lr_index < p.len - 1:
   1337                   t = (state,p.prod[p.lr_index+1])
   1338                   if t[1] in Nonterminals:
   1339                         if t not in trans: trans.append(t)
   1340          state = state + 1
   1341      return trans
   1342 
   1343 # -----------------------------------------------------------------------------
   1344 # dr_relation()
   1345 #
   1346 # Computes the DR(p,A) relationships for non-terminal transitions.  The input
   1347 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
   1348 #
   1349 # Returns a list of terminals.
   1350 # -----------------------------------------------------------------------------
   1351 
   1352 def dr_relation(C,trans,nullable):
   1353     dr_set = { }
   1354     state,N = trans
   1355     terms = []
   1356 
   1357     g = lr0_goto(C[state],N)
   1358     for p in g:
   1359        if p.lr_index < p.len - 1:
   1360            a = p.prod[p.lr_index+1]
   1361            if a in Terminals:
   1362                if a not in terms: terms.append(a)
   1363 
   1364     # This extra bit is to handle the start state
   1365     if state == 0 and N == Productions[0].prod[0]:
   1366        terms.append('$end')
   1367  
   1368     return terms
   1369 
   1370 # -----------------------------------------------------------------------------
   1371 # reads_relation()
   1372 #
   1373 # Computes the READS() relation (p,A) READS (t,C).
   1374 # -----------------------------------------------------------------------------
   1375 
   1376 def reads_relation(C, trans, empty):
   1377     # Look for empty transitions
   1378     rel = []
   1379     state, N = trans
   1380 
   1381     g = lr0_goto(C[state],N)
   1382     j = _lr0_cidhash.get(id(g),-1)
   1383     for p in g:
   1384         if p.lr_index < p.len - 1:
   1385              a = p.prod[p.lr_index + 1]
   1386              if a in empty:
   1387                   rel.append((j,a))
   1388 
   1389     return rel
   1390 
   1391 # -----------------------------------------------------------------------------
   1392 # compute_lookback_includes()
   1393 #
   1394 # Determines the lookback and includes relations
   1395 #
   1396 # LOOKBACK:
   1397 # 
   1398 # This relation is determined by running the LR(0) state machine forward.
   1399 # For example, starting with a production "N : . A B C", we run it forward
   1400 # to obtain "N : A B C ."   We then build a relationship between this final
   1401 # state and the starting state.   These relationships are stored in a dictionary
   1402 # lookdict.   
   1403 #
   1404 # INCLUDES:
   1405 #
   1406 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).   
   1407 #
   1408 # This relation is used to determine non-terminal transitions that occur
   1409 # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
   1410 # if the following holds:
   1411 #
   1412 #       B -> LAT, where T -> epsilon and p' -L-> p 
   1413 #
   1414 # L is essentially a prefix (which may be empty), T is a suffix that must be
   1415 # able to derive an empty string.  State p' must lead to state p with the string L.
   1416 # 
   1417 # -----------------------------------------------------------------------------
   1418 
def compute_lookback_includes(C,trans,nullable):
    """Compute the LOOKBACK and INCLUDES relations used by LALR(1) generation.

    C        -- canonical collection of LR(0) item sets (indexed by state)
    trans    -- list of nonterminal transitions as (state, nonterminal) pairs
    nullable -- collection of nonterminals that can derive the empty string

    Returns the pair (lookdict, includedict) where:
      lookdict[(state,N)]  -- list of (final_state, production) lookback pairs
      includedict[(p,A)]   -- list of (state,N) transitions related by INCLUDES
    """
    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions (for fast membership tests)
    dtrans = {}
    for t in trans:
        dtrans[t] = 1
    
    # Loop over all transitions and compute lookbacks and includes
    for state,N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N: continue
        
            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                 lr_index = lr_index + 1
                 t = p.prod[lr_index]

                 # Check to see if this symbol and state are a non-terminal transition
                 if (j,t) in dtrans:
                       # Yes.  Okay, there is some chance that this is an includes relation
                       # the only way to know for certain is whether the rest of the
                       # production derives empty

                       li = lr_index + 1
                       while li < p.len:
                            if p.prod[li] in Terminals: break      # No forget it
                            if p.prod[li] not in nullable: break
                            li = li + 1
                       else:
                            # Rest of the production derives empty, so there is
                            # a relation between (j,t) and (state,N)
                            includes.append((j,t))

                 g = lr0_goto(C[j],t)               # Go to next set
                 j = _lr0_cidhash.get(id(g),-1)     # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                 if r.name != p.name: continue
                 if r.len != p.len:   continue
                 i = 0
                 # This loop is comparing a production ". A B C" with "A B C ."
                 while i < r.lr_index:
                      if r.prod[i] != p.prod[i+1]: break
                      i = i + 1
                 else:
                      lookb.append((j,r))
        for i in includes:
             if i not in includedict: includedict[i] = []
             includedict[i].append((state,N))
        lookdict[(state,N)] = lookb

    return lookdict,includedict
   1480 
   1481 # -----------------------------------------------------------------------------
   1482 # digraph()
   1483 # traverse()
   1484 #
   1485 # The following two functions are used to compute set valued functions
   1486 # of the form:
   1487 #
   1488 #     F(x) = F'(x) U U{F(y) | x R y}
   1489 #
   1490 # This is used to compute the values of Read() sets as well as FOLLOW sets
   1491 # in LALR(1) generation.
   1492 #
   1493 # Inputs:  X    - An input set
   1494 #          R    - A relation
   1495 #          FP   - Set-valued function
   1496 # ------------------------------------------------------------------------------
   1497 
   1498 def digraph(X,R,FP):
   1499     N = { }
   1500     for x in X:
   1501        N[x] = 0
   1502     stack = []
   1503     F = { }
   1504     for x in X:
   1505         if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
   1506     return F
   1507 
   1508 def traverse(x,N,stack,F,X,R,FP):
   1509     stack.append(x)
   1510     d = len(stack)
   1511     N[x] = d
   1512     F[x] = FP(x)             # F(X) <- F'(x)
   1513     
   1514     rel = R(x)               # Get y's related to x
   1515     for y in rel:
   1516         if N[y] == 0:
   1517              traverse(y,N,stack,F,X,R,FP)
   1518         N[x] = min(N[x],N[y])
   1519         for a in F.get(y,[]):
   1520             if a not in F[x]: F[x].append(a)
   1521     if N[x] == d:
   1522        N[stack[-1]] = sys.maxsize
   1523        F[stack[-1]] = F[x]
   1524        element = stack.pop()
   1525        while element != x:
   1526            N[stack[-1]] = sys.maxsize
   1527            F[stack[-1]] = F[x]
   1528            element = stack.pop()
   1529 
   1530 # -----------------------------------------------------------------------------
   1531 # compute_read_sets()
   1532 #
   1533 # Given a set of LR(0) items, this function computes the read sets.
   1534 #
   1535 # Inputs:  C        =  Set of LR(0) items
   1536 #          ntrans   = Set of nonterminal transitions
   1537 #          nullable = Set of empty transitions
   1538 #
   1539 # Returns a set containing the read sets
   1540 # -----------------------------------------------------------------------------
   1541 
   1542 def compute_read_sets(C, ntrans, nullable):
   1543     FP = lambda x: dr_relation(C,x,nullable)
   1544     R =  lambda x: reads_relation(C,x,nullable)
   1545     F = digraph(ntrans,R,FP)
   1546     return F
   1547 
   1548 # -----------------------------------------------------------------------------
   1549 # compute_follow_sets()
   1550 #
   1551 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 
   1552 # and an include set, this function computes the follow sets
   1553 #
   1554 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
   1555 #
   1556 # Inputs:    
   1557 #            ntrans     = Set of nonterminal transitions
   1558 #            readsets   = Readset (previously computed)
   1559 #            inclsets   = Include sets (previously computed)
   1560 #
   1561 # Returns a set containing the follow sets      
   1562 # -----------------------------------------------------------------------------
   1563 
   1564 def compute_follow_sets(ntrans,readsets,inclsets):
   1565      FP = lambda x: readsets[x]
   1566      R  = lambda x: inclsets.get(x,[])
   1567      F = digraph(ntrans,R,FP)
   1568      return F
   1569 
   1570 # -----------------------------------------------------------------------------
   1571 # add_lookaheads()
   1572 #
   1573 # Attaches the lookahead symbols to grammar rules. 
   1574 #
   1575 # Inputs:    lookbacks         -  Set of lookback relations
   1576 #            followset         -  Computed follow set
   1577 #
   1578 # This function directly attaches the lookaheads to productions contained
   1579 # in the lookbacks set
   1580 # -----------------------------------------------------------------------------
   1581 
   1582 def add_lookaheads(lookbacks,followset):
   1583     for trans,lb in lookbacks.items():
   1584         # Loop over productions in lookback
   1585         for state,p in lb:
   1586              if state not in p.lookaheads:
   1587                   p.lookaheads[state] = []
   1588              f = followset.get(trans,[])
   1589              for a in f:
   1590                   if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
   1591 
   1592 # -----------------------------------------------------------------------------
   1593 # add_lalr_lookaheads()
   1594 #
   1595 # This function does all of the work of adding lookahead information for use
   1596 # with LALR parsing
   1597 # -----------------------------------------------------------------------------
   1598 
   1599 def add_lalr_lookaheads(C):
   1600     # Determine all of the nullable nonterminals
   1601     nullable = compute_nullable_nonterminals()
   1602 
   1603     # Find all non-terminal transitions
   1604     trans = find_nonterminal_transitions(C)
   1605 
   1606     # Compute read sets
   1607     readsets = compute_read_sets(C,trans,nullable)
   1608 
   1609     # Compute lookback/includes relations
   1610     lookd, included = compute_lookback_includes(C,trans,nullable)
   1611 
   1612     # Compute LALR FOLLOW sets
   1613     followsets = compute_follow_sets(trans,readsets,included)
   1614     
   1615     # Add all of the lookaheads
   1616     add_lookaheads(lookd,followsets)
   1617 
   1618 # -----------------------------------------------------------------------------
   1619 # lr_parse_table()
   1620 #
   1621 # This function constructs the parse tables for SLR or LALR
   1622 # -----------------------------------------------------------------------------
   1623 def lr_parse_table(method):
   1624     global _lr_method
   1625     goto = _lr_goto           # Goto array
   1626     action = _lr_action       # Action array
   1627     actionp = { }             # Action production array (temporary)
   1628 
   1629     _lr_method = method
   1630     
   1631     n_srconflict = 0
   1632     n_rrconflict = 0
   1633 
   1634     if yaccdebug:
   1635         sys.stderr.write("yacc: Generating %s parsing table...\n" % method)        
   1636         _vf.write("\n\nParsing method: %s\n\n" % method)
   1637         
   1638     # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
   1639     # This determines the number of states
   1640     
   1641     C = lr0_items()
   1642 
   1643     if method == 'LALR':
   1644         add_lalr_lookaheads(C)
   1645 
   1646     # Build the parser table, state by state
   1647     st = 0
   1648     for I in C:
   1649         # Loop over each production in I
   1650         actlist = [ ]              # List of actions
   1651         
   1652         if yaccdebug:
   1653             _vf.write("\nstate %d\n\n" % st)
   1654             for p in I:
   1655                 _vf.write("    (%d) %s\n" % (p.number, str(p)))
   1656             _vf.write("\n")
   1657 
   1658         for p in I:
   1659             try:
   1660                 if p.prod[-1] == ".":
   1661                     if p.name == "S'":
   1662                         # Start symbol. Accept!
   1663                         action[st,"$end"] = 0
   1664                         actionp[st,"$end"] = p
   1665                     else:
   1666                         # We are at the end of a production.  Reduce!
   1667                         if method == 'LALR':
   1668                             laheads = p.lookaheads[st]
   1669                         else:
   1670                             laheads = Follow[p.name]
   1671                         for a in laheads:
   1672                             actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
   1673                             r = action.get((st,a),None)
   1674                             if r is not None:
   1675                                 # Whoa. Have a shift/reduce or reduce/reduce conflict
   1676                                 if r > 0:
   1677                                     # Need to decide on shift or reduce here
   1678                                     # By default we favor shifting. Need to add
   1679                                     # some precedence rules here.
   1680                                     sprec,slevel = Productions[actionp[st,a].number].prec                                    
   1681                                     rprec,rlevel = Precedence.get(a,('right',0))
   1682                                     if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
   1683                                         # We really need to reduce here.  
   1684                                         action[st,a] = -p.number
   1685                                         actionp[st,a] = p
   1686                                         if not slevel and not rlevel:
   1687                                             _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
   1688                                             _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
   1689                                             n_srconflict += 1
   1690                                     elif (slevel == rlevel) and (rprec == 'nonassoc'):
   1691                                         action[st,a] = None
   1692                                     else:
   1693                                         # Hmmm. Guess we'll keep the shift
   1694                                         if not rlevel:
   1695                                             _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
   1696                                             _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
   1697                                             n_srconflict +=1                                    
   1698                                 elif r < 0:
   1699                                     # Reduce/reduce conflict.   In this case, we favor the rule
   1700                                     # that was defined first in the grammar file
   1701                                     oldp = Productions[-r]
   1702                                     pp = Productions[p.number]
   1703                                     if oldp.line > pp.line:
   1704                                         action[st,a] = -p.number
   1705                                         actionp[st,a] = p
   1706                                     # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
   1707                                     n_rrconflict += 1
   1708                                     _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
   1709                                     _vf.write("  ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
   1710                                 else:
   1711                                     sys.stderr.write("Unknown conflict in state %d\n" % st)
   1712                             else:
   1713                                 action[st,a] = -p.number
   1714                                 actionp[st,a] = p
   1715                 else:
   1716                     i = p.lr_index
   1717                     a = p.prod[i+1]       # Get symbol right after the "."
   1718                     if a in Terminals:
   1719                         g = lr0_goto(I,a)
   1720                         j = _lr0_cidhash.get(id(g),-1)
   1721                         if j >= 0:
   1722                             # We are in a shift state
   1723                             actlist.append((a,p,"shift and go to state %d" % j))
   1724                             r = action.get((st,a),None)
   1725                             if r is not None:
   1726                                 # Whoa have a shift/reduce or shift/shift conflict
   1727                                 if r > 0:
   1728                                     if r != j:
   1729                                         sys.stderr.write("Shift/shift conflict in state %d\n" % st)
   1730                                 elif r < 0:
   1731                                     # Do a precedence check.
   1732                                     #   -  if precedence of reduce rule is higher, we reduce.
   1733                                     #   -  if precedence of reduce is same and left assoc, we reduce.
   1734                                     #   -  otherwise we shift
   1735                                     rprec,rlevel = Productions[actionp[st,a].number].prec
   1736                                     sprec,slevel = Precedence.get(a,('right',0))
   1737                                     if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
   1738                                         # We decide to shift here... highest precedence to shift
   1739                                         action[st,a] = j
   1740                                         actionp[st,a] = p
   1741                                         if not rlevel:
   1742                                             n_srconflict += 1
   1743                                             _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
   1744                                             _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
   1745                                     elif (slevel == rlevel) and (rprec == 'nonassoc'):
   1746                                         action[st,a] = None
   1747                                     else:                                            
   1748                                         # Hmmm. Guess we'll keep the reduce
   1749                                         if not slevel and not rlevel:
   1750                                             n_srconflict +=1
   1751                                             _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
   1752                                             _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
   1753                                             
   1754                                 else:
   1755                                     sys.stderr.write("Unknown conflict in state %d\n" % st)
   1756                             else:
   1757                                 action[st,a] = j
   1758                                 actionp[st,a] = p
   1759                                 
   1760             except Exception as e:
   1761                 raise YaccError("Hosed in lr_parse_table").with_traceback(e)
   1762 
   1763         # Print the actions associated with each terminal
   1764         if yaccdebug:
   1765           _actprint = { }
   1766           for a,p,m in actlist:
   1767             if (st,a) in action:
   1768                 if p is actionp[st,a]:
   1769                     _vf.write("    %-15s %s\n" % (a,m))
   1770                     _actprint[(a,m)] = 1
   1771           _vf.write("\n")
   1772           for a,p,m in actlist:
   1773             if (st,a) in action:
   1774                 if p is not actionp[st,a]:
   1775                     if (a,m) not in _actprint:
   1776                         _vf.write("  ! %-15s [ %s ]\n" % (a,m))
   1777                         _actprint[(a,m)] = 1
   1778             
   1779         # Construct the goto table for this state
   1780         if yaccdebug:
   1781             _vf.write("\n")
   1782         nkeys = { }
   1783         for ii in I:
   1784             for s in ii.usyms:
   1785                 if s in Nonterminals:
   1786                     nkeys[s] = None
   1787         for n in nkeys.keys():
   1788             g = lr0_goto(I,n)
   1789             j = _lr0_cidhash.get(id(g),-1)            
   1790             if j >= 0:
   1791                 goto[st,n] = j
   1792                 if yaccdebug:
   1793                     _vf.write("    %-30s shift and go to state %d\n" % (n,j))
   1794 
   1795         st += 1
   1796 
   1797     if yaccdebug:
   1798         if n_srconflict == 1:
   1799             sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
   1800         if n_srconflict > 1:
   1801             sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
   1802         if n_rrconflict == 1:
   1803             sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
   1804         if n_rrconflict > 1:
   1805             sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
   1806 
   1807 # -----------------------------------------------------------------------------
   1808 #                          ==== LR Utility functions ====
   1809 # -----------------------------------------------------------------------------
   1810 
   1811 # -----------------------------------------------------------------------------
   1812 # _lr_write_tables()
   1813 #
   1814 # This function writes the LR parsing tables to a file
   1815 # -----------------------------------------------------------------------------
   1816 
   1817 def lr_write_tables(modulename=tab_module,outputdir=''):
   1818     filename = os.path.join(outputdir,modulename) + ".py"
   1819     try:
   1820         f = open(filename,"w")
   1821 
   1822         f.write("""
   1823 # %s
   1824 # This file is automatically generated. Do not edit.
   1825 
   1826 _lr_method = %s
   1827 
   1828 _lr_signature = %s
   1829 """ % (filename, repr(_lr_method), repr(Signature.digest())))
   1830 
   1831         # Change smaller to 0 to go back to original tables
   1832         smaller = 1
   1833                 
   1834         # Factor out names to try and make smaller
   1835         if smaller:
   1836             items = { }
   1837         
   1838             for k,v in _lr_action.items():
   1839                 i = items.get(k[1])
   1840                 if not i:
   1841                     i = ([],[])
   1842                     items[k[1]] = i
   1843                 i[0].append(k[0])
   1844                 i[1].append(v)
   1845 
   1846             f.write("\n_lr_action_items = {")
   1847             for k,v in items.items():
   1848                 f.write("%r:([" % k)
   1849                 for i in v[0]:
   1850                     f.write("%r," % i)
   1851                 f.write("],[")
   1852                 for i in v[1]:
   1853                     f.write("%r," % i)
   1854                            
   1855                 f.write("]),")
   1856             f.write("}\n")
   1857 
   1858             f.write("""
   1859 _lr_action = { }
   1860 for _k, _v in _lr_action_items.items():
   1861    for _x,_y in zip(_v[0],_v[1]):
   1862        _lr_action[(_x,_k)] = _y
   1863 del _lr_action_items
   1864 """)
   1865             
   1866         else:
   1867             f.write("\n_lr_action = { ");
   1868             for k,v in _lr_action.items():
   1869                 f.write("(%r,%r):%r," % (k[0],k[1],v))
   1870             f.write("}\n");
   1871 
   1872         if smaller:
   1873             # Factor out names to try and make smaller
   1874             items = { }
   1875         
   1876             for k,v in _lr_goto.items():
   1877                 i = items.get(k[1])
   1878                 if not i:
   1879                     i = ([],[])
   1880                     items[k[1]] = i
   1881                 i[0].append(k[0])
   1882                 i[1].append(v)
   1883 
   1884             f.write("\n_lr_goto_items = {")
   1885             for k,v in items.items():
   1886                 f.write("%r:([" % k)
   1887                 for i in v[0]:
   1888                     f.write("%r," % i)
   1889                 f.write("],[")
   1890                 for i in v[1]:
   1891                     f.write("%r," % i)
   1892                            
   1893                 f.write("]),")
   1894             f.write("}\n")
   1895 
   1896             f.write("""
   1897 _lr_goto = { }
   1898 for _k, _v in _lr_goto_items.items():
   1899    for _x,_y in zip(_v[0],_v[1]):
   1900        _lr_goto[(_x,_k)] = _y
   1901 del _lr_goto_items
   1902 """)
   1903         else:
   1904             f.write("\n_lr_goto = { ");
   1905             for k,v in _lr_goto.items():
   1906                 f.write("(%r,%r):%r," % (k[0],k[1],v))                    
   1907             f.write("}\n");
   1908 
   1909         # Write production table
   1910         f.write("_lr_productions = [\n")
   1911         for p in Productions:
   1912             if p:
   1913                 if (p.func):
   1914                     f.write("  (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
   1915                 else:
   1916                     f.write("  (%r,%d,None,None,None),\n" % (p.name, p.len))
   1917             else:
   1918                 f.write("  None,\n")
   1919         f.write("]\n")
   1920         
   1921         f.close()
   1922 
   1923     except IOError as e:
   1924         print("Unable to create '%s'" % filename)
   1925         print(e)
   1926 
   1927 def lr_read_tables(module=tab_module,optimize=0):
   1928     global _lr_action, _lr_goto, _lr_productions, _lr_method
   1929     try:
   1930         exec("import %s as parsetab" % module)
   1931         
   1932         if (optimize) or (Signature.digest() == parsetab._lr_signature):
   1933             _lr_action = parsetab._lr_action
   1934             _lr_goto   = parsetab._lr_goto
   1935             _lr_productions = parsetab._lr_productions
   1936             _lr_method = parsetab._lr_method
   1937             return 1
   1938         else:
   1939             return 0
   1940         
   1941     except (ImportError,AttributeError):
   1942         return 0
   1943 
   1944 
   1945 # Available instance types.  This is used when parsers are defined by a class.
   1946 # In Python3 the InstanceType and ObjectType are no more, they've passed, ceased
   1947 # to be, they are ex-classes along with old-style classes
   1948 
   1949 try:
   1950    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
   1951 except AttributeError:
   1952    _INSTANCETYPE = object
   1953 
   1954 # -----------------------------------------------------------------------------
   1955 # yacc(module)
   1956 #
   1957 # Build the parser module
   1958 # -----------------------------------------------------------------------------
   1959 
   1960 def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''):
   1961     global yaccdebug
   1962     yaccdebug = debug
   1963     
   1964     initialize_vars()
   1965     files = { }
   1966     error = 0
   1967 
   1968 
   1969     # Add parsing method to signature
   1970     Signature.update(util.encode_input(method))
   1971     
   1972     # If a "module" parameter was supplied, extract its dictionary.
   1973     # Note: a module may in fact be an instance as well.
   1974     
   1975     if module:
   1976         # User supplied a module object.
   1977         if isinstance(module, types.ModuleType):
   1978             ldict = module.__dict__
   1979         elif isinstance(module, _INSTANCETYPE):
   1980             _items = [(k,getattr(module,k)) for k in dir(module)]
   1981             ldict = { }
   1982             for i in _items:
   1983                 ldict[i[0]] = i[1]
   1984         else:
   1985             raise ValueError("Expected a module")
   1986         
   1987     else:
   1988         # No module given.  We might be able to get information from the caller.
   1989         # Throw an exception and unwind the traceback to get the globals
   1990         
   1991         try:
   1992             raise RuntimeError
   1993         except RuntimeError:
   1994             e,b,t = sys.exc_info()
   1995             f = t.tb_frame
   1996             f = f.f_back           # Walk out to our calling function
   1997             ldict = f.f_globals    # Grab its globals dictionary
   1998 
   1999     # Add starting symbol to signature
   2000     if not start:
   2001         start = ldict.get("start",None)
   2002     if start:
   2003         Signature.update(util.encode_input(start))
   2004 
   2005     # If running in optimized mode.  We're going to
   2006 
   2007     if (optimize and lr_read_tables(tabmodule,1)):
   2008         # Read parse table
   2009         del Productions[:]
   2010         for p in _lr_productions:
   2011             if not p:
   2012                 Productions.append(None)
   2013             else:
   2014                 m = MiniProduction()
   2015                 m.name = p[0]
   2016                 m.len  = p[1]
   2017                 m.file = p[3]
   2018                 m.line = p[4]
   2019                 if p[2]:
   2020                     m.func = ldict[p[2]]
   2021                 Productions.append(m)
   2022         
   2023     else:
   2024         # Get the tokens map
   2025         if (module and isinstance(module,_INSTANCETYPE)):
   2026             tokens = getattr(module,"tokens",None)
   2027         else:
   2028             tokens = ldict.get("tokens",None)
   2029     
   2030         if not tokens:
   2031             raise YaccError("module does not define a list 'tokens'")
   2032         if not (isinstance(tokens,list) or isinstance(tokens,tuple)):
   2033             raise YaccError("tokens must be a list or tuple.")
   2034 
   2035         # Check to see if a requires dictionary is defined.
   2036         requires = ldict.get("require",None)
   2037         if requires:
   2038             if not (isinstance(requires,dict)):
   2039                 raise YaccError("require must be a dictionary.")
   2040 
   2041             for r,v in requires.items():
   2042                 try:
   2043                     if not (isinstance(v,list)):
   2044                         raise TypeError
   2045                     v1 = [x.split(".") for x in v]
   2046                     Requires[r] = v1
   2047                 except Exception:
   2048                     print("Invalid specification for rule '%s' in require. Expected a list of strings" % r)
   2049 
   2050         
   2051         # Build the dictionary of terminals.  We a record a 0 in the
   2052         # dictionary to track whether or not a terminal is actually
   2053         # used in the grammar
   2054 
   2055         if 'error' in tokens:
   2056             print("yacc: Illegal token 'error'.  Is a reserved word.")
   2057             raise YaccError("Illegal token name")
   2058 
   2059         for n in tokens:
   2060             if n in Terminals:
   2061                 print("yacc: Warning. Token '%s' multiply defined." % n)
   2062             Terminals[n] = [ ]
   2063 
   2064         Terminals['error'] = [ ]
   2065 
   2066         # Get the precedence map (if any)
   2067         prec = ldict.get("precedence",None)
   2068         if prec:
   2069             if not (isinstance(prec,list) or isinstance(prec,tuple)):
   2070                 raise YaccError("precedence must be a list or tuple.")
   2071             add_precedence(prec)
   2072             Signature.update(util.encode_input(repr(prec)))
   2073 
   2074         for n in tokens:
   2075             if n not in Precedence:
   2076                 Precedence[n] = ('right',0)         # Default, right associative, 0 precedence
   2077 
   2078         # Look for error handler
   2079         ef = ldict.get('p_error',None)
   2080         if ef:
   2081             if isinstance(ef,types.FunctionType):
   2082                 ismethod = 0
   2083             elif isinstance(ef, types.MethodType):
   2084                 ismethod = 1
   2085             else:
   2086                 raise YaccError("'p_error' defined, but is not a function or method.")
   2087             eline = ef.__code__.co_firstlineno
   2088             efile = ef.__code__.co_filename
   2089             files[efile] = None
   2090 
   2091             if (ef.__code__.co_argcount != 1+ismethod):
   2092                 raise YaccError("%s:%d: p_error() requires 1 argument." % (efile,eline))
   2093             global Errorfunc
   2094             Errorfunc = ef
   2095         else:
   2096             print("yacc: Warning. no p_error() function is defined.")
   2097             
   2098         # Get the list of built-in functions with p_ prefix
   2099         symbols = [ldict[f] for f in ldict.keys()
   2100                if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
   2101                    and ldict[f].__name__ != 'p_error')]
   2102 
   2103         # Check for non-empty symbols
   2104         if len(symbols) == 0:
   2105             raise YaccError("no rules of the form p_rulename are defined.")
   2106     
   2107         # Sort the symbols by line number
   2108         symbols.sort(key=lambda x: x.__code__.co_firstlineno)
   2109 
   2110         # Add all of the symbols to the grammar
   2111         for f in symbols:
   2112             if (add_function(f)) < 0:
   2113                 error += 1
   2114             else:
   2115                 files[f.__code__.co_filename] = None
   2116 
   2117         # Make a signature of the docstrings
   2118         for f in symbols:
   2119             if f.__doc__:
   2120                 Signature.update(util.encode_input(f.__doc__))
   2121     
   2122         lr_init_vars()
   2123 
   2124         if error:
   2125             raise YaccError("Unable to construct parser.")
   2126 
   2127         if not lr_read_tables(tabmodule):
   2128 
   2129             # Validate files
   2130             for filename in files.keys():
   2131                 if not validate_file(filename):
   2132                     error = 1
   2133 
   2134             # Validate dictionary
   2135             validate_dict(ldict)
   2136 
   2137             if start and start not in Prodnames:
   2138                 raise YaccError("Bad starting symbol '%s'" % start)
   2139         
   2140             augment_grammar(start)    
   2141             error = verify_productions(cycle_check=check_recursion)
   2142             otherfunc = [ldict[f] for f in ldict.keys()
   2143                if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
   2144 
   2145             if error:
   2146                 raise YaccError("Unable to construct parser.")
   2147             
   2148             build_lritems()
   2149             compute_first1()
   2150             compute_follow(start)
   2151         
   2152             if method in ['SLR','LALR']:
   2153                 lr_parse_table(method)
   2154             else:
   2155                 raise YaccError("Unknown parsing method '%s'" % method)
   2156 
   2157             if write_tables:
   2158                 lr_write_tables(tabmodule,outputdir)        
   2159     
   2160             if yaccdebug:
   2161                 try:
   2162                     f = open(os.path.join(outputdir,debugfile),"w")
   2163                     f.write(_vfc.getvalue())
   2164                     f.write("\n\n")
   2165                     f.write(_vf.getvalue())
   2166                     f.close()
   2167                 except IOError as e:
   2168                     print("yacc: can't create '%s'" % debugfile,e)
   2169         
   2170     # Made it here.   Create a parser object and set up its internal state.
   2171     # Set global parse() method to bound method of parser object.
   2172 
   2173     p = Parser("xyzzy")
   2174     p.productions = Productions
   2175     p.errorfunc = Errorfunc
   2176     p.action = _lr_action
   2177     p.goto   = _lr_goto
   2178     p.method = _lr_method
   2179     p.require = Requires
   2180 
   2181     global parse
   2182     parse = p.parse
   2183 
   2184     global parser
   2185     parser = p
   2186 
   2187     # Clean up all of the globals we created
   2188     if (not optimize):
   2189         yacc_cleanup()
   2190     return p
   2191 
   2192 # yacc_cleanup function.  Delete all of the global variables
   2193 # used during table construction
   2194 
   2195 def yacc_cleanup():
   2196     global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
   2197     del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
   2198 
   2199     global Productions, Prodnames, Prodmap, Terminals 
   2200     global Nonterminals, First, Follow, Precedence, LRitems
   2201     global Errorfunc, Signature, Requires
   2202     
   2203     del Productions, Prodnames, Prodmap, Terminals
   2204     del Nonterminals, First, Follow, Precedence, LRitems
   2205     del Errorfunc, Signature, Requires
   2206     
   2207     global _vf, _vfc
   2208     del _vf, _vfc
   2209     
   2210     
   2211 # Stub that raises an error if parsing is attempted without first calling yacc()
   2212 def parse(*args,**kwargs):
   2213     raise YaccError("yacc: No parser built with yacc()")
   2214 
   2215