# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2011,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented.  The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup (at] alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
#                          :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

__version__    = "3.4"
__tabversion__ = "3.2"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

import re, types, sys, os.path

# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
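
# For example (an illustrative sketch, not part of this module), a logger
# from the standard logging module can be passed to yacc() in place of the
# default PlyLogger.  Any object offering debug(), info(), warning(),
# error(), and critical() methods will do:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debug=True, debuglog=log, errorlog=log)
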
# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually operate on the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
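
# For example (an illustrative sketch; the rule and token names are made up),
# this is how a grammar rule sees the YaccProduction object p:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]        # reads/writes the .value of each symbol
#         line = p.lineno(2)        # line number of the PLUS token
#         span = p.lexspan(1)       # (lexpos, endlexpos) of the left expr
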
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf

    def errok(self):
        self.errorok = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self, input=None, lexer=None, debug=0, tracking=0, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=None, tracking=0, tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = []              # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                 # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions          # Local reference to production list (to avoid lookup on self.)
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = []                  # Stack of parsing states
        self.statestack = statestack
        symstack = []                    # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack          # Put in the production
        errtoken = None                  # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that.  Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack  : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None
                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str,
                                   "[" + ",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + "]", -t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [], -t)
                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, "value", None)
                    # --! DEBUG
                    debug.info("Done   : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t is None:

                # --! DEBUG
                debug.error('Error  : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok, token, restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart     # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead, "lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=0, tracking=0, tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = []              # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                 # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions          # Local reference to production list (to avoid lookup on self.)
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = []                  # Stack of parsing states
        self.statestack = statestack
        symstack = []                    # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack          # Put in the production
        errtoken = None                  # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n, "value", None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok, token, restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart     # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead, "lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=0, tracking=0, tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = []              # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                 # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions          # Local reference to production list (to avoid lookup on self.)
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = []                  # Stack of parsing states
        self.statestack = statestack
        symstack = []                    # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack          # Put in the production
        errtoken = None                  # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n, "value", None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok, token, restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart     # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.
                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead, "lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

            continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
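
# For example (illustrative only), the rule 'expr : expr PLUS expr' might be
# represented as:
#
#     p = Production(1, 'expr', ['expr', 'PLUS', 'expr'],
#                    precedence=('left', 1), func='p_expr_plus')
#     str(p)                 # 'expr -> expr PLUS expr'
#     p.len                  # 3
#     p.usyms                # ['expr', 'PLUS']  (duplicates removed)
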
# This class serves as a minimal stand-in for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here are
# the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item.  For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, ".")
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem(" + str(self) + ")"

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
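
# For example (illustrative), with PLUS defined as a terminal:
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], {'PLUS': [], 'NUMBER': []})
#     # -> 'PLUS'
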
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]  # A list of all of the productions.  The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First = {}            # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}       # Precedence rules for each terminal. Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = {}   # Precedence rules that were actually used by the grammar.
                                   # This is only used to provide error checking and to generate
                                   # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar


    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], "Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
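
    # For example (an illustrative sketch), giving TIMES a higher precedence
    # level than PLUS, both left-associative:
    #
    #     g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)
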
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file, line, s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname, None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file, line, precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file, line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
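
    # Continuing the sketch above (the function names are hypothetical;
    # normally yacc() derives these calls from p_* functions and their
    # docstrings):
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'expr'], func='p_expr_plus')
    #     g.add_production('expr', ['NUMBER'], func='p_expr_number')
    #     g.set_start('expr')    # installs production 0:  S' -> expr
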
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = {}
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        mark_reachable_from(self.Productions[0].prod[0])

        return [s for s in list(self.Nonterminals)
                if not reachable[s]]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite


    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    result.append((s, p))
        return result
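
    # These checks are normally driven by yacc() itself, but they can be
    # run directly (an illustrative sketch):
    #
    #     for sym, prod in g.undefined_symbols():
    #         print("%s:%d: symbol '%s' used, but not defined" % (prod.file, prod.line, sym))
    #     for nt in g.find_unreachable():
    #         print("rule '%s' is unreachable" % nt)
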
    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused
    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod) - 1):
                            # Add elements of Follow(p.name) to Follow(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd:
                break
        return self.Follow
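
    # For the toy grammar (illustrative)
    #
    #     expr -> expr PLUS expr | NUMBER
    #
    # these computations yield FIRST(expr) = ['NUMBER'] and
    # FOLLOW(expr) = ['$end', 'PLUS'], since a NUMBER begins every
    # derivation of expr and a PLUS (or end of input) may follow one.
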
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #     E -> E PLUS E
    #
    # creates the list
    #
    #     [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E .]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self,module):
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
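
# -----------------------------------------------------------------------------
# Usage sketch (illustrative).  Loading previously cached tables and rebinding
# their action functions is essentially what yacc() does on a warm start;
# 'parser.p' and module_dict are placeholders:
#
#     lr = LRTable()
#     signature = lr.read_table('parsetab')    # or lr.read_pickle('parser.p')
#     lr.bind_callables(module_dict)           # module_dict: caller's symbols
# -----------------------------------------------------------------------------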
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables
# from a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
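
# -----------------------------------------------------------------------------
# Illustration (not used by PLY itself): a tiny, self-contained demo of
# digraph().  The relation 1 R 2, 2 R 3 and the base function F'(x) = [10*x]
# are made up for the example.
# -----------------------------------------------------------------------------

def _example_digraph():
    X  = [1, 2, 3]
    R  = lambda x: {1: [2], 2: [3], 3: []}[x]    # the relation x R y
    FP = lambda x: [10*x]                        # F'(x)
    # Returns {1: [10, 20, 30], 2: [20, 30], 3: [30]}: each F(x) is F'(x)
    # plus everything reachable through R.
    return digraph(X, R, FP)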
class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
#                          == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                         # Action table
        self.lr_goto = {}                           # Goto table
        self.lr_productions = grammar.Productions   # Copy of grammar Production array
        self.lr_goto_cache = {}                     # Cache of computed gotos
        self.lr0_cidhash = {}                       # Cache of closures

        self._add_count = 0                         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                         # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.  This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self,I,x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):

        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g: continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
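
    # -----------------------------------------------------------------------------
    # Worked example (illustrative; the grammar is hypothetical).  For
    #
    #     S' -> E
    #     E  -> E PLUS T | T
    #     T  -> NUMBER
    #
    # the initial state is I0 = lr0_closure([S' -> . E]):
    #
    #     I0 = { S' -> . E,  E -> . E PLUS T,  E -> . T,  T -> . NUMBER }
    #
    # and lr0_goto(I0, 'E') yields the new item set
    #
    #     { S' -> E .,  E -> E . PLUS T }
    #
    # lr0_items() repeats this until no new item sets appear.
    # -----------------------------------------------------------------------------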
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    #     DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #     J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #     McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------

    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = {}
        num_nullable = 0
        while 1:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable[p.name] = 1
                    continue
                for t in p.prod:
                    if not t in nullable: break
                else:
                    nullable[p.name] = 1
            if len(nullable) == num_nullable: break
            num_nullable = len(nullable)
        return nullable

    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.  These are transitions in which a dot appears immediately before
    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self,C):
        trans = []
        for state in range(len(C)):
            for p in C[state]:
                if p.lr_index < p.len - 1:
                    t = (state,p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans: trans.append(t)
        return trans

    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self,C,trans,nullable):
        state,N = trans
        terms = []

        g = self.lr0_goto(C[state],N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms: terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms

    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------

    def reads_relation(self,C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state],N)
        j = self.lr0_cidhash.get(id(g),-1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j,a))

        return rel
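
    # -----------------------------------------------------------------------------
    # Example (illustrative; the rules are hypothetical).  For
    #
    #     optsign : PLUS
    #             | MINUS
    #             |
    #     number  : optsign DIGITS
    #
    # compute_nullable_nonterminals() returns {'optsign': 1}: the empty
    # alternative makes optsign nullable, but number is not nullable because
    # DIGITS is a terminal and never enters the nullable set.
    # -----------------------------------------------------------------------------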
    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."  We then build a relationship between this final
    # state and the starting state.  These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states.  (p,A) INCLUDES (p',B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self,C,trans,nullable):

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation;
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No, forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len: continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
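
    # -----------------------------------------------------------------------------
    # Shape of the result (illustrative; values hypothetical):
    #
    #     lookdict[(state, 'A')]  = [(final_state, production), ...]
    #     includedict[(j, 't')]   = [(state, 'N'), ...]
    #
    # i.e. lookdict maps each non-terminal transition to the (state, rule) pairs
    # it "looks back" to, and includedict maps a transition (p,A) to the
    # transitions (p',B) with (p,A) INCLUDES (p',B), whose follow sets are
    # merged into Follow(p,A) by compute_follow_sets() below.
    # -----------------------------------------------------------------------------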
    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        = Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of nullable non-terminals
    #
    # Returns a dictionary mapping transitions to their read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self,C, ntrans, nullable):
        FP = lambda x: self.dr_relation(C,x,nullable)
        R  = lambda x: self.reads_relation(C,x,nullable)
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    #     Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #          ntrans   = Set of nonterminal transitions
    #          readsets = Readset (previously computed)
    #          inclsets = Include sets (previously computed)
    #
    # Returns a dictionary mapping transitions to their follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self,ntrans,readsets,inclsets):
        FP = lambda x: readsets[x]
        R  = lambda x: inclsets.get(x,[])
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:  lookbacks - Set of lookback relations
    #          followset - Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self,lookbacks,followset):
        for trans,lb in lookbacks.items():
            # Loop over productions in lookback
            for state,p in lb:
                if not state in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans,[])
                for a in f:
                    if a not in p.lookaheads[state]: p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self,C):
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)
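
    # -----------------------------------------------------------------------------
    # Note (illustrative; values hypothetical).  After add_lalr_lookaheads()
    # runs, every production carries a per-state lookahead map, e.g.:
    #
    #     p.lookaheads = { 7: ['PLUS', '$end'] }
    #
    # lr_parse_table() below consults this map when building LALR tables,
    # instead of the coarser global Follow() sets used for SLR.
    # -----------------------------------------------------------------------------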
    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto   = self.lr_goto       # Goto array
        action = self.lr_action     # Action array
        log    = self.log           # Logger for output

        actionp = { }               # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]           # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here.
                                    # By default we favor shifting, subject to the
                                    # precedence rules below.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.  In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   - if precedence of reduce rule is higher, we reduce.
                                    #   - if precedence of reduce is same and left assoc, we reduce.
                                    #   - otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))

                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")

            # Print the actions that were not used.  (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
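
    # -----------------------------------------------------------------------------
    # Example (illustrative; the grammar and declaration are hypothetical).
    # With the usual calculator precedence declaration
    #
    #     precedence = (
    #         ('left', 'PLUS', 'MINUS'),      # level 1
    #         ('left', 'TIMES', 'DIVIDE'),    # level 2
    #     )
    #
    # a conflict between reducing "expr -> expr PLUS expr ." and shifting TIMES
    # is resolved as a shift, because TIMES sits on a higher precedence level
    # than the rule's rightmost terminal PLUS.  With equal levels and 'left'
    # associativity the reduce wins; 'nonassoc' turns the entry into an error
    # (st_action[a] = None).
    # -----------------------------------------------------------------------------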
    # -----------------------------------------------------------------------------
    # write_table()
    #
    # This function writes the LR parsing tables to a Python module file
    # -----------------------------------------------------------------------------

    def write_table(self,modulename,outputdir='',signature=""):
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
""" % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try to make the tables smaller
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_action: _lr_action[_x] = { }
        _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ")
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            if smaller:
                # Factor out names to try to make the tables smaller
                items = { }

                for s,nd in self.lr_goto.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_goto: _lr_goto[_x] = { }
        _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ")
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return

    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a file specified by filename
    # -----------------------------------------------------------------------------

    def pickle_table(self,filename,signature=""):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        outf = open(filename,"wb")
        pickle.dump(__tabversion__,outf,pickle_protocol)
        pickle.dump(self.lr_method,outf,pickle_protocol)
        pickle.dump(signature,outf,pickle_protocol)
        pickle.dump(self.lr_action,outf,pickle_protocol)
        pickle.dump(self.lr_goto,outf,pickle_protocol)

        outp = []
        for p in self.lr_productions:
            if p.func:
                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
            else:
                outp.append((str(p),p.name,p.len,None,None,None))
        pickle.dump(outp,outf,pickle_protocol)
        outf.close()
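
# -----------------------------------------------------------------------------
# Usage sketch (illustrative; the variables and the pickle filename are
# placeholders).  Writing the generated tables out, as yacc() does below:
#
#     lr = LRGeneratedTable(grammar, 'LALR', debuglog)
#     lr.write_table('parsetab', outputdir='', signature=sig)   # -> parsetab.py
#     lr.pickle_table('parser.p', signature=sig)                # binary pickle
# -----------------------------------------------------------------------------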
# -----------------------------------------------------------------------------
#                          === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features, followed by the yacc() function itself.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    try:
        raise RuntimeError
    except RuntimeError:
        e,b,t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)

        return ldict

# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p: continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms   = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':' or '::='" % (file,dline))

            grammar.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))

    return grammar
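
# -----------------------------------------------------------------------------
# Example (illustrative; the filename is hypothetical):
#
#     parse_grammar("expression : expression PLUS term\n"
#                   "           | term", "calc.py", 10)
#
# returns
#
#     [('calc.py', 11, 'expression', ['expression', 'PLUS', 'term']),
#      ('calc.py', 12, 'expression', ['term'])]
# -----------------------------------------------------------------------------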
# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self,pdict,log=None):
        self.pdict      = pdict
        self.start      = None
        self.error_func = None
        self.tokens     = None
        self.files      = {}
        self.grammar    = []
        self.error      = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            pass
        return sig.digest()
    # -----------------------------------------------------------------------------
    # validate_files()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            if ext != '.py': continue        # No idea what this is. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue

            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. It is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1
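
    # -----------------------------------------------------------------------------
    # Example (illustrative).  The module being reflected over is expected to
    # define, in the standard PLY style:
    #
    #     tokens = ('NUMBER', 'PLUS', 'MINUS')
    #
    #     def p_error(p):                   # exactly one argument
    #         print("Syntax error at", p)
    #
    # get_tokens()/validate_tokens() and get_error_func()/validate_error_func()
    # above check precisely these two definitions.
    # -----------------------------------------------------------------------------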
    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check that at least one grammar rule was defined
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1
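
        # ---------------------------------------------------------------------
        # Example (illustrative).  A typical action function collected by
        # get_pfunctions() and validated above:
        #
        #     def p_expression_plus(p):
        #         'expression : expression PLUS term'
        #         p[0] = p[1] + p[3]
        #
        # Its docstring is handed to parse_grammar(), and the resulting
        # productions accumulate in the local list 'grammar'.
        # ---------------------------------------------------------------------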
        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    pass

        self.grammar = grammar
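
# -----------------------------------------------------------------------------
# Usage sketch (illustrative; this condenses what yacc() below actually does
# with ParserReflect):
#
#     pinfo = ParserReflect(pdict, log=errorlog)
#     pinfo.get_all()
#     if pinfo.error or pinfo.validate_all():
#         raise YaccError("Unable to build parser")
#     signature = pinfo.signature()
# -----------------------------------------------------------------------------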
# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):

    global parse      # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)

    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected, state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
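
# -----------------------------------------------------------------------------
# Typical top-level usage (illustrative; the grammar fragment and data are
# hypothetical, and a lexer is assumed to be defined in the same module):
#
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS')           # shared with the lexer
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]
#
#     parser = yacc.yacc()                  # builds (or reloads) the tables
#     result = parser.parse(data)           # parse a string
# -----------------------------------------------------------------------------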