      1 """ANTLR3 runtime package"""
      2 
      3 # begin[licence]
      4 #
      5 # [The "BSD licence"]
      6 # Copyright (c) 2005-2008 Terence Parr
      7 # All rights reserved.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 3. The name of the author may not be used to endorse or promote products
     18 #    derived from this software without specific prior written permission.
     19 #
     20 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21 # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23 # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24 # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25 # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 #
     31 # end[licence]
     32 
     33 import sys
     34 import inspect
     35 
     36 from antlr3 import compatible_api_versions
     37 from antlr3.constants import DEFAULT_CHANNEL, HIDDEN_CHANNEL, EOF, \
     38      EOR_TOKEN_TYPE, INVALID_TOKEN_TYPE
     39 from antlr3.exceptions import RecognitionException, MismatchedTokenException, \
     40      MismatchedRangeException, MismatchedTreeNodeException, \
     41      NoViableAltException, EarlyExitException, MismatchedSetException, \
     42      MismatchedNotSetException, FailedPredicateException, \
     43      BacktrackingFailed, UnwantedTokenException, MissingTokenException
     44 from antlr3.tokens import CommonToken, SKIP_TOKEN
     45 from antlr3.compat import set, frozenset, reversed
     46 
     47 
     48 class RecognizerSharedState(object):
     49     """
     50     The set of fields needed by an abstract recognizer to recognize input
     51     and recover from errors etc...  As a separate state object, it can be
     52     shared among multiple grammars; e.g., when one grammar imports another.
     53 
      54     These fields are publicly visible but the actual state pointer per
     55     parser is protected.
     56     """
     57 
     58     def __init__(self):
     59         # Track the set of token types that can follow any rule invocation.
     60         # Stack grows upwards.
     61         self.following = []
     62 
     63         # This is true when we see an error and before having successfully
     64         # matched a token.  Prevents generation of more than one error message
     65         # per error.
     66         self.errorRecovery = False
     67 
     68         # The index into the input stream where the last error occurred.
     69         # This is used to prevent infinite loops where an error is found
     70         # but no token is consumed during recovery...another error is found,
      71         # ad nauseam.  This is a failsafe mechanism to guarantee that at least
     72         # one token/tree node is consumed for two errors.
     73         self.lastErrorIndex = -1
     74 
     75         # If 0, no backtracking is going on.  Safe to exec actions etc...
     76         # If >0 then it's the level of backtracking.
     77         self.backtracking = 0
     78 
     79         # An array[size num rules] of Map<Integer,Integer> that tracks
     80         # the stop token index for each rule.  ruleMemo[ruleIndex] is
     81         # the memoization table for ruleIndex.  For key ruleStartIndex, you
     82         # get back the stop token for associated rule or MEMO_RULE_FAILED.
     83         #
     84         # This is only used if rule memoization is on (which it is by default).
     85         self.ruleMemo = None
     86 
     87         ## Did the recognizer encounter a syntax error?  Track how many.
     88         self.syntaxErrors = 0
     89 
     90 
     91         # LEXER FIELDS (must be in same state object to avoid casting
     92         # constantly in generated code and Lexer object) :(
     93 
     94 
      95         ## The goal of all lexer rules/methods is to create a token object.
     96         # This is an instance variable as multiple rules may collaborate to
     97         # create a single token.  nextToken will return this object after
     98         # matching lexer rule(s).  If you subclass to allow multiple token
     99         # emissions, then set this to the last token to be matched or
    100         # something nonnull so that the auto token emit mechanism will not
    101         # emit another token.
    102         self.token = None
    103 
    104         ## What character index in the stream did the current token start at?
    105         # Needed, for example, to get the text for current token.  Set at
    106         # the start of nextToken.
    107         self.tokenStartCharIndex = -1
    108 
    109         ## The line on which the first character of the token resides
    110         self.tokenStartLine = None
    111 
    112         ## The character position of first character within the line
    113         self.tokenStartCharPositionInLine = None
    114 
    115         ## The channel number for the current token
    116         self.channel = None
    117 
    118         ## The token type for the current token
    119         self.type = None
    120 
    121         ## You can set the text for the current token to override what is in
     122         # the input char buffer.  Use setText() or set this instance var directly.
    123         self.text = None
    124 
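         # Illustrative sketch (not part of the runtime): in a composite grammar
         # the delegating and the delegated recognizers can share a single
         # RecognizerSharedState, so both see the same error-recovery and
         # backtracking variables.  "MyParser", "MyParser_MyImport" and
         # token_stream are hypothetical names used only for this example.
         #
         #   state = RecognizerSharedState()
         #   main = MyParser(token_stream, state=state)
         #   delegate = MyParser_MyImport(token_stream, state=state)
         #   assert main._state is delegate._state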
    125 
    126 class BaseRecognizer(object):
    127     """
    128     @brief Common recognizer functionality.
    129 
    130     A generic recognizer that can handle recognizers generated from
    131     lexer, parser, and tree grammars.  This is all the parsing
    132     support code essentially; most of it is error recovery stuff and
    133     backtracking.
    134     """
    135 
    136     MEMO_RULE_FAILED = -2
    137     MEMO_RULE_UNKNOWN = -1
    138 
    139     # copies from Token object for convenience in actions
    140     DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
    141 
    142     # for convenience in actions
    143     HIDDEN = HIDDEN_CHANNEL
    144 
    145     # overridden by generated subclasses
    146     tokenNames = None
    147 
     148     # The api_version attribute was introduced in 3.3. If it is not
     149     # overridden in the generated recognizer, we assume a default of v0.
    150     api_version = 0
    151 
    152     def __init__(self, state=None):
    153         # Input stream of the recognizer. Must be initialized by a subclass.
    154         self.input = None
    155 
    156         ## State of a lexer, parser, or tree parser are collected into a state
    157         # object so the state can be shared.  This sharing is needed to
    158         # have one grammar import others and share same error variables
    159         # and other state variables.  It's a kind of explicit multiple
    160         # inheritance via delegation of methods and shared state.
    161         if state is None:
    162             state = RecognizerSharedState()
    163         self._state = state
    164 
    165         if self.api_version not in compatible_api_versions:
    166             raise RuntimeError(
    167                 ("ANTLR version mismatch: "
    168                  "The recognizer has been generated with API V%s, "
     169                  "but this runtime does not support that version.")
    170                 % self.api_version)
    171 
    172     # this one only exists to shut up pylint :(
    173     def setInput(self, input):
    174         self.input = input
    175 
    176 
    177     def reset(self):
    178         """
     179         Reset the parser's state; subclasses must also rewind the input stream.
    180         """
    181 
    182         # wack everything related to error recovery
    183         if self._state is None:
    184             # no shared state work to do
    185             return
    186 
    187         self._state.following = []
    188         self._state.errorRecovery = False
    189         self._state.lastErrorIndex = -1
    190         self._state.syntaxErrors = 0
    191         # wack everything related to backtracking and memoization
    192         self._state.backtracking = 0
    193         if self._state.ruleMemo is not None:
    194             self._state.ruleMemo = {}
    195 
    196 
    197     def match(self, input, ttype, follow):
    198         """
    199         Match current input symbol against ttype.  Attempt
    200         single token insertion or deletion error recovery.  If
    201         that fails, throw MismatchedTokenException.
    202 
    203         To turn off single token insertion or deletion error
    204         recovery, override recoverFromMismatchedToken() and have it
    205         throw an exception. See TreeParser.recoverFromMismatchedToken().
    206         This way any error in a rule will cause an exception and
    207         immediate exit from rule.  Rule would recover by resynchronizing
    208         to the set of symbols that can follow rule ref.
    209         """
    210 
    211         matchedSymbol = self.getCurrentInputSymbol(input)
    212         if self.input.LA(1) == ttype:
    213             self.input.consume()
    214             self._state.errorRecovery = False
    215             return matchedSymbol
    216 
    217         if self._state.backtracking > 0:
    218             # FIXME: need to return matchedSymbol here as well. damn!!
    219             raise BacktrackingFailed
    220 
    221         matchedSymbol = self.recoverFromMismatchedToken(input, ttype, follow)
    222         return matchedSymbol
    223 
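             # Illustrative sketch (not part of the runtime): as the docstring
             # above suggests, single token insertion/deletion recovery can be
             # disabled by overriding recoverFromMismatchedToken() so that any
             # mismatch raises immediately.  "StrictParser" is a hypothetical name.
             #
             #   class StrictParser(Parser):
             #       def recoverFromMismatchedToken(self, input, ttype, follow):
             #           raise MismatchedTokenException(ttype, input)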
    224 
    225     def matchAny(self, input):
     226         """Match the wildcard (the '.' in a grammar): consume any one symbol."""
    227 
    228         self._state.errorRecovery = False
    229         self.input.consume()
    230 
    231 
    232     def mismatchIsUnwantedToken(self, input, ttype):
    233         return input.LA(2) == ttype
    234 
    235 
    236     def mismatchIsMissingToken(self, input, follow):
    237         if follow is None:
    238             # we have no information about the follow; we can only consume
    239             # a single token and hope for the best
    240             return False
    241 
    242         # compute what can follow this grammar element reference
    243         if EOR_TOKEN_TYPE in follow:
    244             viableTokensFollowingThisRule = self.computeContextSensitiveRuleFOLLOW()
    245             follow = follow | viableTokensFollowingThisRule
    246 
    247             if len(self._state.following) > 0:
    248                 # remove EOR if we're not the start symbol
    249                 follow = follow - set([EOR_TOKEN_TYPE])
    250 
    251         # if current token is consistent with what could come after set
    252         # then we know we're missing a token; error recovery is free to
    253         # "insert" the missing token
    254         if input.LA(1) in follow or EOR_TOKEN_TYPE in follow:
    255             return True
    256 
    257         return False
    258 
    259 
    260     def reportError(self, e):
    261         """Report a recognition problem.
    262 
     263         This method sets errorRecovery to indicate the parser is recovering,
    264         not parsing.  Once in recovery mode, no errors are generated.
    265         To get out of recovery mode, the parser must successfully match
    266         a token (after a resync).  So it will go:
    267 
    268         1. error occurs
    269         2. enter recovery mode, report error
    270         3. consume until token found in resynch set
    271         4. try to resume parsing
    272         5. next match() will reset errorRecovery mode
    273 
    274         If you override, make sure to update syntaxErrors if you care about
    275         that.
    276 
    277         """
    278 
    279         # if we've already reported an error and have not matched a token
    280         # yet successfully, don't report any errors.
    281         if self._state.errorRecovery:
    282             return
    283 
    284         self._state.syntaxErrors += 1 # don't count spurious
    285         self._state.errorRecovery = True
    286 
    287         self.displayRecognitionError(self.tokenNames, e)
    288 
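             # Illustrative sketch (not part of the runtime): an override that
             # collects messages instead of printing them, while keeping the
             # syntaxErrors counter and the errorRecovery flag in sync as the
             # docstring requires.  "CollectingParser" is a hypothetical name.
             #
             #   class CollectingParser(Parser):
             #       def __init__(self, *args, **kwargs):
             #           Parser.__init__(self, *args, **kwargs)
             #           self.errors = []
             #
             #       def reportError(self, e):
             #           if self._state.errorRecovery:
             #               return
             #           self._state.syntaxErrors += 1
             #           self._state.errorRecovery = True
             #           self.errors.append(self.getErrorMessage(e, self.tokenNames))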
    289 
    290     def displayRecognitionError(self, tokenNames, e):
    291         hdr = self.getErrorHeader(e)
    292         msg = self.getErrorMessage(e, tokenNames)
    293         self.emitErrorMessage(hdr+" "+msg)
    294 
    295 
    296     def getErrorMessage(self, e, tokenNames):
    297         """
    298         What error message should be generated for the various
    299         exception types?
    300 
    301         Not very object-oriented code, but I like having all error message
    302         generation within one method rather than spread among all of the
    303         exception classes. This also makes it much easier for the exception
    304         handling because the exception classes do not have to have pointers back
    305         to this object to access utility routines and so on. Also, changing
    306         the message for an exception type would be difficult because you
     307         would have to subclass the exception, but then somehow get ANTLR
    308         to make those kinds of exception objects instead of the default.
    309         This looks weird, but trust me--it makes the most sense in terms
    310         of flexibility.
    311 
    312         For grammar debugging, you will want to override this to add
     313         more information such as the rule invocation stack with
     314         getRuleInvocationStack() and,
    315         for no viable alts, the decision description and state etc...
    316 
    317         Override this to change the message generated for one or more
    318         exception types.
    319         """
    320 
    321         if isinstance(e, UnwantedTokenException):
    322             tokenName = "<unknown>"
    323             if e.expecting == EOF:
    324                 tokenName = "EOF"
    325 
    326             else:
    327                 tokenName = self.tokenNames[e.expecting]
    328 
    329             msg = "extraneous input %s expecting %s" % (
    330                 self.getTokenErrorDisplay(e.getUnexpectedToken()),
    331                 tokenName
    332                 )
    333 
    334         elif isinstance(e, MissingTokenException):
    335             tokenName = "<unknown>"
    336             if e.expecting == EOF:
    337                 tokenName = "EOF"
    338 
    339             else:
    340                 tokenName = self.tokenNames[e.expecting]
    341 
    342             msg = "missing %s at %s" % (
    343                 tokenName, self.getTokenErrorDisplay(e.token)
    344                 )
    345 
    346         elif isinstance(e, MismatchedTokenException):
    347             tokenName = "<unknown>"
    348             if e.expecting == EOF:
    349                 tokenName = "EOF"
    350             else:
    351                 tokenName = self.tokenNames[e.expecting]
    352 
    353             msg = "mismatched input " \
    354                   + self.getTokenErrorDisplay(e.token) \
    355                   + " expecting " \
    356                   + tokenName
    357 
    358         elif isinstance(e, MismatchedTreeNodeException):
    359             tokenName = "<unknown>"
    360             if e.expecting == EOF:
    361                 tokenName = "EOF"
    362             else:
    363                 tokenName = self.tokenNames[e.expecting]
    364 
    365             msg = "mismatched tree node: %s expecting %s" \
    366                   % (e.node, tokenName)
    367 
    368         elif isinstance(e, NoViableAltException):
    369             msg = "no viable alternative at input " \
    370                   + self.getTokenErrorDisplay(e.token)
    371 
    372         elif isinstance(e, EarlyExitException):
    373             msg = "required (...)+ loop did not match anything at input " \
    374                   + self.getTokenErrorDisplay(e.token)
    375 
    376         elif isinstance(e, MismatchedSetException):
    377             msg = "mismatched input " \
    378                   + self.getTokenErrorDisplay(e.token) \
    379                   + " expecting set " \
    380                   + repr(e.expecting)
    381 
    382         elif isinstance(e, MismatchedNotSetException):
    383             msg = "mismatched input " \
    384                   + self.getTokenErrorDisplay(e.token) \
    385                   + " expecting set " \
    386                   + repr(e.expecting)
    387 
    388         elif isinstance(e, FailedPredicateException):
    389             msg = "rule " \
    390                   + e.ruleName \
    391                   + " failed predicate: {" \
    392                   + e.predicateText \
    393                   + "}?"
    394 
    395         else:
    396             msg = str(e)
    397 
    398         return msg
    399 
    400 
    401     def getNumberOfSyntaxErrors(self):
    402         """
    403         Get number of recognition errors (lexer, parser, tree parser).  Each
     404         recognizer tracks its own number.  So parser and lexer each have a
     405         separate count.  Does not count the spurious errors found between
     406         an error and the next valid token match.
    407 
    408         See also reportError()
     409         """
    410         return self._state.syntaxErrors
    411 
    412 
    413     def getErrorHeader(self, e):
    414         """
    415         What is the error header, normally line/character position information?
    416         """
    417 
    418         source_name = self.getSourceName()
    419         if source_name is not None:
    420             return "%s line %d:%d" % (source_name, e.line, e.charPositionInLine)
    421         return "line %d:%d" % (e.line, e.charPositionInLine)
    422 
    423 
    424     def getTokenErrorDisplay(self, t):
    425         """
    426         How should a token be displayed in an error message? The default
    427         is to display just the text, but during development you might
    428         want to have a lot of information spit out.  Override in that case
     429         to use str(t) (which, for CommonToken, dumps everything about
     430         the token). This is better than forcing you to override a method in
     431         your token objects because you don't have to go modify your lexer
     432         so that it creates a new token class.
    433         """
    434 
    435         s = t.text
    436         if s is None:
    437             if t.type == EOF:
    438                 s = "<EOF>"
    439             else:
     440                 s = "<" + str(t.type) + ">"  # t.type is an int; avoid TypeError
    441 
    442         return repr(s)
    443 
    444 
    445     def emitErrorMessage(self, msg):
    446         """Override this method to change where error messages go"""
    447         sys.stderr.write(msg + '\n')
    448 
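             # Illustrative sketch (not part of the runtime): emitErrorMessage()
             # is the single output hook, so redirecting all diagnostics only
             # requires overriding it, e.g. to the standard logging module.
             # "MyLexer" stands for a hypothetical generated lexer class.
             #
             #   import logging
             #
             #   class LoggingLexer(MyLexer):
             #       def emitErrorMessage(self, msg):
             #           logging.getLogger("antlr3").warning(msg)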
    449 
    450     def recover(self, input, re):
    451         """
    452         Recover from an error found on the input stream.  This is
    453         for NoViableAlt and mismatched symbol exceptions.  If you enable
    454         single token insertion and deletion, this will usually not
    455         handle mismatched symbol exceptions but there could be a mismatched
    456         token that the match() routine could not recover from.
    457         """
    458 
    459         # PROBLEM? what if input stream is not the same as last time
    460         # perhaps make lastErrorIndex a member of input
    461         if self._state.lastErrorIndex == input.index():
    462             # uh oh, another error at same token index; must be a case
    463             # where LT(1) is in the recovery token set so nothing is
    464             # consumed; consume a single token so at least to prevent
    465             # an infinite loop; this is a failsafe.
    466             input.consume()
    467 
    468         self._state.lastErrorIndex = input.index()
    469         followSet = self.computeErrorRecoverySet()
    470 
    471         self.beginResync()
    472         self.consumeUntil(input, followSet)
    473         self.endResync()
    474 
    475 
    476     def beginResync(self):
    477         """
    478         A hook to listen in on the token consumption during error recovery.
     479         The DebugParser subclasses this to fire events to the listener.
    480         """
    481 
    482         pass
    483 
    484 
    485     def endResync(self):
    486         """
    487         A hook to listen in on the token consumption during error recovery.
     488         The DebugParser subclasses this to fire events to the listener.
    489         """
    490 
    491         pass
    492 
    493 
    494     def computeErrorRecoverySet(self):
    495         """
    496         Compute the error recovery set for the current rule.  During
    497         rule invocation, the parser pushes the set of tokens that can
    498         follow that rule reference on the stack; this amounts to
    499         computing FIRST of what follows the rule reference in the
    500         enclosing rule. This local follow set only includes tokens
    501         from within the rule; i.e., the FIRST computation done by
    502         ANTLR stops at the end of a rule.
    503 
    504         EXAMPLE
    505 
    506         When you find a "no viable alt exception", the input is not
    507         consistent with any of the alternatives for rule r.  The best
    508         thing to do is to consume tokens until you see something that
    509         can legally follow a call to r *or* any rule that called r.
    510         You don't want the exact set of viable next tokens because the
    511         input might just be missing a token--you might consume the
    512         rest of the input looking for one of the missing tokens.
    513 
    514         Consider grammar:
    515 
    516         a : '[' b ']'
    517           | '(' b ')'
    518           ;
    519         b : c '^' INT ;
    520         c : ID
    521           | INT
    522           ;
    523 
    524         At each rule invocation, the set of tokens that could follow
    525         that rule is pushed on a stack.  Here are the various "local"
    526         follow sets:
    527 
    528         FOLLOW(b1_in_a) = FIRST(']') = ']'
    529         FOLLOW(b2_in_a) = FIRST(')') = ')'
    530         FOLLOW(c_in_b) = FIRST('^') = '^'
    531 
    532         Upon erroneous input "[]", the call chain is
    533 
    534         a -> b -> c
    535 
    536         and, hence, the follow context stack is:
    537 
    538         depth  local follow set     after call to rule
     539           0         <EOF>                    a (from main())
    540           1          ']'                     b
     541           2          '^'                     c
    542 
    543         Notice that ')' is not included, because b would have to have
    544         been called from a different context in rule a for ')' to be
    545         included.
    546 
    547         For error recovery, we cannot consider FOLLOW(c)
    548         (context-sensitive or otherwise).  We need the combined set of
    549         all context-sensitive FOLLOW sets--the set of all tokens that
    550         could follow any reference in the call chain.  We need to
    551         resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    552         we resync'd to that token, we'd consume until EOF.  We need to
    553         sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    554         In this case, for input "[]", LA(1) is in this set so we would
    555         not consume anything and after printing an error rule c would
    556         return normally.  It would not find the required '^' though.
    557         At this point, it gets a mismatched token error and throws an
    558         exception (since LA(1) is not in the viable following token
    559         set).  The rule exception handler tries to recover, but finds
    560         the same recovery set and doesn't consume anything.  Rule b
    561         exits normally returning to rule a.  Now it finds the ']' (and
    562         with the successful match exits errorRecovery mode).
    563 
     564         So, you can see that the parser walks up the call chain looking
    565         for the token that was a member of the recovery set.
    566 
    567         Errors are not generated in errorRecovery mode.
    568 
    569         ANTLR's error recovery mechanism is based upon original ideas:
    570 
    571         "Algorithms + Data Structures = Programs" by Niklaus Wirth
    572 
    573         and
    574 
    575         "A note on error recovery in recursive descent parsers":
    576         http://portal.acm.org/citation.cfm?id=947902.947905
    577 
    578         Later, Josef Grosch had some good ideas:
    579 
    580         "Efficient and Comfortable Error Recovery in Recursive Descent
    581         Parsers":
    582         ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    583 
    584         Like Grosch I implemented local FOLLOW sets that are combined
    585         at run-time upon error to avoid overhead during parsing.
    586         """
    587 
    588         return self.combineFollows(False)
    589 
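             # Illustrative sketch (not part of the runtime): for the "[]" input
             # discussed above the follow stack holds roughly [{EOF}, {']'}, {'^'}]
             # when the error is detected in rule c, so combineFollows(False)
             # unions them into {EOF, ']', '^'} and recover() resyncs without
             # consuming the ']'.  RBRACK and CARET are hypothetical token-type
             # constants used only for this example.
             #
             #   recognizer._state.following = [set([EOF]), set([RBRACK]), set([CARET])]
             #   recovery_set = recognizer.computeErrorRecoverySet()
             #   # recovery_set == set([EOF, RBRACK, CARET])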
    590 
    591     def computeContextSensitiveRuleFOLLOW(self):
    592         """
    593         Compute the context-sensitive FOLLOW set for current rule.
    594         This is set of token types that can follow a specific rule
    595         reference given a specific call chain.  You get the set of
    596         viable tokens that can possibly come next (lookahead depth 1)
    597         given the current call chain.  Contrast this with the
    598         definition of plain FOLLOW for rule r:
    599 
    600          FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
    601 
     602         where x in T* and alpha, beta in V*; T is the set of terminals and
    603         V is the set of terminals and nonterminals.  In other words,
    604         FOLLOW(r) is the set of all tokens that can possibly follow
    605         references to r in *any* sentential form (context).  At
    606         runtime, however, we know precisely which context applies as
    607         we have the call chain.  We may compute the exact (rather
    608         than covering superset) set of following tokens.
    609 
    610         For example, consider grammar:
    611 
    612         stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    613              | "return" expr '.'
    614              ;
    615         expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    616         atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    617              | '(' expr ')'
    618              ;
    619 
    620         The FOLLOW sets are all inclusive whereas context-sensitive
    621         FOLLOW sets are precisely what could follow a rule reference.
     622         For input "i=(3);", here is the derivation:
    623 
    624         stat => ID '=' expr ';'
    625              => ID '=' atom ('+' atom)* ';'
    626              => ID '=' '(' expr ')' ('+' atom)* ';'
    627              => ID '=' '(' atom ')' ('+' atom)* ';'
    628              => ID '=' '(' INT ')' ('+' atom)* ';'
    629              => ID '=' '(' INT ')' ';'
    630 
    631         At the "3" token, you'd have a call chain of
    632 
    633           stat -> expr -> atom -> expr -> atom
    634 
    635         What can follow that specific nested ref to atom?  Exactly ')'
    636         as you can see by looking at the derivation of this specific
    637         input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
    638 
    639         You want the exact viable token set when recovering from a
     640         token mismatch.  Upon token mismatch, if LA(1) is a member of
    641         the viable next token set, then you know there is most likely
    642         a missing token in the input stream.  "Insert" one by just not
    643         throwing an exception.
    644         """
    645 
    646         return self.combineFollows(True)
    647 
    648 
    649     def combineFollows(self, exact):
    650         followSet = set()
    651         for idx, localFollowSet in reversed(list(enumerate(self._state.following))):
    652             followSet |= localFollowSet
    653             if exact:
    654                 # can we see end of rule?
    655                 if EOR_TOKEN_TYPE in localFollowSet:
    656                     # Only leave EOR in set if at top (start rule); this lets
     657                     # us know if we have to include FOLLOW(start rule); i.e., EOF
    658                     if idx > 0:
    659                         followSet.remove(EOR_TOKEN_TYPE)
    660 
    661                 else:
    662                     # can't see end of rule, quit
    663                     break
    664 
    665         return followSet
    666 
    667 
    668     def recoverFromMismatchedToken(self, input, ttype, follow):
    669         """Attempt to recover from a single missing or extra token.
    670 
    671         EXTRA TOKEN
    672 
    673         LA(1) is not what we are looking for.  If LA(2) has the right token,
    674         however, then assume LA(1) is some extra spurious token.  Delete it
     675         and then match LA(2) as if we were doing a normal match(), which
     676         advances the input.
    677 
    678         MISSING TOKEN
    679 
    680         If current token is consistent with what could come after
    681         ttype then it is ok to 'insert' the missing token, else throw
     682         exception.  For example, input 'i=(3;' is clearly missing the
    683         ')'.  When the parser returns from the nested call to expr, it
    684         will have call chain:
    685 
    686           stat -> expr -> atom
    687 
    688         and it will be trying to match the ')' at this point in the
    689         derivation:
    690 
    691              => ID '=' '(' INT ')' ('+' atom)* ';'
    692                                 ^
    693         match() will see that ';' doesn't match ')' and report a
    694         mismatched token error.  To recover, it sees that LA(1)==';'
    695         is in the set of tokens that can follow the ')' token
    696         reference in rule atom.  It can assume that you forgot the ')'.
    697         """
    698 
    699         e = None
    700 
    701         # if next token is what we are looking for then "delete" this token
    702         if self.mismatchIsUnwantedToken(input, ttype):
    703             e = UnwantedTokenException(ttype, input)
    704 
    705             self.beginResync()
    706             input.consume() # simply delete extra token
    707             self.endResync()
    708 
    709             # report after consuming so AW sees the token in the exception
    710             self.reportError(e)
    711 
    712             # we want to return the token we're actually matching
    713             matchedSymbol = self.getCurrentInputSymbol(input)
    714 
    715             # move past ttype token as if all were ok
    716             input.consume()
    717             return matchedSymbol
    718 
    719         # can't recover with single token deletion, try insertion
    720         if self.mismatchIsMissingToken(input, follow):
    721             inserted = self.getMissingSymbol(input, e, ttype, follow)
    722             e = MissingTokenException(ttype, input, inserted)
    723 
    724             # report after inserting so AW sees the token in the exception
    725             self.reportError(e)
    726             return inserted
    727 
    728         # even that didn't work; must throw the exception
    729         e = MismatchedTokenException(ttype, input)
    730         raise e
    731 
    732 
    733     def recoverFromMismatchedSet(self, input, e, follow):
    734         """Not currently used"""
    735 
    736         if self.mismatchIsMissingToken(input, follow):
    737             self.reportError(e)
    738             # we don't know how to conjure up a token for sets yet
    739             return self.getMissingSymbol(input, e, INVALID_TOKEN_TYPE, follow)
    740 
    741         # TODO do single token deletion like above for Token mismatch
    742         raise e
    743 
    744 
    745     def getCurrentInputSymbol(self, input):
    746         """
    747         Match needs to return the current input symbol, which gets put
    748         into the label for the associated token ref; e.g., x=ID.  Token
    749         and tree parsers need to return different objects. Rather than test
    750         for input stream type or change the IntStream interface, I use
    751         a simple method to ask the recognizer to tell me what the current
    752         input symbol is.
    753 
    754         This is ignored for lexers.
    755         """
    756 
    757         return None
    758 
    759 
    760     def getMissingSymbol(self, input, e, expectedTokenType, follow):
    761         """Conjure up a missing token during error recovery.
    762 
    763         The recognizer attempts to recover from single missing
    764         symbols. But, actions might refer to that missing symbol.
    765         For example, x=ID {f($x);}. The action clearly assumes
    766         that there has been an identifier matched previously and that
    767         $x points at that token. If that token is missing, but
    768         the next token in the stream is what we want we assume that
    769         this token is missing and we keep going. Because we
    770         have to return some token to replace the missing token,
    771         we have to conjure one up. This method gives the user control
    772         over the tokens returned for missing tokens. Mostly,
    773         you will want to create something special for identifier
    774         tokens. For literals such as '{' and ',', the default
    775         action in the parser or tree parser works. It simply creates
    776         a CommonToken of the appropriate type. The text will be the token.
     777         a CommonToken of the appropriate type; its text names the missing token.
    778         override this method to create the appropriate tokens.
    779         """
    780 
    781         return None
    782 
    783 
    784 ##     def recoverFromMissingElement(self, input, e, follow):
    785 ##         """
    786 ##         This code is factored out from mismatched token and mismatched set
    787 ##         recovery.  It handles "single token insertion" error recovery for
    788 ##         both.  No tokens are consumed to recover from insertions.  Return
    789 ##         true if recovery was possible else return false.
    790 ##         """
    791 
    792 ##         if self.mismatchIsMissingToken(input, follow):
    793 ##             self.reportError(e)
    794 ##             return True
    795 
    796 ##         # nothing to do; throw exception
    797 ##         return False
    798 
    799 
    800     def consumeUntil(self, input, tokenTypes):
    801         """
    802         Consume tokens until one matches the given token or token set
    803 
    804         tokenTypes can be a single token type or a set of token types
    805 
    806         """
    807 
    808         if not isinstance(tokenTypes, (set, frozenset)):
    809             tokenTypes = frozenset([tokenTypes])
    810 
    811         ttype = input.LA(1)
    812         while ttype != EOF and ttype not in tokenTypes:
    813             input.consume()
    814             ttype = input.LA(1)
    815 
    816 
    817     def getRuleInvocationStack(self):
    818         """
    819         Return List<String> of the rules in your parser instance
    820         leading up to a call to this method.  You could override if
    821         you want more details such as the file/line info of where
     822         in the parser code a rule is invoked.
    823 
    824         This is very useful for error messages and for context-sensitive
    825         error recovery.
    826 
     827         You must be careful if you subclass a generated recognizer.
    828         The default implementation will only search the module of self
    829         for rules, but the subclass will not contain any rules.
    830         You probably want to override this method to look like
    831 
    832         def getRuleInvocationStack(self):
    833             return self._getRuleInvocationStack(<class>.__module__)
    834 
    835         where <class> is the class of the generated recognizer, e.g.
    836         the superclass of self.
    837         """
    838 
    839         return self._getRuleInvocationStack(self.__module__)
    840 
    841 
    842     def _getRuleInvocationStack(cls, module):
    843         """
    844         A more general version of getRuleInvocationStack where you can
     845         pass in, for example, a RecognitionException to get its rule
     846         stack trace.  This routine is shared with all recognizers, hence
     847         it is a classmethod.
    848 
    849         TODO: move to a utility class or something; weird having lexer call
    850         this
    851         """
    852 
    853         # mmmhhh,... perhaps look at the first argument
    854         # (f_locals[co_varnames[0]]?) and test if it's a (sub)class of
    855         # requested recognizer...
    856 
    857         rules = []
    858         for frame in reversed(inspect.stack()):
    859             code = frame[0].f_code
    860             codeMod = inspect.getmodule(code)
    861             if codeMod is None:
    862                 continue
    863 
    864             # skip frames not in requested module
    865             if codeMod.__name__ != module:
    866                 continue
    867 
    868             # skip some unwanted names
    869             if code.co_name in ('nextToken', '<module>'):
    870                 continue
    871 
    872             rules.append(code.co_name)
    873 
    874         return rules
    875 
    876     _getRuleInvocationStack = classmethod(_getRuleInvocationStack)
    877 
    878 
    879     def getBacktrackingLevel(self):
    880         return self._state.backtracking
    881 
    882     def setBacktrackingLevel(self, n):
    883         self._state.backtracking = n
    884 
    885 
    886     def getGrammarFileName(self):
    887         """For debugging and other purposes, might want the grammar name.
    888 
    889         Have ANTLR generate an implementation for this method.
    890         """
    891 
    892         return self.grammarFileName
    893 
    894 
    895     def getSourceName(self):
    896         raise NotImplementedError
    897 
    898 
    899     def toStrings(self, tokens):
    900         """A convenience method for use most often with template rewrites.
    901 
    902         Convert a List<Token> to List<String>
    903         """
    904 
    905         if tokens is None:
    906             return None
    907 
    908         return [token.text for token in tokens]
    909 
    910 
    911     def getRuleMemoization(self, ruleIndex, ruleStartIndex):
    912         """
    913         Given a rule number and a start token index number, return
    914         MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    915         start index.  If this rule has parsed input starting from the
    916         start index before, then return where the rule stopped parsing.
    917         It returns the index of the last token matched by the rule.
    918         """
    919 
    920         if ruleIndex not in self._state.ruleMemo:
    921             self._state.ruleMemo[ruleIndex] = {}
    922 
    923         return self._state.ruleMemo[ruleIndex].get(
    924             ruleStartIndex, self.MEMO_RULE_UNKNOWN
    925             )
    926 
    927 
    928     def alreadyParsedRule(self, input, ruleIndex):
    929         """
    930         Has this rule already parsed input at the current index in the
    931         input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
    932         If we attempted but failed to parse properly before, return
    933         MEMO_RULE_FAILED.
    934 
    935         This method has a side-effect: if we have seen this input for
    936         this rule and successfully parsed before, then seek ahead to
    937         1 past the stop token matched for this rule last time.
    938         """
    939 
    940         stopIndex = self.getRuleMemoization(ruleIndex, input.index())
    941         if stopIndex == self.MEMO_RULE_UNKNOWN:
    942             return False
    943 
    944         if stopIndex == self.MEMO_RULE_FAILED:
    945             raise BacktrackingFailed
    946 
    947         else:
    948             input.seek(stopIndex + 1)
    949 
    950         return True
    951 
    952 
    953     def memoize(self, input, ruleIndex, ruleStartIndex, success):
    954         """
    955         Record whether or not this rule parsed the input at this position
    956         successfully.
    957         """
    958 
    959         if success:
    960             stopTokenIndex = input.index() - 1
    961         else:
    962             stopTokenIndex = self.MEMO_RULE_FAILED
    963 
    964         if ruleIndex in self._state.ruleMemo:
    965             self._state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex
    966 
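             # Illustrative sketch (not part of the runtime): generated rule
             # methods consult and update the memo table roughly like this when
             # backtracking with memoization enabled.  RULE_EXPR is a
             # hypothetical rule index.
             #
             #   def expr(self):
             #       if self._state.backtracking > 0 and \
             #               self.alreadyParsedRule(self.input, RULE_EXPR):
             #           return
             #       ruleStart = self.input.index()
             #       success = False
             #       try:
             #           # ... match the rule body ...
             #           success = True
             #       finally:
             #           if self._state.backtracking > 0:
             #               self.memoize(self.input, RULE_EXPR, ruleStart, success)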
    967 
    968     def traceIn(self, ruleName, ruleIndex, inputSymbol):
    969         sys.stdout.write("enter %s %s" % (ruleName, inputSymbol))
    970 
    971         if self._state.backtracking > 0:
    972             sys.stdout.write(" backtracking=%s" % self._state.backtracking)
    973 
    974         sys.stdout.write('\n')
    975 
    976 
    977     def traceOut(self, ruleName, ruleIndex, inputSymbol):
    978         sys.stdout.write("exit %s %s" % (ruleName, inputSymbol))
    979 
    980         if self._state.backtracking > 0:
    981             sys.stdout.write(" backtracking=%s" % self._state.backtracking)
    982 
    983         # mmmm... we use BacktrackingFailed exceptions now. So how could we
    984         # get that information here?
    985         #if self._state.failed:
    986         #    sys.stdout.write(" failed")
    987         #else:
    988         #    sys.stdout.write(" succeeded")
    989 
    990         sys.stdout.write('\n')
    991 
    992 
    993 class TokenSource(object):
    994     """
    995     @brief Abstract baseclass for token producers.
    996 
    997     A source of tokens must provide a sequence of tokens via nextToken()
     998     and also must reveal its source of characters; CommonToken's text is
     999     computed from a CharStream; it only stores indices into the char stream.
   1000 
   1001     Errors from the lexer are never passed to the parser.  Either you want
   1002     to keep going or you do not upon token recognition error.  If you do not
   1003     want to continue lexing then you do not want to continue parsing.  Just
    1004     throw an exception that is not a RecognitionException and it will naturally
    1005     propagate all the way out of the recognizers.  If you want to continue
   1006     lexing then you should not throw an exception to the parser--it has already
   1007     requested a token.  Keep lexing until you get a valid one.  Just report
   1008     errors and keep going, looking for a valid token.
   1009     """
   1010 
   1011     def nextToken(self):
   1012         """Return a Token object from your input stream (usually a CharStream).
   1013 
   1014         Do not fail/return upon lexing error; keep chewing on the characters
   1015         until you get a good one; errors are not passed through to the parser.
   1016         """
   1017 
   1018         raise NotImplementedError
   1019 
   1020 
   1021     def __iter__(self):
    1022         """The TokenSource is an iterator.
   1023 
   1024         The iteration will not include the final EOF token, see also the note
   1025         for the next() method.
   1026 
   1027         """
   1028 
   1029         return self
   1030 
   1031 
   1032     def next(self):
   1033         """Return next token or raise StopIteration.
   1034 
   1035         Note that this will raise StopIteration when hitting the EOF token,
   1036         so EOF will not be part of the iteration.
   1037 
   1038         """
   1039 
   1040         token = self.nextToken()
   1041         if token is None or token.type == EOF:
   1042             raise StopIteration
   1043         return token
   1044 
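         # Illustrative sketch (not part of the runtime): because a TokenSource
         # is an iterator, a lexer can be consumed directly in a for loop; the
         # final EOF token is not yielded.  "MyLexer" is a hypothetical generated
         # lexer; ANTLRStringStream comes from the antlr3 package.
         #
         #   lexer = MyLexer(ANTLRStringStream("1 + 2"))
         #   for token in lexer:
         #       print token.type, repr(token.text)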
   1045 
   1046 class Lexer(BaseRecognizer, TokenSource):
   1047     """
   1048     @brief Baseclass for generated lexer classes.
   1049 
    1050     A lexer is a recognizer that draws input symbols from a character stream.
    1051     Lexer grammars result in a subclass of this object. A Lexer object
   1052     uses simplified match() and error recovery mechanisms in the interest
   1053     of speed.
   1054     """
   1055 
   1056     def __init__(self, input, state=None):
   1057         BaseRecognizer.__init__(self, state)
   1058         TokenSource.__init__(self)
   1059 
   1060         # Where is the lexer drawing characters from?
   1061         self.input = input
   1062 
   1063 
   1064     def reset(self):
   1065         BaseRecognizer.reset(self) # reset all recognizer state variables
   1066 
   1067         if self.input is not None:
   1068             # rewind the input
   1069             self.input.seek(0)
   1070 
   1071         if self._state is None:
   1072             # no shared state work to do
   1073             return
   1074 
   1075         # wack Lexer state variables
   1076         self._state.token = None
   1077         self._state.type = INVALID_TOKEN_TYPE
   1078         self._state.channel = DEFAULT_CHANNEL
   1079         self._state.tokenStartCharIndex = -1
   1080         self._state.tokenStartLine = -1
   1081         self._state.tokenStartCharPositionInLine = -1
   1082         self._state.text = None
   1083 
   1084 
   1085     def makeEOFToken(self):
   1086         eof = CommonToken(
   1087             type=EOF, channel=DEFAULT_CHANNEL,
   1088             input=self.input,
   1089             start=self.input.index(), stop=self.input.index())
   1090         eof.line = self.input.line
   1091         eof.charPositionInLine = self.input.charPositionInLine
   1092         return eof
   1093 
   1094     def nextToken(self):
   1095         """
   1096         Return a token from this source; i.e., match a token on the char
   1097         stream.
   1098         """
   1099 
   1100         while 1:
   1101             self._state.token = None
   1102             self._state.channel = DEFAULT_CHANNEL
   1103             self._state.tokenStartCharIndex = self.input.index()
   1104             self._state.tokenStartCharPositionInLine = self.input.charPositionInLine
   1105             self._state.tokenStartLine = self.input.line
   1106             self._state.text = None
   1107             if self.input.LA(1) == EOF:
   1108                 return self.makeEOFToken()
   1109 
   1110             try:
   1111                 self.mTokens()
   1112 
   1113                 if self._state.token is None:
   1114                     self.emit()
   1115 
   1116                 elif self._state.token == SKIP_TOKEN:
   1117                     continue
   1118 
   1119                 return self._state.token
   1120 
   1121             except NoViableAltException, re:
   1122                 self.reportError(re)
   1123                 self.recover(re) # throw out current char and try again
   1124 
   1125             except RecognitionException, re:
   1126                 self.reportError(re)
   1127                 # match() routine has already called recover()
   1128 
   1129 
   1130     def skip(self):
   1131         """
   1132         Instruct the lexer to skip creating a token for current lexer rule
   1133         and look for another token.  nextToken() knows to keep looking when
   1134         a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
   1135         if token==null at end of any token rule, it creates one for you
    1136         if token is None at the end of any token rule, it creates one for you
   1137         """
   1138 
   1139         self._state.token = SKIP_TOKEN
   1140 
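             # Illustrative sketch (not part of the runtime): a lexer grammar
             # rule such as  WS : (' '|'\t'|'\r'|'\n')+ { self.skip() } ;
             # compiles to an mWS() method whose action simply calls
             #
             #   self.skip()
             #
             # so nextToken() discards the whitespace and keeps scanning.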
   1141 
   1142     def mTokens(self):
   1143         """This is the lexer entry point that sets instance var 'token'"""
   1144 
   1145         # abstract method
   1146         raise NotImplementedError
   1147 
   1148 
   1149     def setCharStream(self, input):
   1150         """Set the char stream and reset the lexer"""
   1151         self.input = None
   1152         self.reset()
   1153         self.input = input
   1154 
   1155 
   1156     def getSourceName(self):
   1157         return self.input.getSourceName()
   1158 
   1159 
   1160     def emit(self, token=None):
   1161         """
   1162         The standard method called to automatically emit a token at the
   1163         outermost lexical rule.  The token object should point into the
   1164         char buffer start..stop.  If there is a text override in 'text',
   1165         use that to set the token's text.  Override this method to emit
   1166         custom Token objects.
   1167 
   1168         If you are building trees, then you should also override
   1169         Parser or TreeParser.getMissingSymbol().
   1170         """
   1171 
   1172         if token is None:
   1173             token = CommonToken(
   1174                 input=self.input,
   1175                 type=self._state.type,
   1176                 channel=self._state.channel,
   1177                 start=self._state.tokenStartCharIndex,
   1178                 stop=self.getCharIndex()-1
   1179                 )
   1180             token.line = self._state.tokenStartLine
   1181             token.text = self._state.text
   1182             token.charPositionInLine = self._state.tokenStartCharPositionInLine
   1183 
   1184         self._state.token = token
   1185 
   1186         return token
   1187 
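             # Illustrative sketch (not part of the runtime): a lexer that must
             # emit more than one token per rule (e.g. INDENT/DEDENT handling)
             # can queue tokens in emit() and drain the queue in nextToken().
             # "QueueingLexer" and "MyLexer" are hypothetical names.
             #
             #   class QueueingLexer(MyLexer):
             #       def __init__(self, *args, **kwargs):
             #           MyLexer.__init__(self, *args, **kwargs)
             #           self._queue = []
             #
             #       def emit(self, token=None):
             #           token = MyLexer.emit(self, token)
             #           self._queue.append(token)
             #           return token
             #
             #       def nextToken(self):
             #           while not self._queue:
             #               t = MyLexer.nextToken(self)
             #               if t.type == EOF:
             #                   return t
             #           return self._queue.pop(0)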
   1188 
   1189     def match(self, s):
   1190         if isinstance(s, basestring):
   1191             for c in s:
   1192                 if self.input.LA(1) != ord(c):
   1193                     if self._state.backtracking > 0:
   1194                         raise BacktrackingFailed
   1195 
   1196                     mte = MismatchedTokenException(c, self.input)
   1197                     self.recover(mte)
   1198                     raise mte
   1199 
   1200                 self.input.consume()
   1201 
   1202         else:
   1203             if self.input.LA(1) != s:
   1204                 if self._state.backtracking > 0:
   1205                     raise BacktrackingFailed
   1206 
   1207                 mte = MismatchedTokenException(unichr(s), self.input)
   1208                 self.recover(mte) # don't really recover; just consume in lexer
   1209                 raise mte
   1210 
   1211             self.input.consume()
   1212 
   1213 
   1214     def matchAny(self):
   1215         self.input.consume()
   1216 
   1217 
   1218     def matchRange(self, a, b):
   1219         if self.input.LA(1) < a or self.input.LA(1) > b:
   1220             if self._state.backtracking > 0:
   1221                 raise BacktrackingFailed
   1222 
   1223             mre = MismatchedRangeException(unichr(a), unichr(b), self.input)
   1224             self.recover(mre)
   1225             raise mre
   1226 
   1227         self.input.consume()
   1228 
   1229 
   1230     def getLine(self):
   1231         return self.input.line
   1232 
   1233 
   1234     def getCharPositionInLine(self):
   1235         return self.input.charPositionInLine
   1236 
   1237 
   1238     def getCharIndex(self):
   1239         """What is the index of the current character of lookahead?"""
   1240 
   1241         return self.input.index()
   1242 
   1243 
   1244     def getText(self):
   1245         """
   1246         Return the text matched so far for the current token or any
   1247         text override.
   1248         """
   1249         if self._state.text is not None:
   1250             return self._state.text
   1251 
   1252         return self.input.substring(
   1253             self._state.tokenStartCharIndex,
   1254             self.getCharIndex()-1
   1255             )
   1256 
   1257 
   1258     def setText(self, text):
   1259         """
   1260         Set the complete text of this token; it wipes any previous
   1261         changes to the text.
   1262         """
   1263         self._state.text = text
   1264 
   1265 
   1266     text = property(getText, setText)
   1267 
   1268 
   1269     def reportError(self, e):
   1270         ## TODO: not thought about recovery in lexer yet.
   1271 
   1272         ## # if we've already reported an error and have not matched a token
   1273         ## # yet successfully, don't report any errors.
   1274         ## if self.errorRecovery:
   1275         ##     #System.err.print("[SPURIOUS] ");
   1276         ##     return;
   1277         ##
   1278         ## self.errorRecovery = True
   1279 
   1280         self.displayRecognitionError(self.tokenNames, e)
   1281 
   1282 
   1283     def getErrorMessage(self, e, tokenNames):
   1284         msg = None
   1285 
   1286         if isinstance(e, MismatchedTokenException):
   1287             msg = "mismatched character " \
   1288                   + self.getCharErrorDisplay(e.c) \
   1289                   + " expecting " \
   1290                   + self.getCharErrorDisplay(e.expecting)
   1291 
   1292         elif isinstance(e, NoViableAltException):
   1293             msg = "no viable alternative at character " \
   1294                   + self.getCharErrorDisplay(e.c)
   1295 
   1296         elif isinstance(e, EarlyExitException):
   1297             msg = "required (...)+ loop did not match anything at character " \
   1298                   + self.getCharErrorDisplay(e.c)
   1299 
   1300         elif isinstance(e, MismatchedNotSetException):
   1301             msg = "mismatched character " \
   1302                   + self.getCharErrorDisplay(e.c) \
   1303                   + " expecting set " \
   1304                   + repr(e.expecting)
   1305 
   1306         elif isinstance(e, MismatchedSetException):
   1307             msg = "mismatched character " \
   1308                   + self.getCharErrorDisplay(e.c) \
   1309                   + " expecting set " \
   1310                   + repr(e.expecting)
   1311 
   1312         elif isinstance(e, MismatchedRangeException):
   1313             msg = "mismatched character " \
   1314                   + self.getCharErrorDisplay(e.c) \
   1315                   + " expecting set " \
   1316                   + self.getCharErrorDisplay(e.a) \
   1317                   + ".." \
   1318                   + self.getCharErrorDisplay(e.b)
   1319 
   1320         else:
   1321             msg = BaseRecognizer.getErrorMessage(self, e, tokenNames)
   1322 
   1323         return msg
   1324 
   1325 
   1326     def getCharErrorDisplay(self, c):
   1327         if c == EOF:
   1328             c = '<EOF>'
   1329         return repr(c)
   1330 
   1331 
   1332     def recover(self, re):
   1333         """
    1334         Lexers can normally match any char in their vocabulary after matching
   1335         a token, so do the easy thing and just kill a character and hope
   1336         it all works out.  You can instead use the rule invocation stack
   1337         to do sophisticated error recovery if you are in a fragment rule.
   1338         """
   1339 
   1340         self.input.consume()
   1341 
   1342 
   1343     def traceIn(self, ruleName, ruleIndex):
   1344         inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
   1345                                          self.getLine(),
   1346                                          self.getCharPositionInLine()
   1347                                          )
   1348 
   1349         BaseRecognizer.traceIn(self, ruleName, ruleIndex, inputSymbol)
   1350 
   1351 
   1352     def traceOut(self, ruleName, ruleIndex):
   1353         inputSymbol = "%s line=%d:%s" % (self.input.LT(1),
   1354                                          self.getLine(),
   1355                                          self.getCharPositionInLine()
   1356                                          )
   1357 
   1358         BaseRecognizer.traceOut(self, ruleName, ruleIndex, inputSymbol)
   1359 
   1360 
   1361 
   1362 class Parser(BaseRecognizer):
   1363     """
   1364     @brief Baseclass for generated parser classes.
   1365     """
   1366 
   1367     def __init__(self, lexer, state=None):
   1368         BaseRecognizer.__init__(self, state)
   1369 
   1370         self.input = lexer
   1371 
   1372 
   1373     def reset(self):
   1374         BaseRecognizer.reset(self) # reset all recognizer state variables
   1375         if self.input is not None:
   1376             self.input.seek(0) # rewind the input
   1377 
   1378 
   1379     def getCurrentInputSymbol(self, input):
   1380         return input.LT(1)
   1381 
   1382 
   1383     def getMissingSymbol(self, input, e, expectedTokenType, follow):
   1384         if expectedTokenType == EOF:
   1385             tokenText = "<missing EOF>"
   1386         else:
   1387             tokenText = "<missing " + self.tokenNames[expectedTokenType] + ">"
   1388         t = CommonToken(type=expectedTokenType, text=tokenText)
   1389         current = input.LT(1)
   1390         if current.type == EOF:
   1391             current = input.LT(-1)
   1392 
   1393         if current is not None:
   1394             t.line = current.line
   1395             t.charPositionInLine = current.charPositionInLine
   1396         t.channel = DEFAULT_CHANNEL
   1397         return t
   1398 
   1399 
   1400     def setTokenStream(self, input):
   1401         """Set the token stream and reset the parser"""
   1402 
   1403         self.input = None
   1404         self.reset()
   1405         self.input = input
   1406 
   1407 
   1408     def getTokenStream(self):
   1409         return self.input
   1410 
   1411 
   1412     def getSourceName(self):
   1413         return self.input.getSourceName()
   1414 
   1415 
   1416     def traceIn(self, ruleName, ruleIndex):
   1417         BaseRecognizer.traceIn(self, ruleName, ruleIndex, self.input.LT(1))
   1418 
   1419 
   1420     def traceOut(self, ruleName, ruleIndex):
   1421         BaseRecognizer.traceOut(self, ruleName, ruleIndex, self.input.LT(1))
   1422 
   1423 
   1424 class RuleReturnScope(object):
   1425     """
   1426     Rules can return start/stop info as well as possible trees and templates.
   1427     """
   1428 
   1429     def getStart(self):
   1430         """Return the start token or tree."""
   1431         return None
   1432 
   1433 
   1434     def getStop(self):
   1435         """Return the stop token or tree."""
   1436         return None
   1437 
   1438 
   1439     def getTree(self):
   1440         """Has a value potentially if output=AST."""
   1441         return None
   1442 
   1443 
   1444     def getTemplate(self):
   1445         """Has a value potentially if output=template."""
   1446         return None
   1447 
   1448 
   1449 class ParserRuleReturnScope(RuleReturnScope):
   1450     """
   1451     Rules that return more than a single value must return an object
   1452     containing all the values.  Besides the properties defined in
   1453     RuleLabelScope.predefinedRulePropertiesScope there may be user-defined
   1454     return values.  This class simply defines the minimum properties that
   1455     are always defined and methods to access the others that might be
   1456     available depending on output option such as template and tree.
   1457 
    1458     Note that text is not an actual property of the return value; it is computed
   1459     from start and stop using the input stream's toString() method.  I
   1460     could add a ctor to this so that we can pass in and store the input
   1461     stream, but I'm not sure we want to do that.  It would seem to be undefined
   1462     to get the .text property anyway if the rule matches tokens from multiple
   1463     input streams.
   1464 
   1465     I do not use getters for fields of objects that are used simply to
   1466     group values such as this aggregate.  The getters/setters are there to
   1467     satisfy the superclass interface.
   1468     """
   1469 
   1470     def __init__(self):
   1471         self.start = None
   1472         self.stop = None
   1473         self.tree = None  # only used when output=AST
   1474 
   1475 
   1476     def getStart(self):
   1477         return self.start
   1478 
   1479 
   1480     def getStop(self):
   1481         return self.stop
   1482 
   1483 
   1484     def getTree(self):
   1485         return self.tree
   1486