Home | History | Annotate | Download | only in runtime
      1 /** A generic recognizer that can handle recognizers generated from
      2  *  lexer, parser, and tree grammars.  This is all the parsing
      3  *  support code essentially; most of it is error recovery stuff and
      4  *  backtracking.
      5  *
      6  *  <p>This class should not be instantiated directly.  Instead, use one of its
      7  *  subclasses.</p>
      8  *
      9  *  @class
     10  *  @param {org.antlr.runtime.RecognizerSharedState} [state] state object with
     11  *      which to initialize this recognizer.
     12  */
     13 org.antlr.runtime.BaseRecognizer = function(state) {
     14     /** State of a lexer, parser, or tree parser are collected into a state
     15      *  object so the state can be shared.  This sharing is needed to
     16      *  have one grammar import others and share same error variables
     17      *  and other state variables.  It's a kind of explicit multiple
     18      *  inheritance via delegation of methods and shared state.
     19      *  @type org.antlr.runtime.RecognizerSharedState
     20      */
     21     this.state = state || new org.antlr.runtime.RecognizerSharedState();
     22 };
     23 
     24 org.antlr.lang.augmentObject(org.antlr.runtime.BaseRecognizer, {
     25     /**
     26      * @memberOf org.antlr.runtime.BaseRecognizer
     27      * @type Number
     28      */
     29     MEMO_RULE_FAILED: -2,
     30 
     31     /**
     32      * @memberOf org.antlr.runtime.BaseRecognizer
     33      * @type Number
     34      */
     35     MEMO_RULE_UNKNOWN: -1,
     36 
     37     /**
     38      * @memberOf org.antlr.runtime.BaseRecognizer
     39      * @type Number
     40      */
     41     INITIAL_FOLLOW_STACK_SIZE: 100,
     42 
     43     /**
     44      * @memberOf org.antlr.runtime.BaseRecognizer
     45      * @type Number
     46      */
     47     MEMO_RULE_FAILED_I: -2,
     48 
     49     /**
     50      * @memberOf org.antlr.runtime.BaseRecognizer
     51      * @type Number
     52      */
     53     DEFAULT_TOKEN_CHANNEL: org.antlr.runtime.Token.DEFAULT_CHANNEL,
     54 
     55     /**
     56      * @memberOf org.antlr.runtime.BaseRecognizer
     57      * @type Number
     58      */
     59     HIDDEN: org.antlr.runtime.Token.HIDDEN_CHANNEL,
     60 
     61     /**
     62      * @memberOf org.antlr.runtime.BaseRecognizer
     63      * @type String
     64      */
     65     NEXT_TOKEN_RULE_NAME: "nextToken"
     66 });
     67 
     68 org.antlr.runtime.BaseRecognizer.prototype = {
     69     /** Reset the parser's state.  Subclasses must rewinds the input stream */
     70     reset: function() {
     71         var i, len;
     72 
     73         // wack everything related to error recovery
     74         if (!this.state) {
     75             return; // no shared state work to do
     76         }
     77         this.state._fsp = -1;
     78         this.state.errorRecovery = false;
     79         this.state.lastErrorIndex = -1;
     80         this.state.failed = false;
     81         this.state.syntaxErrors = 0;
     82         // wack everything related to backtracking and memoization
     83         this.state.backtracking = 0;
     84         // wipe cache
     85         if (this.state.ruleMemo) {
     86             for (i=0, len=this.state.ruleMemo.length; i<len; i++) {
     87                 this.state.ruleMemo[i] = null;
     88             }
     89         }
     90     },
     91 
     92     /** Match current input symbol against ttype.  Attempt
     93      *  single token insertion or deletion error recovery.  If
     94      *  that fails, throw {@link org.antlr.runtime.MismatchedTokenException}.
     95      *
     96      *  <p>To turn off single token insertion or deletion error
     97      *  recovery, override {@link #mismatchRecover} and have it call
     98      *  plain {@link #mismatch}, which does not recover.  Then any error
     99      *  in a rule will cause an exception and immediate exit from
    100      *  rule.  Rule would recover by resynchronizing to the set of
    101      *  symbols that can follow rule ref.</p>
    102      *
    103      *  @param {org.antlr.runtime.IntStream} input input stream to match against.
    104      *  @param {Number} ttype  input type to match.
    105      *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
    106      *      matched token.
    107      *  @returns {Object} the matched symbol
    108      */
    109     match: function(input, ttype, follow) {
    110         var matchedSymbol = this.getCurrentInputSymbol(input);
    111         if ( input.LA(1)===ttype ) {
    112             input.consume();
    113             this.state.errorRecovery = false;
    114             this.state.failed = false;
    115             return matchedSymbol;
    116         }
    117         if ( this.state.backtracking>0 ) {
    118             this.state.failed = true;
    119             return matchedSymbol;
    120         }
    121         matchedSymbol = this.recoverFromMismatchedToken(input, ttype, follow);
    122         return matchedSymbol;
    123     },
    124 
    125     /**
    126      * Match any token.
    127      * @param {org.antlr.runtime.IntStream} input input stream to match against.
    128      */
    129     matchAny: function(input) {
    130         this.state.errorRecovery = false;
    131         this.state.failed = false;
    132         input.consume();
    133     },
    134 
    135     /**
    136      * Is the following token (LA(2)) the unwanted type (ttype)?
    137      * @param {org.antlr.runtime.IntStream} input input stream to match against.
    138      * @param {Number} ttype the undesired token type.
    139      * @returns {Boolean} true if and only if the following token is the
    140      *      unwanted type.
    141      */
    142     mismatchIsUnwantedToken: function(input, ttype) {
    143         return input.LA(2)===ttype;
    144     },
    145 
    146     /**
    147      * Does the stream appear to be missing a single token?
    148      * @param {org.antlr.runtime.IntStream} input input stream to match against.
    149      * @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
    150      *      matched token.
    151      * @returns {Boolean} true if and only if it appears that the stream is
    152      *      missing a single token.
    153      */
    154     mismatchIsMissingToken: function(input, follow) {
    155         if ( !follow ) {
    156             // we have no information about the follow; we can only consume
    157             // a single token and hope for the best
    158             return false;
    159         }
    160         // compute what can follow this grammar element reference
    161         if ( follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) ) {
    162             var viableTokensFollowingThisRule = this.computeContextSensitiveRuleFOLLOW();
    163             follow = follow.or(this.viableTokensFollowingThisRule);
    164             if ( this.state._fsp>=0 ) { // remove EOR if we're not the start symbol
    165                 follow.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
    166             }
    167         }
    168         // if current token is consistent with what could come after set
    169         // then we know we're missing a token; error recovery is free to
    170         // "insert" the missing token
    171 
    172         // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
    173         // in follow set to indicate that the fall of the start symbol is
    174         // in the set (EOF can follow).
    175         if ( follow.member(input.LA(1)) ||
    176              follow.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
    177         {
    178             return true;
    179         }
    180         return false;
    181     },
    182 
    183     /** Factor out what to do upon token mismatch so tree parsers can behave
    184      *  differently.  Override and call {@link #mismatchRecover}
    185      *  to get single token insertion and deletion.
    186      *
    187      *  @param {org.antlr.runtime.IntStream} input input stream to match against.
    188      *  @param {Number} ttype  input type to match.
    189      *  @param {org.antlr.runtime.BitSet} [follow] set of tokens that can follow the
    190      *      matched token.
    191      */
    192     mismatch: function(input, ttype, follow) {
    193         if ( this.mismatchIsUnwantedToken(input, ttype) ) {
    194             throw new org.antlr.runtime.UnwantedTokenException(ttype, input);
    195         } else if ( this.mismatchIsMissingToken(input, follow) ) {
    196             throw new org.antlr.runtime.MissingTokenException(ttype, input, null);
    197         }
    198         throw new org.antlr.runtime.MismatchedTokenException(ttype, input);
    199     },
    200 
    201     /** Report a recognition problem.
    202      *
    203      *  <p>This method sets errorRecovery to indicate the parser is recovering
    204      *  not parsing.  Once in recovery mode, no errors are generated.
    205      *  To get out of recovery mode, the parser must successfully match
    206      *  a token (after a resync).  So it will go:</p>
    207      *  <ol>
    208      *      <li>error occurs</li>
    209      *      <li>enter recovery mode, report error</li>
    210      *      <li>consume until token found in resynch set</li>
    211      *      <li>try to resume parsing</li>
    212      *      <li>next match() will reset errorRecovery mode</li>
    213      *  </ol>
    214      *
    215      *  <p>If you override, make sure to update this.state.syntaxErrors if you
    216      *  care about that.</p>
    217      *  @param {org.antlr.runtime.RecognitionException} e the error to be reported.
    218      */
    219     reportError: function(e) {
    220         // if we've already reported an error and have not matched a token
    221         // yet successfully, don't report any errors.
    222         if ( this.state.errorRecovery ) {
    223             return;
    224         }
    225         this.state.syntaxErrors++;
    226         this.state.errorRecovery = true;
    227 
    228         this.displayRecognitionError(this.getTokenNames(), e);
    229     },
    230 
    231     /**
    232      * Assemble recognition error message.
    233      * @param {Array} tokenNames array of token names (strings).
    234      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
    235      */
    236     displayRecognitionError: function(tokenNames, e) {
    237         var hdr = this.getErrorHeader(e),
    238             msg = this.getErrorMessage(e, tokenNames);
    239         this.emitErrorMessage(hdr+" "+msg);
    240     },
    241 
    242     /**
    243      * Create error header message.  Format is <q>line
    244      * lineNumber:positionInLine</q>.
    245      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
    246      * @returns {String} The error header.
    247      */
    248     getErrorHeader: function(e) {
    249         /* handle null input */
    250         if (!org.antlr.lang.isNumber(e.line)) {
    251             e.line = 0;
    252         }
    253         return "line "+e.line+":"+e.charPositionInLine;
    254     },
    255 
    256     /**
    257      * Override this method to change where error messages go.
    258      * Defaults to "alert"-ing the error in browsers and "print"-ing the error
    259      * in other environments (e.g. Rhino, SpiderMonkey).
    260      * @param {String} msg the error message to be displayed.
    261      */
    262     emitErrorMessage: function(msg) {
    263         if (typeof(window) != 'undefined' && window.alert) {
    264             alert(msg);
    265         } else {
    266             print(msg);
    267         }
    268     },
    269 
    270     /** What error message should be generated for the various
    271      *  exception types?
    272      *
    273      *  <p>Not very object-oriented code, but I like having all error message
    274      *  generation within one method rather than spread among all of the
    275      *  exception classes. This also makes it much easier for the exception
    276      *  handling because the exception classes do not have to have pointers back
    277      *  to this object to access utility routines and so on. Also, changing
    278      *  the message for an exception type would be difficult because you
    279      *  would have to be subclassing exceptions, but then somehow get ANTLR
    280      *  to make those kinds of exception objects instead of the default.</p>
    281      *
    282      *  <p>For grammar debugging, you will want to override this to add
    283      *  more information such as the stack frame and no viable alts.</p>
    284      *
    285      *  <p>Override this to change the message generated for one or more
    286      *  exception types.</p>
    287      *
    288      * @param {Array} tokenNames array of token names (strings).
    289      * @param {org.antlr.runtime.RecognitionException} e the error to be reported.
    290      * @returns {String} the error message to be emitted.
    291      */
    292     getErrorMessage: function(e, tokenNames) {
    293         var msg = (e && e.getMessage) ? e.getMessage() : null,
    294             mte,
    295             tokenName;
    296         if ( e instanceof org.antlr.runtime.UnwantedTokenException ) {
    297             var ute = e;
    298             tokenName="<unknown>";
    299             if ( ute.expecting== org.antlr.runtime.Token.EOF ) {
    300                 tokenName = "EOF";
    301             } else {
    302                 tokenName = tokenNames[ute.expecting];
    303             }
    304             msg = "extraneous input "+this.getTokenErrorDisplay(ute.getUnexpectedToken())+
    305                 " expecting "+tokenName;
    306         }
    307         else if ( e instanceof org.antlr.runtime.MissingTokenException ) {
    308             mte = e;
    309             tokenName="<unknown>";
    310             if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
    311                 tokenName = "EOF";
    312             } else {
    313                 tokenName = tokenNames[mte.expecting];
    314             }
    315             msg = "missing "+tokenName+" at "+this.getTokenErrorDisplay(e.token);
    316         }
    317         else if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
    318             mte = e;
    319             tokenName="<unknown>";
    320             if ( mte.expecting== org.antlr.runtime.Token.EOF ) {
    321                 tokenName = "EOF";
    322             }
    323             else {
    324                 tokenName = tokenNames[mte.expecting];
    325             }
    326             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
    327                 " expecting "+tokenName;
    328         }
    329         else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
    330             msg = "no viable alternative at input "+this.getTokenErrorDisplay(e.token);
    331         }
    332         else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
    333             msg = "required (...)+ loop did not match anything at input "+
    334                 this.getTokenErrorDisplay(e.token);
    335         }
    336         else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
    337             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
    338                 " expecting set "+e.expecting;
    339         }
    340         else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
    341             msg = "mismatched input "+this.getTokenErrorDisplay(e.token)+
    342                 " expecting set "+e.expecting;
    343         }
    344         else if ( e instanceof org.antlr.runtime.FailedPredicateException ) {
    345             msg = "rule "+e.ruleName+" failed predicate: {"+
    346                 e.predicateText+"}?";
    347         }
    348         return msg;
    349     },
    350 
    351     /** <p>Get number of recognition errors (lexer, parser, tree parser).  Each
    352      *  recognizer tracks its own number.  So parser and lexer each have
    353      *  separate count.  Does not count the spurious errors found between
    354      *  an error and next valid token match.</p>
    355      *
    356      *  <p>See also {@link #reportError}()
    357      *  @returns {Number} number of syntax errors encountered
    358      */
    359     getNumberOfSyntaxErrors: function() {
    360         return this.state.syntaxErrors;
    361     },
    362 
    363     /** How should a token be displayed in an error message? The default
    364      *  is to display just the text, but during development you might
    365      *  want to have a lot of information spit out.  Override in that case
    366      *  to use t.toString() (which, for CommonToken, dumps everything about
    367      *  the token).
    368      * @param {org.antlr.runtime.Token} t token that will be displayed in an error message
    369      * @return {String} the string representation of the token
    370      */
    371     getTokenErrorDisplay: function(t) {
    372         var s = t.getText();
    373         if ( !org.antlr.lang.isValue(s) ) {
    374             if ( t.getType()==org.antlr.runtime.Token.EOF ) {
    375                 s = "<EOF>";
    376             }
    377             else {
    378                 s = "<"+t.getType()+">";
    379             }
    380         }
    381         s = s.replace(/\n/g,"\\n");
    382         s = s.replace(/\r/g,"\\r");
    383         s = s.replace(/\t/g,"\\t");
    384         return "'"+s+"'";
    385     },
    386 
    387     /** Recover from an error found on the input stream.  This is
    388      *  for NoViableAlt and mismatched symbol exceptions.  If you enable
    389      *  single token insertion and deletion, this will usually not
    390      *  handle mismatched symbol exceptions but there could be a mismatched
    391      *  token that the match() routine could not recover from.
    392      *  @param {org.antlr.runtime.IntStream} input the intput stream
    393      *  @param {org.antlr.runtime.RecogntionException} the error found on the input stream
    394      */
    395     recover: function(input, re) {
    396         if ( this.state.lastErrorIndex==input.index() ) {
    397             // uh oh, another error at same token index; must be a case
    398             // where LT(1) is in the recovery token set so nothing is
    399             // consumed; consume a single token so at least to prevent
    400             // an infinite loop; this is a failsafe.
    401             input.consume();
    402         }
    403         this.state.lastErrorIndex = input.index();
    404         var followSet = this.computeErrorRecoverySet();
    405         this.beginResync();
    406         this.consumeUntil(input, followSet);
    407         this.endResync();
    408     },
    409 
    410     /** A hook to listen in on the token consumption during error recovery.
    411      */
    412     beginResync: function() {
    413     },
    414 
    415     /** A hook to listen in on the token consumption during error recovery.
    416      */
    417     endResync: function() {
    418     },
    419 
    420     /** Compute the error recovery set for the current rule.
    421      *  <p>During rule invocation, the parser pushes the set of tokens that can
    422      *  follow that rule reference on the stack; this amounts to
    423      *  computing FIRST of what follows the rule reference in the
    424      *  enclosing rule. This local follow set only includes tokens
    425      *  from within the rule; i.e., the FIRST computation done by
    426      *  ANTLR stops at the end of a rule.</p>
    427      *
    428      *  <p>EXAMPLE</p>
    429      *
    430      *  <p>When you find a "no viable alt exception", the input is not
    431      *  consistent with any of the alternatives for rule r.  The best
    432      *  thing to do is to consume tokens until you see something that
    433      *  can legally follow a call to r *or* any rule that called r.
    434      *  You don't want the exact set of viable next tokens because the
    435      *  input might just be missing a token--you might consume the
    436      *  rest of the input looking for one of the missing tokens.</p>
    437      *
    438      *  <p>Consider grammar:</p>
    439      *  <code><pre>
    440      *  a : '[' b ']'
    441      *    | '(' b ')'
    442      *    ;
    443      *  b : c '^' INT ;
    444      *  c : ID
    445      *    | INT
    446      *    ;
    447      *  </pre></code>
    448      *
    449      *  <p>At each rule invocation, the set of tokens that could follow
    450      *  that rule is pushed on a stack.  Here are the various "local"
    451      *  follow sets:</p>
    452      *
    453      *  <code><pre>
    454      *  FOLLOW(b1_in_a) = FIRST(']') = ']'
    455      *  FOLLOW(b2_in_a) = FIRST(')') = ')'
    456      *  FOLLOW(c_in_b) = FIRST('^') = '^'
    457      *  </pre></code>
    458      *
    459      *  <p>Upon erroneous input "[]", the call chain is</p>
    460      *
    461      *  <code>a -> b -> c</code>
    462      *
    463      *  <p>and, hence, the follow context stack is:</p>
    464      *
    465      *  <code><pre>
    466      *  depth  local follow set     after call to rule
    467      *    0         <EOF>                    a (from main())
    468      *    1          ']'                     b
    469      *    3          '^'                     c
    470      *  </pre></code>
    471      *
    472      *  <p>Notice that ')' is not included, because b would have to have
    473      *  been called from a different context in rule a for ')' to be
    474      *  included.</p>
    475      *
    476      *  <p>For error recovery, we cannot consider FOLLOW(c)
    477      *  (context-sensitive or otherwise).  We need the combined set of
    478      *  all context-sensitive FOLLOW sets--the set of all tokens that
    479      *  could follow any reference in the call chain.  We need to
    480      *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    481      *  we resync'd to that token, we'd consume until EOF.  We need to
    482      *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    483      *  In this case, for input "[]", LA(1) is in this set so we would
    484      *  not consume anything and after printing an error rule c would
    485      *  return normally.  It would not find the required '^' though.
    486      *  At this point, it gets a mismatched token error and throws an
    487      *  exception (since LA(1) is not in the viable following token
    488      *  set).  The rule exception handler tries to recover, but finds
    489      *  the same recovery set and doesn't consume anything.  Rule b
    490      *  exits normally returning to rule a.  Now it finds the ']' (and
    491      *  with the successful match exits errorRecovery mode).</p>
    492      *
    493      *  <p>So, you cna see that the parser walks up call chain looking
    494      *  for the token that was a member of the recovery set.</p>
    495      *
    496      *  <p>Errors are not generated in errorRecovery mode.</p>
    497      *
    498      *  <p>ANTLR's error recovery mechanism is based upon original ideas:</p>
    499      *
    500      *  <p>"Algorithms + Data Structures = Programs" by Niklaus Wirth</p>
    501      *
    502      *  <p>and</p>
    503      *
    504      *  <p>"A note on error recovery in recursive descent parsers":
    505      *  http://portal.acm.org/citation.cfm?id=947902.947905</p>
    506      *
    507      *  <p>Later, Josef Grosch had some good ideas:</p>
    508      *
    509      *  <p>"Efficient and Comfortable Error Recovery in Recursive Descent
    510      *  Parsers":
    511      *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip</p>
    512      *
    513      *  <p>Like Grosch I implemented local FOLLOW sets that are combined
    514      *  at run-time upon error to avoid overhead during parsing.</p>
    515      *  @returns {org.antlr.runtime.BitSet}
    516      */
    517     computeErrorRecoverySet: function() {
    518         return this.combineFollows(false);
    519     },
    520 
    521 
    522     /** Compute the context-sensitive FOLLOW set for current rule.
    523      *  <p>This is set of token types that can follow a specific rule
    524      *  reference given a specific call chain.  You get the set of
    525      *  viable tokens that can possibly come next (lookahead depth 1)
    526      *  given the current call chain.  Contrast this with the
    527      *  definition of plain FOLLOW for rule r:</p>
    528      *
    529      *   <code>FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}</code>
    530      *
    531      *  <p>where x in T* and alpha, beta in V*; T is set of terminals and
    532      *  V is the set of terminals and nonterminals.  In other words,
    533      *  FOLLOW(r) is the set of all tokens that can possibly follow
    534      *  references to r in *any* sentential form (context).  At
    535      *  runtime, however, we know precisely which context applies as
    536      *  we have the call chain.  We may compute the exact (rather
    537      *  than covering superset) set of following tokens.</p>
    538      *
    539      *  <p>For example, consider grammar:</p>
    540      *
    541      *  <code><pre>
    542      *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    543      *       | "return" expr '.'
    544      *       ;
    545      *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    546      *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    547      *       | '(' expr ')'
    548      *       ;
    549      *  </pre></code>
    550      *
    551      *  <p>The FOLLOW sets are all inclusive whereas context-sensitive
    552      *  FOLLOW sets are precisely what could follow a rule reference.
    553      *  For input input "i=(3);", here is the derivation:</p>
    554      *
    555      *  <code><pre>
    556      *  stat => ID '=' expr ';'
    557      *       => ID '=' atom ('+' atom)* ';'
    558      *       => ID '=' '(' expr ')' ('+' atom)* ';'
    559      *       => ID '=' '(' atom ')' ('+' atom)* ';'
    560      *       => ID '=' '(' INT ')' ('+' atom)* ';'
    561      *       => ID '=' '(' INT ')' ';'
    562      *  </pre></code>
    563      *
    564      *  <p>At the "3" token, you'd have a call chain of</p>
    565      *
    566      *  <code>  stat -> expr -> atom -> expr -> atom</code>
    567      *
    568      *  <p>What can follow that specific nested ref to atom?  Exactly ')'
    569      *  as you can see by looking at the derivation of this specific
    570      *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.</p>
    571      *
    572      *  <p>You want the exact viable token set when recovering from a
    573      *  token mismatch.  Upon token mismatch, if LA(1) is member of
    574      *  the viable next token set, then you know there is most likely
    575      *  a missing token in the input stream.  "Insert" one by just not
    576      *  throwing an exception.</p>
    577      *  @returns {org.antlr.runtime.BitSet}
    578      */
    579     computeContextSensitiveRuleFOLLOW: function() {
    580         return this.combineFollows(true);
    581     },
    582 
    583     /**
    584      * Helper method for {@link #computeErrorRecoverySet} and
    585      * {@link computeContextSensitiveRuleFOLLO}.
    586      * @param {Boolean} exact
    587      * @returns {org.antlr.runtime.BitSet}
    588      */
    589     combineFollows: function(exact) {
    590         var top = this.state._fsp,
    591             i,
    592             localFollowSet,
    593             followSet = new org.antlr.runtime.BitSet();
    594         for (i=top; i>=0; i--) {
    595             localFollowSet = this.state.following[i];
    596             followSet.orInPlace(localFollowSet);
    597             if ( exact ) {
    598                 // can we see end of rule?
    599                 if ( localFollowSet.member(org.antlr.runtime.Token.EOR_TOKEN_TYPE) )
    600                 {
    601                     // Only leave EOR in set if at top (start rule); this lets
    602                     // us know if have to include follow(start rule); i.e., EOF
    603                     if ( i>0 ) {
    604                         followSet.remove(org.antlr.runtime.Token.EOR_TOKEN_TYPE);
    605                     }
    606                 }
    607                 else { // can't see end of rule, quit
    608                     break;
    609                 }
    610             }
    611         }
    612         return followSet;
    613     },
    614 
    615     /** Attempt to recover from a single missing or extra token.
    616      *
    617      *  <p>EXTRA TOKEN</p>
    618      *
    619      *  <p>LA(1) is not what we are looking for.  If LA(2) has the right token,
    620      *  however, then assume LA(1) is some extra spurious token.  Delete it
    621      *  and LA(2) as if we were doing a normal match(), which advances the
    622      *  input.</p>
    623      *
    624      *  <p>MISSING TOKEN</p>
    625      *
    626      *  <p>If current token is consistent with what could come after
    627      *  ttype then it is ok to "insert" the missing token, else throw
    628      *  exception For example, Input "i=(3;" is clearly missing the
    629      *  ')'.  When the parser returns from the nested call to expr, it
    630      *  will have call chain:</p>
    631      *
    632      *  <pre><code>  stat -> expr -> atom</code></pre>
    633      *
    634      *  <p>and it will be trying to match the ')' at this point in the
    635      *  derivation:</p>
    636      *
    637      *  <pre><code>     => ID '=' '(' INT ')' ('+' atom)* ';'</code></pre>
    638      *                          ^
    639      *  <p>match() will see that ';' doesn't match ')' and report a
    640      *  mismatched token error.  To recover, it sees that LA(1)==';'
    641      *  is in the set of tokens that can follow the ')' token
    642      *  reference in rule atom.  It can assume that you forgot the ')'.</p>
    643      *
    644      *  @param {org.antlr.runtime.IntStream} input
    645      *  @param {Number} ttype
    646      *  @param {org.antlr.runtime.BitSet} follow
    647      *  @returns {Object}
    648      */
    649     recoverFromMismatchedToken: function(input,
    650                                          ttype,
    651                                          follow)
    652     {
    653         var e = null;
    654         // if next token is what we are looking for then "delete" this token
    655         if ( this.mismatchIsUnwantedToken(input, ttype) ) {
    656             e = new org.antlr.runtime.UnwantedTokenException(ttype, input);
    657             this.beginResync();
    658             input.consume(); // simply delete extra token
    659             this.endResync();
    660             this.reportError(e);  // report after consuming so AW sees the token in the exception
    661             // we want to return the token we're actually matching
    662             var matchedSymbol = this.getCurrentInputSymbol(input);
    663             input.consume(); // move past ttype token as if all were ok
    664             return matchedSymbol;
    665         }
    666         // can't recover with single token deletion, try insertion
    667         if ( this.mismatchIsMissingToken(input, follow) ) {
    668             var inserted = this.getMissingSymbol(input, e, ttype, follow);
    669             e = new org.antlr.runtime.MissingTokenException(ttype, input, inserted);
    670             this.reportError(e);  // report after inserting so AW sees the token in the exception
    671             return inserted;
    672         }
    673         // even that didn't work; must throw the exception
    674         e = new org.antlr.runtime.MismatchedTokenException(ttype, input);
    675         throw e;
    676     },
    677 
    678     /**
    679      * Recover from a mismatched set exception.
    680      * @param {org.antlr.runtime.IntStream} input
    681      * @param {org.antlr.runtime.RecognitionException} e
    682      * @param {org.antlr.runtime.BitSet} follow
    683      * @returns {Object}
    684      */
    685     recoverFromMismatchedSet: function(input,
    686                                        e,
    687                                        follow)
    688     {
    689         if ( this.mismatchIsMissingToken(input, follow) ) {
    690             // System.out.println("missing token");
    691             this.reportError(e);
    692             // we don't know how to conjure up a token for sets yet
    693             return this.getMissingSymbol(input, e, org.antlr.runtime.Token.INVALID_TOKEN_TYPE, follow);
    694         }
    695         throw e;
    696     },
    697 
    698     /** Match needs to return the current input symbol, which gets put
    699      *  into the label for the associated token ref; e.g., x=ID.  Token
    700      *  and tree parsers need to return different objects. Rather than test
    701      *  for input stream type or change the IntStream interface, I use
    702      *  a simple method to ask the recognizer to tell me what the current
    703      *  input symbol is.
    704      *
    705      *  <p>This is ignored for lexers.</p>
    706      *  @param {org.antlr.runtime.IntStream} input
    707      *  @returns {Object}
    708      */
    709     getCurrentInputSymbol: function(input) { return null; },
    710 
    711     /** Conjure up a missing token during error recovery.
    712      *
    713      *  <p>The recognizer attempts to recover from single missing
    714      *  symbols. But, actions might refer to that missing symbol.
    715      *  For example, x=ID {f($x);}. The action clearly assumes
    716      *  that there has been an identifier matched previously and that
    717      *  $x points at that token. If that token is missing, but
    718      *  the next token in the stream is what we want we assume that
    719      *  this token is missing and we keep going. Because we
    720      *  have to return some token to replace the missing token,
    721      *  we have to conjure one up. This method gives the user control
    722      *  over the tokens returned for missing tokens. Mostly,
    723      *  you will want to create something special for identifier
    724      *  tokens. For literals such as '{' and ',', the default
    725      *  action in the parser or tree parser works. It simply creates
    726      *  a CommonToken of the appropriate type. The text will be the token.
    727      *  If you change what tokens must be created by the lexer,
    728      *  override this method to create the appropriate tokens.</p>
    729      *
    730      *  @param {org.antlr.runtime.IntStream} input
    731      *  @param {org.antlr.runtime.RecognitionException} e
    732      *  @param {Number} expectedTokenType
    733      *  @param {org.antlr.runtime.BitSet} follow
    734      *  @returns {Object}
    735      */
    736     getMissingSymbol: function(input,
    737                                e,
    738                                expectedTokenType,
    739                                follow)
    740     {
    741         return null;
    742     },
    743 
    744 
    745     /**
    746      * Consume tokens until one matches the given token or token set
    747      * @param {org.antlr.runtime.IntStream} input
    748      * @param {Number|org.antlr.runtime.BitSet} set
    749      */
    750     consumeUntil: function(input, set) {
    751         var ttype = input.LA(1);
    752         while (ttype != org.antlr.runtime.Token.EOF && !set.member(ttype) ) {
    753             input.consume();
    754             ttype = input.LA(1);
    755         }
    756     },
    757 
    758     /**
    759      * Push a rule's follow set using our own hardcoded stack.
    760      * @param {org.antlr.runtime.BitSet} fset
    761      */
    762     pushFollow: function(fset) {
    763         if ( (this.state._fsp +1)>=this.state.following.length ) {
    764             var f = [];
    765             var i;
    766             for (i=this.state.following.length-1; i>=0; i--) {
    767                 f[i] = this.state.following[i];
    768             }
    769             this.state.following = f;
    770         }
    771         this.state._fsp++;
    772         this.state.following[this.state._fsp] = fset;
    773     },
    774 
    775     /**
    776      * Sadly JavaScript doesn't provide a robust mechanism for runtime stack reflection.
    777      * This makes implementing this function impossible without maintaining an auxillary
    778      * stack data structure, which would be crazy expensive, especially in Lexers.  As such,
    779      * this method remains unimplemented.
    780      * @deprecated
    781      */
    782     getRuleInvocationStack: function(e, recognizerClassName)
    783     {
    784         throw new Error("Not implemented.");
    785     },
    786 
    787     /**
    788      * Get this recognizer's backtracking level.
    789      * @returns {Number} backtracking level
    790      */
    791     getBacktrackingLevel: function() {
    792         return this.state.backtracking;
    793     },
    794 
    795     /** Used to print out token names like ID during debugging and
    796      *  error reporting.  The generated parsers implement a method
    797      *  that overrides this to point to their String[] tokenNames.
    798      *  @returns {Array} String array of token names.
    799      */
    800     getTokenNames: function() {
    801         return null;
    802     },
    803 
    804     /** For debugging and other purposes, might want the grammar name.
    805      *  Have ANTLR generate an implementation for this method.
    806      *  @returns {String} the grammar name.
    807      */
    808     getGrammarFileName: function() {
    809         return null;
    810     },
    811 
    812     /** A convenience method for use most often with template rewrites.
    813      *  Convert an array of Tokens to an array of Strings.
    814      *  @param {Array} array of org.antlr.runtime.Token objects.
    815      *  @returns {Array} array of string representations of the argument.
    816      */
    817     toStrings: function(tokens) {
    818         if ( !tokens ) {
    819             return null;
    820         }
    821         var strings = [];
    822         var i;
    823         for (i=0; i<tokens.length; i++) {
    824             strings.push(tokens[i].getText());
    825         }
    826         return strings;
    827     },
    828 
    829     /** Given a rule number and a start token index number, return
    830      *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    831      *  start index.  If this rule has parsed input starting from the
    832      *  start index before, then return where the rule stopped parsing.
    833      *  It returns the index of the last token matched by the rule.
    834      *
    835      *  <p>For now we use a hashtable and just the slow Object-based one.
    836      *  Later, we can make a special one for ints and also one that
    837      *  tosses out data after we commit past input position i.</p>
    838      *  @param {Number} ruleIndex
    839      *  @param {Number} ruleStartIndex
    840      *  @returns {Number}
    841      */
    842     getRuleMemoization: function(ruleIndex, ruleStartIndex) {
    843         if ( !this.state.ruleMemo[ruleIndex] ) {
    844             this.state.ruleMemo[ruleIndex] = {};
    845         }
    846         var stopIndexI =
    847             this.state.ruleMemo[ruleIndex][ruleStartIndex];
    848         if ( !org.antlr.lang.isNumber(stopIndexI) ) {
    849             return org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN;
    850         }
    851         return stopIndexI;
    852     },
    853 
    854     /** Has this rule already parsed input at the current index in the
    855      *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
    856      *  If we attempted but failed to parse properly before, return
    857      *  MEMO_RULE_FAILED.
    858      *
    859      *  <p>This method has a side-effect: if we have seen this input for
    860      *  this rule and successfully parsed before, then seek ahead to
    861      *  1 past the stop token matched for this rule last time.</p>
    862      *  @param {org.antlr.runtime.IntStream} input
    863      *  @param {Number} ruleIndex
    864      *  @returns {Boolean}
    865      */
    866     alreadyParsedRule: function(input, ruleIndex) {
    867         var stopIndex = this.getRuleMemoization(ruleIndex, input.index());
    868         if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_UNKNOWN ) {
    869             return false;
    870         }
    871         if ( stopIndex==org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED ) {
    872             //System.out.println("rule "+ruleIndex+" will never succeed");
    873             this.state.failed=true;
    874         }
    875         else {
    876             input.seek(stopIndex+1); // jump to one past stop token
    877         }
    878         return true;
    879     },
    880 
    881     /** Record whether or not this rule parsed the input at this position
    882      *  successfully.  Use a standard java hashtable for now.
    883      *  @param {org.antlr.runtime.IntStream} input
    884      *  @param {Number} ruleIndex
    885      *  @param {Number} ruleStartIndex
    886      */
    887     memoize: function(input,
    888                       ruleIndex,
    889                       ruleStartIndex)
    890     {
    891         var stopTokenIndex = this.state.failed ?
    892             org.antlr.runtime.BaseRecognizer.MEMO_RULE_FAILED : input.index()-1;
    893         if ( !org.antlr.lang.isValue(this.state.ruleMemo) ) {
    894             throw new Error("!!!!!!!!! memo array is null for "+ this.getGrammarFileName());
    895         }
    896         if ( ruleIndex >= this.state.ruleMemo.length ) {
    897             throw new Error("!!!!!!!!! memo size is "+this.state.ruleMemo.length+", but rule index is "+ruleIndex);
    898         }
    899         if ( org.antlr.lang.isValue(this.state.ruleMemo[ruleIndex]) ) {
    900             this.state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
    901         }
    902     },
    903 
    904     /** return how many rule/input-index pairs there are in total.
    905      *  TODO: this includes synpreds.
    906      *  @returns {Number}
    907      */
    908     getRuleMemoizationCacheSize: function() {
    909         var n = 0, i;
    910         for (i = 0; this.state.ruleMemo && i < this.state.ruleMemo.length; i++) {
    911             var ruleMap = this.state.ruleMemo[i];
    912             if ( ruleMap ) {
    913                 // @todo need to get size of rulemap?
    914                 n += ruleMap.length; // how many input indexes are recorded?
    915             }
    916         }
    917         return n;
    918     },
    919 
    920     /**
    921      * When a grammar is compiled with the tracing flag enabled, this method is invoked
    922      * at the start of each rule.
    923      * @param {String} ruleName the current ruleName
    924      * @param {Number} ruleIndex
    925      * @param {Object} inputSymbol
    926      */
    927     traceIn: function(ruleName, ruleIndex, inputSymbol)  {
    928         this.emitErrorMessage("enter "+ruleName+" "+inputSymbol);
    929         if ( this.state.failed ) {
    930             this.emitErrorMessage(" failed="+this.failed);
    931         }
    932         if ( this.state.backtracking>0 ) {
    933             this.emitErrorMessage(" backtracking="+this.state.backtracking);
    934         }
    935         // System.out.println();
    936     },
    937 
    938     /**
    939      * When a grammar is compiled with the tracing flag enabled, this method is invoked
    940      * at the end of each rule.
    941      * @param {String} ruleName the current ruleName
    942      * @param {Number} ruleIndex
    943      * @param {Object} inputSymbol
    944      */
    945     traceOut: function(ruleName, ruleIndex, inputSymbol) {
    946         this.emitErrorMessage("exit "+ruleName+" "+inputSymbol);
    947         if ( this.state.failed ) {
    948             this.emitErrorMessage(" failed="+this.state.failed);
    949         }
    950         if ( this.state.backtracking>0 ) {
    951             this.emitErrorMessage(" backtracking="+this.state.backtracking);
    952         }
    953     }
    954 };
    955