antlr/runtime/Lexer.js

/** A lexer is recognizer that draws input symbols from a character stream.
 *  lexer grammars result in a subclass of this object. A Lexer object
 *  uses simplified match() and error recovery mechanisms in the interest
 *  of speed.
 */
org.antlr.runtime.Lexer = function(input, state) {
    if (state) {
        org.antlr.runtime.Lexer.superclass.constructor.call(this, state);
    }
    if (input) {
        this.input = input;
    }
};

org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
    reset: function() {
        // reset all recognizer state variables
        org.antlr.runtime.Lexer.superclass.reset.call(this);
        if ( org.antlr.lang.isValue(this.input) ) {
            this.input.seek(0); // rewind the input
        }
        if ( !org.antlr.lang.isValue(this.state) ) {
            return; // no shared state work to do
        }
        this.state.token = null;
        this.state.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
        this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
        this.state.tokenStartCharIndex = -1;
        this.state.tokenStartCharPositionInLine = -1;
        this.state.tokenStartLine = -1;
        this.state.text = null;
    },

    /** Return a token from this source; i.e., match a token on the char
     *  stream.
     */
    nextToken: function() {
        while (true) {
            this.state.token = null;
            this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
            this.state.tokenStartCharIndex = this.input.index();
            this.state.tokenStartCharPositionInLine = this.input.getCharPositionInLine();
            this.state.tokenStartLine = this.input.getLine();
            this.state.text = null;
            if ( this.input.LA(1)===org.antlr.runtime.CharStream.EOF ) {
                return org.antlr.runtime.Token.EOF_TOKEN;
            }
            try {
                this.mTokens();
                if ( !org.antlr.lang.isValue(this.state.token) ) {
                    this.emit();
                }
                else if ( this.state.token==org.antlr.runtime.Token.SKIP_TOKEN ) {
                    continue;
                }
                return this.state.token;
            }
            catch (re) {
                if (re instanceof org.antlr.runtime.NoViableAltException) {
                    this.reportError(re);
                    this.recover(re);
                } else if ( re instanceof org.antlr.runtime.RecognitionException ) {
                    this.reportError(re);
                } else {
                    throw re;
                }
            }
        }
    },

    /** Instruct the lexer to skip creating a token for current lexer rule
     *  and look for another token.  nextToken() knows to keep looking when
     *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
     *  if token==null at end of any token rule, it creates one for you
     *  and emits it.
     */
    skip: function() {
        this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
    },

    /** Set the char stream and reset the lexer */
    setCharStream: function(input) {
        this.input = null;
        this.reset();
        this.input = input;
    },

    getCharStream: function() {
        return this.input;
    },

    getSourceName: function() {
        return this.input.getSourceName();
    },

    /** Currently does not support multiple emits per nextToken invocation
     *  for efficiency reasons.  Subclass and override this method and
     *  nextToken (to push tokens into a list and pull from that list rather
     *  than a single variable as this implementation does).
     *
     *  The standard method called to automatically emit a token at the
     *  outermost lexical rule.  The token object should point into the
     *  char buffer start..stop.  If there is a text override in 'text',
     *  use that to set the token's text.  Override this method to emit
     *  custom Token objects.
     *
     *  If you are building trees, then you should also override
     *  Parser or TreeParser.getMissingSymbol().
     */
    emit: function() {
        if (arguments.length===0) {
            var t = new org.antlr.runtime.CommonToken(this.input, this.state.type, this.state.channel, this.state.tokenStartCharIndex, this.getCharIndex()-1);
            t.setLine(this.state.tokenStartLine);
            t.setText(this.state.text);
            t.setCharPositionInLine(this.state.tokenStartCharPositionInLine);
            this.state.token = t;
            return t;
        } else {
            this.state.token = arguments[0];
        }
    },

    match: function(s) {
        var i = 0,
            mte;

        if (org.antlr.lang.isString(s)) {
            while ( i<s.length ) {
                if ( this.input.LA(1)!=s.charAt(i) ) {
                    if ( this.state.backtracking>0 ) {
                        this.state.failed = true;
                        return;
                    }
                    mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(i), this.input);
                    this.recover(mte);
                    throw mte;
                }
                i++;
                this.input.consume();
                this.state.failed = false;
            }
        } else if (org.antlr.lang.isNumber(s)) {
            if ( this.input.LA(1)!=s ) {
                if ( this.state.backtracking>0 ) {
                    this.state.failed = true;
                    return;
                }
                mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
                this.recover(mte);
                throw mte;
            }
            this.input.consume();
            this.state.failed = false;
        }
    },

    matchAny: function() {
        this.input.consume();
    },

    matchRange: function(a, b) {
        if ( this.input.LA(1)<a || this.input.LA(1)>b ) {
            if ( this.state.backtracking>0 ) {
                this.state.failed = true;
                return;
            }
            var mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
            this.recover(mre);
            throw mre;
        }
        this.input.consume();
        this.state.failed = false;
    },

    getLine: function() {
        return this.input.getLine();
    },

    getCharPositionInLine: function() {
        return this.input.getCharPositionInLine();
    },

    /** What is the index of the current character of lookahead? */
    getCharIndex: function() {
        return this.input.index();
    },

    /** Return the text matched so far for the current token or any
     *  text override.
     */
    getText: function() {
        if ( org.antlr.lang.isString(this.state.text) ) {
            return this.state.text;
        }
        return this.input.substring(this.state.tokenStartCharIndex,this.getCharIndex()-1);
    },

    /** Set the complete text of this token; it wipes any previous
     *  changes to the text.
     */
    setText: function(text) {
        this.state.text = text;
    },

    reportError: function(e) {
        /** TODO: not thought about recovery in lexer yet.
         *
        // if we've already reported an error and have not matched a token
        // yet successfully, don't report any errors.
        if ( errorRecovery ) {
            //System.err.print("[SPURIOUS] ");
            return;
        }
        errorRecovery = true;
         */

        this.displayRecognitionError(this.getTokenNames(), e);
    },

    getErrorMessage: function(e, tokenNames) {
        var msg = null;
        if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting "+this.getCharErrorDisplay(e.expecting);
        }
        else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
            msg = "no viable alternative at character "+this.getCharErrorDisplay(e.c);
        }
        else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
            msg = "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
        }
        else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
        }
        else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
        }
        else if ( e instanceof org.antlr.runtime.MismatchedRangeException ) {
            msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+
                this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
        }
        else {
            msg = org.antlr.runtime.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
        }
        return msg;
    },

    getCharErrorDisplay: function(c) {
        var s = c; //String.fromCharCode(c);
        switch ( s ) {
            case org.antlr.runtime.Token.EOF :
                s = "<EOF>";
                break;
            case "\n" :
                s = "\\n";
                break;
            case "\t" :
                s = "\\t";
                break;
            case "\r" :
                s = "\\r";
                break;
        }
        return "'"+s+"'";
    },

    /** Lexers can normally match any char in it's vocabulary after matching
     *  a token, so do the easy thing and just kill a character and hope
     *  it all works out.  You can instead use the rule invocation stack
     *  to do sophisticated error recovery if you are in a fragment rule.
     */
    recover: function(re) {
        this.input.consume();
    },

    traceIn: function(ruleName, ruleIndex)  {
        var inputSymbol = String.fromCharCode(this.input.LT(1))+" line="+this.getLine()+":"+this.getCharPositionInLine();
        org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
    },

    traceOut: function(ruleName, ruleIndex)  {
		var inputSymbol = String.fromCharCode(this.input.LT(1))+" line="+this.getLine()+":"+this.getCharPositionInLine();
		org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
	}
});