Home | History | Annotate | Download | only in include
      1 /** \file
      2  * While the C runtime does not need to model the state of
      3  * multiple lexers and parsers in the same way as the Java runtime does,
      4  * it is no overhead to reflect that model. In fact the
      5  * C runtime has always been able to share recognizer state.
      6  *
      7  * This 'class' therefore defines all the elements of a recognizer
      8  * (either lexer, parser or tree parser) that are needed to
      9  * track the current recognition state. Multiple recognizers
     10  * may then share this state, for instance when one grammar
     11  * imports another.
     12  */
     13 
     14 #ifndef	_ANTLR3_RECOGNIZER_SHARED_STATE_H
     15 #define	_ANTLR3_RECOGNIZER_SHARED_STATE_H
     16 
     17 // [The "BSD licence"]
     18 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     19 // http://www.temporal-wave.com
     20 // http://www.linkedin.com/in/jimidle
     21 //
     22 // All rights reserved.
     23 //
     24 // Redistribution and use in source and binary forms, with or without
     25 // modification, are permitted provided that the following conditions
     26 // are met:
     27 // 1. Redistributions of source code must retain the above copyright
     28 //    notice, this list of conditions and the following disclaimer.
     29 // 2. Redistributions in binary form must reproduce the above copyright
     30 //    notice, this list of conditions and the following disclaimer in the
     31 //    documentation and/or other materials provided with the distribution.
     32 // 3. The name of the author may not be used to endorse or promote products
     33 //    derived from this software without specific prior written permission.
     34 //
     35 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     36 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     37 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     38 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     39 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     40 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     41 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     42 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     43 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     44 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     45 
     46 #include    <antlr3defs.h>
     47 
     48 #ifdef __cplusplus
     49 extern "C" {
     50 #endif
     51 
     52 /** All the data elements required to track the current state
     53  *  of any recognizer (lexer, parser, tree parser).
     54  * May be shared between multiple recognizers such that
     55  * grammar inheritance is easily supported.
     56  */
     57 typedef	struct ANTLR3_RECOGNIZER_SHARED_STATE_struct
     58 {
     59     /** If set to ANTLR3_TRUE then the recognizer has an exception
     60      * condition (this is tested by the generated code for the rules of
     61      * the grammar).
     62      */
     63     ANTLR3_BOOLEAN	    error;
     64 
     65     /** Points to the first in a possible chain of exceptions that the
     66      *  recognizer has discovered.
     67      */
     68     pANTLR3_EXCEPTION	    exception;
     69 
     70     /** Holds a hint from the creator of the recognizer as to how big this
     71      *  thing is going to get. This allows us to tune hash tables
     72      *  accordingly. This might not be the best place for this
     73      *  in the end but we will see.
     74      */
     75     ANTLR3_UINT32	sizeHint;
     76 
     77     /** Track the set of token types that can follow any rule invocation.
     78      *  Stack structure, to support: List<BitSet>.
     79      */
     80     pANTLR3_STACK	following;
     81 
     82 
     83     /** This is true when we see an error and before having successfully
     84      *  matched a token.  Prevents generation of more than one error message
     85      *  per error.
     86      */
     87     ANTLR3_BOOLEAN	errorRecovery;
     88 
     89     /** The index into the input stream where the last error occurred.
     90      *  This is used to prevent infinite loops where an error is found
     91      *  but no token is consumed during recovery...another error is found,
     92      *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
     93      *  one token/tree node is consumed for two errors.
     94      */
     95     ANTLR3_MARKER	lastErrorIndex;
     96 
     97     /** In lieu of a return value, this indicates that a rule or token
     98      *  has failed to match.  Reset to false upon valid token match.
     99      */
    100     ANTLR3_BOOLEAN	failed;
    101 
    102     /** When the recognizer terminates, the error handling functions
    103      *  will have incremented this value if any error occurred (that was displayed). It can then be
    104      *  used by the grammar programmer without having to use static globals.
    105      */
    106     ANTLR3_UINT32	errorCount;
    107 
    108     /** If 0, no backtracking is going on.  Safe to exec actions etc...
    109      *  If >0 then it's the level of backtracking.
    110      */
    111     ANTLR3_INT32	backtracking;
    112 
    113     /** Rule memoization table, declared as an integer trie (pANTLR3_INT_TRIE).
    114      *  Tracks the stop token index for each rule: for a given rule index and
    115      *  key ruleStartIndex, you get back the stop token for the associated
    116      *  rule or MEMO_RULE_FAILED.
    117      *  NOTE(review): the exact key layout within the trie is not visible here - confirm.
    118      *  This is only used if rule memoization is on.
    119      */
    120     pANTLR3_INT_TRIE	ruleMemo;
    121 
    122     /** Pointer to an array of token names
    123      *  that are generally useful in error reporting. The generated parsers install
    124      *  this pointer. The table it points to is statically allocated as 8 bit ascii
    125      *  at parser compile time - grammar token names are thus restricted in character
    126      *  sets, which does not seem too terrible.
    127      */
    128     pANTLR3_UINT8	* tokenNames;
    129 
    130     /** User programmable pointer that can be used for instance as a place to
    131      *  store some tracking structure specific to the grammar that would not normally
    132      *  be available to the error handling functions.
    133      */
    134     void		* userp;
    135 
    136 	    /** The goal of all lexer rules/methods is to create a token object.
    137      *  This is an instance variable as multiple rules may collaborate to
    138      *  create a single token.  For example, NUM : INT | FLOAT ;
    139      *  In this case, you want the INT or FLOAT rule to set token and not
    140      *  have it reset to a NUM token in rule NUM.
    141      */
    142     pANTLR3_COMMON_TOKEN	token;
    143 
    144     /** The goal of all lexer rules being to create a token, then a lexer
    145      *  needs to build a token factory to create them.
    146      */
    147     pANTLR3_TOKEN_FACTORY	tokFactory;
    148 
    149     /** A lexer is a source of tokens, produced by all the generated (or
    150      *  hand crafted if you like) matching rules. As such it needs to provide
    151      *  a token source interface implementation.
    152      */
    153     pANTLR3_TOKEN_SOURCE	tokSource;
    154 
    155     /** The channel number for the current token
    156      */
    157     ANTLR3_UINT32		channel;
    158 
    159     /** The token type for the current token
    160      */
    161     ANTLR3_UINT32		type;
    162 
    163     /** The input line (where it makes sense) on which the first character of the current
    164      *  token resides.
    165      */
    166     ANTLR3_INT32		tokenStartLine;
    167 
    168     /** The character position of the first character of the current token
    169      *  within the line specified by tokenStartLine
    170      */
    171     ANTLR3_INT32		tokenStartCharPositionInLine;
    172 
    173     /** What character index in the stream did the current token start at?
    174      *  Needed, for example, to get the text for current token.  Set at
    175      *  the start of nextToken.
    176      */
    177     ANTLR3_MARKER		tokenStartCharIndex;
    178 
    179     /** Text for the current token. This can be overridden by setting this
    180      *  variable directly or by using the SETTEXT() macro (preferred) in your
    181      *  lexer rules.
    182      */
    183     pANTLR3_STRING		text;
    184 
    185 	/** User controlled variables that will be installed in a newly created
    186 	 * token. NOTE(review): presumably this also covers 'custom' - confirm.
    187 	 */
    188 	ANTLR3_UINT32		user1, user2, user3;
    189 	void				* custom;
    190 
    191     /** Input stream stack, which allows the C programmer to switch input streams
    192      *  easily and allow the standard nextToken() implementation to deal with it
    193      *  as this is a common requirement.
    194      */
    195     pANTLR3_STACK		streams;
    196 
    197 	/// A stack of token/tree rewrite streams that are available for use
    198 	/// by a parser or tree parser that is using rewrites to generate
    199 	/// an AST. This saves each rule in the recognizer from having to
    200 	/// allocate and deallocate rewrite streams on entry and exit. As
    201 	/// the parser recurses through the rules it will reach a steady state
    202 	/// of the maximum number of allocated streams, which instead of
    203 	/// deallocating them at rule exit, it will place on this stack for
    204 	/// reuse. The streams are then all finally freed when this stack
    205 	/// is freed.
    206 	///
    207 	pANTLR3_VECTOR		rStreams;
    208 
    209 }
    210 	ANTLR3_RECOGNIZER_SHARED_STATE;
    211 
    212 #ifdef __cplusplus
    213 }
    214 #endif
    215 
    216 #endif
    217 
    218 
    219