Home | History | Annotate | Download | only in include
      1 /** \file
      2  * Defines the basic structure to support recognizing by either a lexer,
      3  * parser, or tree parser.
      4  * \addtogroup ANTLR3_BASE_RECOGNIZER
      5  * @{
      6  */
      7 #ifndef	_ANTLR3_BASERECOGNIZER_H
      8 #define	_ANTLR3_BASERECOGNIZER_H
      9 
     10 // [The "BSD licence"]
     11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     12 // http://www.temporal-wave.com
     13 // http://www.linkedin.com/in/jimidle
     14 //
     15 // All rights reserved.
     16 //
     17 // Redistribution and use in source and binary forms, with or without
     18 // modification, are permitted provided that the following conditions
     19 // are met:
     20 // 1. Redistributions of source code must retain the above copyright
     21 //    notice, this list of conditions and the following disclaimer.
     22 // 2. Redistributions in binary form must reproduce the above copyright
     23 //    notice, this list of conditions and the following disclaimer in the
     24 //    documentation and/or other materials provided with the distribution.
     25 // 3. The name of the author may not be used to endorse or promote products
     26 //    derived from this software without specific prior written permission.
     27 //
     28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     38 
     39 #include    <antlr3defs.h>
     40 #include    <antlr3exception.h>
     41 #include    <antlr3input.h>
     42 #include    <antlr3tokenstream.h>
     43 #include    <antlr3commontoken.h>
     44 #include    <antlr3commontreenodestream.h>
     45 #include	<antlr3debugeventlistener.h>
     46 #include	<antlr3recognizersharedstate.h>
     47 
     48 /** Type indicator for a lexer recognizer; one of the values listed as valid for the
     49  *  \c type field of ANTLR3_BASE_RECOGNIZER. */
     50 #define	    ANTLR3_TYPE_LEXER		0x0001
     51 
     52 /** Type indicator for a parser recognizer; one of the values listed as valid for the
     53  *  \c type field of ANTLR3_BASE_RECOGNIZER. */
     54 #define	    ANTLR3_TYPE_PARSER		0x0002
     55 
     56 /** Type indicator for a tree parser recognizer; one of the values listed as valid for the
     57  *  \c type field of ANTLR3_BASE_RECOGNIZER. */
     58 #define	    ANTLR3_TYPE_TREE_PARSER	0x0004
     59 
     60 #ifdef __cplusplus
     61 extern "C" {
     62 #endif
     63 
      64 /** \brief Base tracking context structure for all types of
      65  * recognizers.
      66  */
      67 typedef	struct ANTLR3_BASE_RECOGNIZER_struct
      68 {
      69     /// Whatever super structure is providing this interface needs a pointer to itself
      70     /// so that this can be passed back to it whenever the api functions
      71     /// are called back from here.
      72     ///
      73     void	      * super;
      74 
      75 	/// Indicates the type of recognizer that we are an instance of.
      76     /// The programmer may set this to anything of course, but the default
      77     /// implementations of the interface only really understand the built in
      78     /// types, so new error handlers etc would probably be required as well.
      79     ///
      80     ///  Valid types are:
      81     ///
      82     ///   - #ANTLR3_TYPE_LEXER
      83 	///	  - #ANTLR3_TYPE_PARSER
      84     ///   - #ANTLR3_TYPE_TREE_PARSER
      85     ///
      86     ANTLR3_UINT32	type;
      87 
      88 	/// A pointer to the shared recognizer state, such that multiple
      89 	/// recognizers can use the same input streams and so on (in
      90 	/// the case of grammar inheritance, for instance).
      91 	///
      92 	pANTLR3_RECOGNIZER_SHARED_STATE	state;
      93 
      94 	/// If set to something other than NULL, then this
      95 	/// points to an instance of the debugger interface. In general, the
      96 	/// debugger is only referenced internally in recovery/error operations
      97 	/// so that it does not cause overhead by having to check this pointer
      98 	/// in every function/method.
      99 	///
     100 	pANTLR3_DEBUG_EVENT_LISTENER	debugger;
     101 
     102 
     103     /// Pointer to a function that matches the current input symbol
     104     /// against the supplied type. The function causes an error if a
     105     /// match is not found and the default implementation will also
     106     /// attempt to perform one token insertion or deletion if that is
     107     /// possible with the input stream. You can override the default
     108     /// implementation by installing a pointer to your own function
     109     /// in this interface after the recognizer has initialized. This can
     110     /// perform different recovery options or not recover at all and so on.
     111     /// To ignore recovery altogether, see the comments in the default
     112     /// implementation of this function in antlr3baserecognizer.c
     113     ///
     114     /// Note that errors are signalled by setting the error flag below
     115     /// and creating a new exception structure and installing it in the
     116     /// exception pointer below (you can chain these if you like and handle them
     117     /// in some customized way).
     118     ///
     119     void *		(*match)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     120 							    ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
     121 
     122     /// Pointer to a function that matches the next token/char in the input stream
     123     /// regardless of what it actually is.
     124     ///
     125     void		(*matchAny)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     126 
     127 	/// Pointer to a function that decides if the token ahead of the current one is the
     128 	/// one we were looking for, in which case the current one is very likely extraneous
     129 	/// and can be reported that way.
     130 	///
     131 	ANTLR3_BOOLEAN
     132 				(*mismatchIsUnwantedToken)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype);
     133 
     134 	/// Pointer to a function that decides if the current token is one that can logically
     135 	/// follow the one we were looking for, in which case the one we were looking for is
     136 	/// probably missing from the input.
     137 	///
     138 	ANTLR3_BOOLEAN
     139 				(*mismatchIsMissingToken)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow);
     140 
     141     /** Pointer to a function that works out what to do when a token mismatch
     142      *  occurs, so that Tree parsers can behave differently to other recognizers.
     143      */
     144     void		(*mismatch)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     145 					    ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
     146 
     147     /** Pointer to a function to call to report a recognition problem. You may override
     148      *  this function with your own function, but refer to the standard implementation
     149      *  in antlr3baserecognizer.c for guidance. The function should recognize whether
     150      *  error recovery is in force, so that it does not print out more than one error message
     151      *  for the same error. From the java comments in BaseRecognizer.java:
     152      *
     153      *  This method sets errorRecovery to indicate the parser is recovering
     154      *  not parsing.  Once in recovery mode, no errors are generated.
     155      *  To get out of recovery mode, the parser must successfully match
     156      *  a token (after a resync).  So it will go:
     157      *
     158      * 		1. error occurs
     159      * 		2. enter recovery mode, report error
     160      * 		3. consume until token found in resynch set
     161      * 		4. try to resume parsing
     162      * 		5. next match() will reset errorRecovery mode
     163      */
     164     void		(*reportError)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     165 
     166     /** Pointer to a function that is called to display a recognition error message. You may
     167      *  override this function independently of (*reportError)() above as that function calls
     168      *  this one to do the actual exception printing.
     169      */
     170     void		(*displayRecognitionError)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames);
     171 
     172 	/// Get number of recognition errors (lexer, parser, tree parser).  Each
     173 	/// recognizer tracks its own number.  So parser and lexer each have
     174 	/// separate count.  Does not count the spurious errors found between
     175 	/// an error and next valid token match
     176 	///
     177 	/// \see reportError()
     178 	///
     179 	ANTLR3_UINT32
     180 				(*getNumberOfSyntaxErrors)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     181 
     182     /** Pointer to a function that recovers from an error found in the input stream.
     183      *  Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
     184      *  be from a mismatched token that the (*match)() could not recover from.
     185      */
     186     void		(*recover)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     187 
     188     /** Pointer to a function that is a hook to listen to token consumption during error recovery.
     189      *  This is mainly used by the debug parser to send events to the listener.
     190      */
     191     void		(*beginResync)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     192 
     193     /** Pointer to a function that is a hook to listen to token consumption during error recovery.
     194      *  This is mainly used by the debug parser to send events to the listener.
     195      */
     196     void		(*endResync)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     197 
     198 	/** Pointer to a function that is a hook for the start of backtracking at the given level (inferred
     199      *  from the name and signature; TODO confirm). Mainly used by the debug parser to send events to the listener.
     200      */
     201     void		(*beginBacktrack)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level);
     202 
     203     /** Pointer to a function that is a hook for the end of backtracking at the given level, with a flag for
     204      *  whether it succeeded (inferred from the signature; TODO confirm). Mainly used by the debug parser to send events to the listener.
     205      */
     206     void		(*endBacktrack)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
     207 
     208     /** Pointer to a function to compute the error recovery set for the current rule.
     209      *  \see antlr3ComputeErrorRecoverySet() for details.
     210      */
     211     pANTLR3_BITSET	(*computeErrorRecoverySet)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     212 
     213     /** Pointer to a function that computes the context-sensitive FOLLOW set for the
     214      *  current rule.
     215      * \see antlr3ComputeCSRuleFollow() for details.
     216      */
     217     pANTLR3_BITSET	(*computeCSRuleFollow)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     218 
     219     /** Pointer to a function to combine follow bitsets.
     220      * \see antlr3CombineFollows() for details.
     221      */
     222     pANTLR3_BITSET	(*combineFollows)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     223 							    ANTLR3_BOOLEAN exact);
     224 
     225     /** Pointer to a function that recovers from a mismatched token in the input stream.
     226      * \see antlr3RecoverMismatch() for details.
     227      */
     228     void		* (*recoverFromMismatchedToken)
     229 						    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     230 							    ANTLR3_UINT32	ttype,
     231 							    pANTLR3_BITSET_LIST	follow);
     232 
     233     /** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner
     234      *  to (*recoverFromMismatchedToken)
     235      */
     236     void		* (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     237 							    pANTLR3_BITSET_LIST	follow);
     238 
     239     /** Pointer to common routine to handle single token insertion for recovery functions.
     240      */
     241     ANTLR3_BOOLEAN	(*recoverFromMismatchedElement)
     242 						    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     243 							    pANTLR3_BITSET_LIST	follow);
     244 
     245     /** Pointer to function that consumes input until the next token matches
     246      *  the given token.
     247      */
     248     void		(*consumeUntil)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     249 							    ANTLR3_UINT32   tokenType);
     250 
     251     /** Pointer to function that consumes input until the next token matches
     252      *  one in the given set.
     253      */
     254     void		(*consumeUntilSet)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     255 							    pANTLR3_BITSET	set);
     256 
     257     /** Pointer to function that returns a stack (pANTLR3_STACK) of the strings that identify
     258      *  the rules in the parser that got you to this point. Can be overridden by installing your
     259      *	own function set.
     260      *
     261      * \todo Document how to override invocation stack functions.
     262      */
     263     pANTLR3_STACK	(*getRuleInvocationStack)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     264     pANTLR3_STACK	(*getRuleInvocationStackNamed)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     265 								pANTLR3_UINT8	    name);
     266 
     267     /** Pointer to a function that converts a collection of tokens to a collection of string token
     268      *  names (both are typed pANTLR3_HASH_TABLE in this signature). As this is mostly used in string template processing it may not be useful
     269      *  in the C runtime.
     270      */
     271     pANTLR3_HASH_TABLE	(*toStrings)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     272 								pANTLR3_HASH_TABLE);
     273 
     274     /** Pointer to a function to return whether the rule has parsed input starting at the supplied
     275      *  start index before. If the rule has not parsed input starting from the supplied start index,
     276      *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the supplied start point
     277      *  then it will return the point where it last stopped parsing after that start point.
     278      */
     279     ANTLR3_MARKER	(*getRuleMemoization)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     280 								ANTLR3_INTKEY	ruleIndex,
     281 								ANTLR3_MARKER	ruleParseStart);
     282 
     283     /** Pointer to function that determines whether the rule has parsed input at the current index
     284      *  in the input stream
     285      */
     286     ANTLR3_BOOLEAN	(*alreadyParsedRule)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     287 								ANTLR3_MARKER	ruleIndex);
     288 
     289     /** Pointer to function that records whether the rule has parsed the input at a
     290      *  current position successfully or not.
     291      */
     292     void		(*memoize)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     293 								ANTLR3_MARKER	ruleIndex,
     294 								ANTLR3_MARKER	ruleParseStart);
     295 
     296 	/// Pointer to a function that returns the current input symbol.
     297     /// This is placed into any label for the associated token ref; e.g., x=ID.  Token
     298 	/// and tree parsers need to return different objects. Rather than test
     299 	/// for input stream type or change the IntStream interface, I use
     300 	/// a simple method to ask the recognizer to tell me what the current
     301 	/// input symbol is.
     302 	///
     303 	/// This is ignored for lexers and the lexer implementation of this
     304 	/// function should return NULL.
     305 	///
     306 	void *		(*getCurrentInputSymbol)	(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     307 												pANTLR3_INT_STREAM istream);
     308 
     309 	/// Conjure up a missing token during error recovery.
     310 	///
     311 	/// The recognizer attempts to recover from single missing
     312 	/// symbols. But, actions might refer to that missing symbol.
     313 	/// For example, x=ID {f($x);}. The action clearly assumes
     314 	/// that there has been an identifier matched previously and that
     315 	/// $x points at that token. If that token is missing, but
     316 	/// the next token in the stream is what we want we assume that
     317 	/// this token is missing and we keep going. Because we
     318 	/// have to return some token to replace the missing token,
     319 	/// we have to conjure one up. This method gives the user control
     320 	/// over the tokens returned for missing tokens. Mostly,
     321 	/// you will want to create something special for identifier
     322 	/// tokens. For literals such as '{' and ',', the default
     323 	/// action in the parser or tree parser works. It simply creates
     324 	/// a CommonToken of the appropriate type. The text will be the token.
     325 	/// If you change what tokens must be created by the lexer,
     326 	/// override this method to create the appropriate tokens.
     327 	///
     328 	void *		(*getMissingSymbol)			(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
     329 												pANTLR3_INT_STREAM		istream,
     330 												pANTLR3_EXCEPTION		e,
     331 												ANTLR3_UINT32			expectedTokenType,
     332 												pANTLR3_BITSET_LIST		follow);
     333 
     334     /** Pointer to a function that returns whether the supplied grammar function
     335      *  will parse the current input stream or not. This is the way that syntactic
     336      *  predicates are evaluated. Unlike java, C is perfectly happy to invoke code
     337      *  via a pointer to a function (hence that's what all the ANTLR3 C interfaces
     338      *  do).
     339      */
     340     ANTLR3_BOOLEAN	(*synpred)			(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,  void * ctx,
     341 											void (*predicate)(void * ctx));
     342 
     343     /** Pointer to a function that can construct a generic exception structure
     344      * with such information as the input stream can provide.
     345      */
     346     void		    (*exConstruct)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     347 
     348     /** Reset the recognizer
     349      */
     350     void		    (*reset)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     351 
     352     /** Pointer to a function that knows how to free the resources of a base recognizer.
     353      */
     354     void			(*free)				(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
     355 
     356 }
     357     ANTLR3_BASE_RECOGNIZER;
    358 
    359 #ifdef __cplusplus
    360 }
    361 #endif
    362 
    363 #include    <antlr3lexer.h>
    364 #include    <antlr3parser.h>
    365 #include    <antlr3treeparser.h>
    366 
    367 /// @}
    368 ///
    369 
    370 #endif	    /* _ANTLR3_BASERECOGNIZER_H	*/
    371 
    372