1 /** \file 2 * Defines the basic structure to support recognizing by either a lexer, 3 * parser, or tree parser. 4 * \addtogroup ANTLR3_BASE_RECOGNIZER 5 * @{ 6 */ 7 #ifndef _ANTLR3_BASERECOGNIZER_H 8 #define _ANTLR3_BASERECOGNIZER_H 9 10 // [The "BSD licence"] 11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 12 // http://www.temporal-wave.com 13 // http://www.linkedin.com/in/jimidle 14 // 15 // All rights reserved. 16 // 17 // Redistribution and use in source and binary forms, with or without 18 // modification, are permitted provided that the following conditions 19 // are met: 20 // 1. Redistributions of source code must retain the above copyright 21 // notice, this list of conditions and the following disclaimer. 22 // 2. Redistributions in binary form must reproduce the above copyright 23 // notice, this list of conditions and the following disclaimer in the 24 // documentation and/or other materials provided with the distribution. 25 // 3. The name of the author may not be used to endorse or promote products 26 // derived from this software without specific prior written permission. 27 // 28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 39 #include <antlr3defs.h> 40 #include <antlr3exception.h> 41 #include <antlr3input.h> 42 #include <antlr3tokenstream.h> 43 #include <antlr3commontoken.h> 44 #include <antlr3commontreenodestream.h> 45 #include <antlr3debugeventlistener.h> 46 #include <antlr3recognizersharedstate.h> 47 48 /** Type indicator for a lexer recognizer 49 */ 50 #define ANTLR3_TYPE_LEXER 0x0001 51 52 /** Type indicator for a parser recognizer 53 */ 54 #define ANTLR3_TYPE_PARSER 0x0002 55 56 /** Type indicator for a tree parser recognizer 57 */ 58 #define ANTLR3_TYPE_TREE_PARSER 0x0004 59 60 #ifdef __cplusplus 61 extern "C" { 62 #endif 63 64 /** \brief Base tracking context structure for all types of 65 * recognizers. 66 */ 67 typedef struct ANTLR3_BASE_RECOGNIZER_struct 68 { 69 /// Whatever super structure is providing this interface needs a pointer to itself 70 /// so that this can be passed back to it whenever the api functions 71 /// are called back from here. 72 /// 73 void * super; 74 75 /// Indicates the type of recognizer that we are an instance of. 76 /// The programmer may set this to anything of course, but the default 77 /// implementations of the interface only really understand the built in 78 /// types, so new error handlers etc would probably be required to as well. 79 /// 80 /// Valid types are: 81 /// 82 /// - #ANTLR3_TYPE_LEXER 83 /// - #ANTLR3_TYPE_PARSER 84 /// - #ANTLR3_TYPE_TREE_PARSER 85 /// 86 ANTLR3_UINT32 type; 87 88 /// A pointer to the shared recognizer state, such that multiple 89 /// recognizers can use the same inputs streams and so on (in 90 /// the case of grammar inheritance for instance. 91 /// 92 pANTLR3_RECOGNIZER_SHARED_STATE state; 93 94 /// If set to something other than NULL, then this structure is 95 /// points to an instance of the debugger interface. In general, the 96 /// debugger is only referenced internally in recovery/error operations 97 /// so that it does not cause overhead by having to check this pointer 98 /// in every function/method 99 /// 100 pANTLR3_DEBUG_EVENT_LISTENER debugger; 101 102 103 /// Pointer to a function that matches the current input symbol 104 /// against the supplied type. the function causes an error if a 105 /// match is not found and the default implementation will also 106 /// attempt to perform one token insertion or deletion if that is 107 /// possible with the input stream. You can override the default 108 /// implementation by installing a pointer to your own function 109 /// in this interface after the recognizer has initialized. This can 110 /// perform different recovery options or not recover at all and so on. 111 /// To ignore recovery altogether, see the comments in the default 112 /// implementation of this function in antlr3baserecognizer.c 113 /// 114 /// Note that errors are signalled by setting the error flag below 115 /// and creating a new exception structure and installing it in the 116 /// exception pointer below (you can chain these if you like and handle them 117 /// in some customized way). 118 /// 119 void * (*match) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 120 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 121 122 /// Pointer to a function that matches the next token/char in the input stream 123 /// regardless of what it actually is. 124 /// 125 void (*matchAny) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 126 127 /// Pointer to a function that decides if the token ahead of the current one is the 128 /// one we were loking for, in which case the curernt one is very likely extraneous 129 /// and can be reported that way. 130 /// 131 ANTLR3_BOOLEAN 132 (*mismatchIsUnwantedToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype); 133 134 /// Pointer to a function that decides if the current token is one that can logically 135 /// follow the one we were looking for, in which case the one we were looking for is 136 /// probably missing from the input. 137 /// 138 ANTLR3_BOOLEAN 139 (*mismatchIsMissingToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow); 140 141 /** Pointer to a function that works out what to do when a token mismatch 142 * occurs, so that Tree parsers can behave differently to other recognizers. 143 */ 144 void (*mismatch) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 145 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 146 147 /** Pointer to a function to call to report a recognition problem. You may override 148 * this function with your own function, but refer to the standard implementation 149 * in antlr3baserecognizer.c for guidance. The function should recognize whether 150 * error recovery is in force, so that it does not print out more than one error messages 151 * for the same error. From the java comments in BaseRecognizer.java: 152 * 153 * This method sets errorRecovery to indicate the parser is recovering 154 * not parsing. Once in recovery mode, no errors are generated. 155 * To get out of recovery mode, the parser must successfully match 156 * a token (after a resync). So it will go: 157 * 158 * 1. error occurs 159 * 2. enter recovery mode, report error 160 * 3. consume until token found in resynch set 161 * 4. try to resume parsing 162 * 5. next match() will reset errorRecovery mode 163 */ 164 void (*reportError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 165 166 /** Pointer to a function that is called to display a recognition error message. You may 167 * override this function independently of (*reportError)() above as that function calls 168 * this one to do the actual exception printing. 169 */ 170 void (*displayRecognitionError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames); 171 172 /// Get number of recognition errors (lexer, parser, tree parser). Each 173 /// recognizer tracks its own number. So parser and lexer each have 174 /// separate count. Does not count the spurious errors found between 175 /// an error and next valid token match 176 /// 177 /// \see reportError() 178 /// 179 ANTLR3_UINT32 180 (*getNumberOfSyntaxErrors) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 181 182 /** Pointer to a function that recovers from an error found in the input stream. 183 * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also 184 * be from a mismatched token that the (*match)() could not recover from. 185 */ 186 void (*recover) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 187 188 /** Pointer to a function that is a hook to listen to token consumption during error recovery. 189 * This is mainly used by the debug parser to send events to the listener. 190 */ 191 void (*beginResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 192 193 /** Pointer to a function that is a hook to listen to token consumption during error recovery. 194 * This is mainly used by the debug parser to send events to the listener. 195 */ 196 void (*endResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 197 198 /** Pointer to a function that is a hook to listen to token consumption during error recovery. 199 * This is mainly used by the debug parser to send events to the listener. 200 */ 201 void (*beginBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level); 202 203 /** Pointer to a function that is a hook to listen to token consumption during error recovery. 204 * This is mainly used by the debug parser to send events to the listener. 205 */ 206 void (*endBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful); 207 208 /** Pointer to a function to computer the error recovery set for the current rule. 209 * \see antlr3ComputeErrorRecoverySet() for details. 210 */ 211 pANTLR3_BITSET (*computeErrorRecoverySet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 212 213 /** Pointer to a function that computes the context-sensitive FOLLOW set for the 214 * current rule. 215 * \see antlr3ComputeCSRuleFollow() for details. 216 */ 217 pANTLR3_BITSET (*computeCSRuleFollow) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 218 219 /** Pointer to a function to combine follow bitsets. 220 * \see antlr3CombineFollows() for details. 221 */ 222 pANTLR3_BITSET (*combineFollows) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 223 ANTLR3_BOOLEAN exact); 224 225 /** Pointer to a function that recovers from a mismatched token in the input stream. 226 * \see antlr3RecoverMismatch() for details. 227 */ 228 void * (*recoverFromMismatchedToken) 229 (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 230 ANTLR3_UINT32 ttype, 231 pANTLR3_BITSET_LIST follow); 232 233 /** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner 234 * to (*recoverFromMismatchedToken) 235 */ 236 void * (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 237 pANTLR3_BITSET_LIST follow); 238 239 /** Pointer to common routine to handle single token insertion for recovery functions. 240 */ 241 ANTLR3_BOOLEAN (*recoverFromMismatchedElement) 242 (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 243 pANTLR3_BITSET_LIST follow); 244 245 /** Pointer to function that consumes input until the next token matches 246 * the given token. 247 */ 248 void (*consumeUntil) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 249 ANTLR3_UINT32 tokenType); 250 251 /** Pointer to function that consumes input until the next token matches 252 * one in the given set. 253 */ 254 void (*consumeUntilSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 255 pANTLR3_BITSET set); 256 257 /** Pointer to function that returns an ANTLR3_LIST of the strings that identify 258 * the rules in the parser that got you to this point. Can be overridden by installing your 259 * own function set. 260 * 261 * \todo Document how to override invocation stack functions. 262 */ 263 pANTLR3_STACK (*getRuleInvocationStack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 264 pANTLR3_STACK (*getRuleInvocationStackNamed) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 265 pANTLR3_UINT8 name); 266 267 /** Pointer to a function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of 268 * string token names. As this is mostly used in string template processing it may not be useful 269 * in the C runtime. 270 */ 271 pANTLR3_HASH_TABLE (*toStrings) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 272 pANTLR3_HASH_TABLE); 273 274 /** Pointer to a function to return whether the rule has parsed input starting at the supplied 275 * start index before. If the rule has not parsed input starting from the supplied start index, 276 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point 277 * then it will return the point where it last stopped parsing after that start point. 278 */ 279 ANTLR3_MARKER (*getRuleMemoization) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 280 ANTLR3_INTKEY ruleIndex, 281 ANTLR3_MARKER ruleParseStart); 282 283 /** Pointer to function that determines whether the rule has parsed input at the current index 284 * in the input stream 285 */ 286 ANTLR3_BOOLEAN (*alreadyParsedRule) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 287 ANTLR3_MARKER ruleIndex); 288 289 /** Pointer to function that records whether the rule has parsed the input at a 290 * current position successfully or not. 291 */ 292 void (*memoize) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 293 ANTLR3_MARKER ruleIndex, 294 ANTLR3_MARKER ruleParseStart); 295 296 /// Pointer to a function that returns the current input symbol. 297 /// The is placed into any label for the associated token ref; e.g., x=ID. Token 298 /// and tree parsers need to return different objects. Rather than test 299 /// for input stream type or change the IntStream interface, I use 300 /// a simple method to ask the recognizer to tell me what the current 301 /// input symbol is. 302 /// 303 /// This is ignored for lexers and the lexer implementation of this 304 /// function should return NULL. 305 /// 306 void * (*getCurrentInputSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 307 pANTLR3_INT_STREAM istream); 308 309 /// Conjure up a missing token during error recovery. 310 /// 311 /// The recognizer attempts to recover from single missing 312 /// symbols. But, actions might refer to that missing symbol. 313 /// For example, x=ID {f($x);}. The action clearly assumes 314 /// that there has been an identifier matched previously and that 315 /// $x points at that token. If that token is missing, but 316 /// the next token in the stream is what we want we assume that 317 /// this token is missing and we keep going. Because we 318 /// have to return some token to replace the missing token, 319 /// we have to conjure one up. This method gives the user control 320 /// over the tokens returned for missing tokens. Mostly, 321 /// you will want to create something special for identifier 322 /// tokens. For literals such as '{' and ',', the default 323 /// action in the parser or tree parser works. It simply creates 324 /// a CommonToken of the appropriate type. The text will be the token. 325 /// If you change what tokens must be created by the lexer, 326 /// override this method to create the appropriate tokens. 327 /// 328 void * (*getMissingSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 329 pANTLR3_INT_STREAM istream, 330 pANTLR3_EXCEPTION e, 331 ANTLR3_UINT32 expectedTokenType, 332 pANTLR3_BITSET_LIST follow); 333 334 /** Pointer to a function that returns whether the supplied grammar function 335 * will parse the current input stream or not. This is the way that syntactic 336 * predicates are evaluated. Unlike java, C is perfectly happy to invoke code 337 * via a pointer to a function (hence that's what all the ANTLR3 C interfaces 338 * do. 339 */ 340 ANTLR3_BOOLEAN (*synpred) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, void * ctx, 341 void (*predicate)(void * ctx)); 342 343 /** Pointer to a function that can construct a generic exception structure 344 * with such information as the input stream can provide. 345 */ 346 void (*exConstruct) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 347 348 /** Reset the recognizer 349 */ 350 void (*reset) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 351 352 /** Pointer to a function that knows how to free the resources of a base recognizer. 353 */ 354 void (*free) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 355 356 } 357 ANTLR3_BASE_RECOGNIZER; 358 359 #ifdef __cplusplus 360 } 361 #endif 362 363 #include <antlr3lexer.h> 364 #include <antlr3parser.h> 365 #include <antlr3treeparser.h> 366 367 /// @} 368 /// 369 370 #endif /* _ANTLR3_BASERECOGNIZER_H */ 371 372