1 /** \file 2 * Contains the base functions that all recognizers require. 3 * Any function can be overridden by a lexer/parser/tree parser or by the 4 * ANTLR3 programmer. 5 * 6 * \addtogroup pANTLR3_BASE_RECOGNIZER 7 * @{ 8 */ 9 #include <antlr3baserecognizer.h> 10 11 // [The "BSD licence"] 12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 13 // http://www.temporal-wave.com 14 // http://www.linkedin.com/in/jimidle 15 // 16 // All rights reserved. 17 // 18 // Redistribution and use in source and binary forms, with or without 19 // modification, are permitted provided that the following conditions 20 // are met: 21 // 1. Redistributions of source code must retain the above copyright 22 // notice, this list of conditions and the following disclaimer. 23 // 2. Redistributions in binary form must reproduce the above copyright 24 // notice, this list of conditions and the following disclaimer in the 25 // documentation and/or other materials provided with the distribution. 26 // 3. The name of the author may not be used to endorse or promote products 27 // derived from this software without specific prior written permission. 28 // 29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 40 #ifdef ANTLR3_WINDOWS 41 #pragma warning( disable : 4100 ) 42 #endif 43 44 /* Interface functions -standard implementations cover parser and treeparser 45 * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides 46 * most of these functions. 47 */ 48 static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer); 49 static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer); 50 static void endResync (pANTLR3_BASE_RECOGNIZER recognizer); 51 static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level); 52 static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful); 53 54 static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 55 static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer); 56 static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 57 static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype); 58 static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow); 59 static void reportError (pANTLR3_BASE_RECOGNIZER recognizer); 60 static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer); 61 static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact); 62 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames); 63 static void recover (pANTLR3_BASE_RECOGNIZER recognizer); 64 static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 65 static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); 66 static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); 67 static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType); 68 static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set); 69 static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer); 70 static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name); 71 static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE); 72 static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart); 73 static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex); 74 static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart); 75 static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)); 76 static void reset (pANTLR3_BASE_RECOGNIZER recognizer); 77 static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer); 78 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); 79 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 80 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); 81 static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer); 82 83 ANTLR3_API pANTLR3_BASE_RECOGNIZER 84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) 85 { 86 pANTLR3_BASE_RECOGNIZER recognizer; 87 88 // Allocate memory for the structure 89 // 90 recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER)); 91 92 if (recognizer == NULL) 93 { 94 // Allocation failed 95 // 96 return NULL; 97 } 98 99 100 // If we have been supplied with a pre-existing recognizer state 101 // then we just install it, otherwise we must create one from scratch 102 // 103 if (state == NULL) 104 { 105 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE)); 106 107 if (recognizer->state == NULL) 108 { 109 ANTLR3_FREE(recognizer); 110 return NULL; 111 } 112 113 // Initialize any new recognizer state 114 // 115 recognizer->state->errorRecovery = ANTLR3_FALSE; 116 recognizer->state->lastErrorIndex = -1; 117 recognizer->state->failed = ANTLR3_FALSE; 118 recognizer->state->errorCount = 0; 119 recognizer->state->backtracking = 0; 120 recognizer->state->following = NULL; 121 recognizer->state->ruleMemo = NULL; 122 recognizer->state->tokenNames = NULL; 123 recognizer->state->sizeHint = sizeHint; 124 recognizer->state->tokSource = NULL; 125 recognizer->state->tokFactory = NULL; 126 127 // Rather than check to see if we must initialize 128 // the stack every time we are asked for an new rewrite stream 129 // we just always create an empty stack and then just 130 // free it when the base recognizer is freed. 131 // 132 recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size. 133 134 if (recognizer->state->rStreams == NULL) 135 { 136 // Out of memory 137 // 138 ANTLR3_FREE(recognizer->state); 139 ANTLR3_FREE(recognizer); 140 return NULL; 141 } 142 } 143 else 144 { 145 // Install the one we were given, and do not reset it here 146 // as it will either already have been initialized or will 147 // be in a state that needs to be preserved. 148 // 149 recognizer->state = state; 150 } 151 152 // Install the BR API 153 // 154 recognizer->alreadyParsedRule = alreadyParsedRule; 155 recognizer->beginResync = beginResync; 156 recognizer->combineFollows = combineFollows; 157 recognizer->beginBacktrack = beginBacktrack; 158 recognizer->endBacktrack = endBacktrack; 159 recognizer->computeCSRuleFollow = computeCSRuleFollow; 160 recognizer->computeErrorRecoverySet = computeErrorRecoverySet; 161 recognizer->consumeUntil = consumeUntil; 162 recognizer->consumeUntilSet = consumeUntilSet; 163 recognizer->displayRecognitionError = displayRecognitionError; 164 recognizer->endResync = endResync; 165 recognizer->exConstruct = antlr3MTExceptionNew; 166 recognizer->getRuleInvocationStack = getRuleInvocationStack; 167 recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed; 168 recognizer->getRuleMemoization = getRuleMemoization; 169 recognizer->match = match; 170 recognizer->matchAny = matchAny; 171 recognizer->memoize = memoize; 172 recognizer->mismatch = mismatch; 173 recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken; 174 recognizer->mismatchIsMissingToken = mismatchIsMissingToken; 175 recognizer->recover = recover; 176 recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement; 177 recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet; 178 recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken; 179 recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors; 180 recognizer->reportError = reportError; 181 recognizer->reset = reset; 182 recognizer->synpred = synpred; 183 recognizer->toStrings = toStrings; 184 recognizer->getCurrentInputSymbol = getCurrentInputSymbol; 185 recognizer->getMissingSymbol = getMissingSymbol; 186 recognizer->debugger = NULL; 187 188 recognizer->free = freeBR; 189 190 /* Initialize variables 191 */ 192 recognizer->type = type; 193 194 195 return recognizer; 196 } 197 static void 198 freeBR (pANTLR3_BASE_RECOGNIZER recognizer) 199 { 200 pANTLR3_EXCEPTION thisE; 201 202 // Did we have a state allocated? 203 // 204 if (recognizer->state != NULL) 205 { 206 // Free any rule memoization we set up 207 // 208 if (recognizer->state->ruleMemo != NULL) 209 { 210 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); 211 recognizer->state->ruleMemo = NULL; 212 } 213 214 // Free any exception space we have left around 215 // 216 thisE = recognizer->state->exception; 217 if (thisE != NULL) 218 { 219 thisE->freeEx(thisE); 220 } 221 222 // Free any rewrite streams we have allocated 223 // 224 if (recognizer->state->rStreams != NULL) 225 { 226 recognizer->state->rStreams->free(recognizer->state->rStreams); 227 } 228 229 // Free up any token factory we created (error recovery for instance) 230 // 231 if (recognizer->state->tokFactory != NULL) 232 { 233 recognizer->state->tokFactory->close(recognizer->state->tokFactory); 234 } 235 // Free the shared state memory 236 // 237 ANTLR3_FREE(recognizer->state); 238 } 239 240 // Free the actual recognizer space 241 // 242 ANTLR3_FREE(recognizer); 243 } 244 245 /** 246 * Creates a new Mismatched Token Exception and inserts in the recognizer 247 * exception stack. 248 * 249 * \param recognizer 250 * Context pointer for this recognizer 251 * 252 */ 253 ANTLR3_API void 254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) 255 { 256 /* Create a basic recognition exception structure 257 */ 258 antlr3RecognitionExceptionNew(recognizer); 259 260 /* Now update it to indicate this is a Mismatched token exception 261 */ 262 recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME; 263 recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION; 264 265 return; 266 } 267 268 ANTLR3_API void 269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) 270 { 271 pANTLR3_EXCEPTION ex; 272 pANTLR3_LEXER lexer; 273 pANTLR3_PARSER parser; 274 pANTLR3_TREE_PARSER tparser; 275 276 pANTLR3_INPUT_STREAM ins; 277 pANTLR3_INT_STREAM is; 278 pANTLR3_COMMON_TOKEN_STREAM cts; 279 pANTLR3_TREE_NODE_STREAM tns; 280 281 ins = NULL; 282 cts = NULL; 283 tns = NULL; 284 is = NULL; 285 lexer = NULL; 286 parser = NULL; 287 tparser = NULL; 288 289 switch (recognizer->type) 290 { 291 case ANTLR3_TYPE_LEXER: 292 293 lexer = (pANTLR3_LEXER) (recognizer->super); 294 ins = lexer->input; 295 is = ins->istream; 296 297 break; 298 299 case ANTLR3_TYPE_PARSER: 300 301 parser = (pANTLR3_PARSER) (recognizer->super); 302 cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super); 303 is = parser->tstream->istream; 304 305 break; 306 307 case ANTLR3_TYPE_TREE_PARSER: 308 309 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 310 tns = tparser->ctnstream->tnstream; 311 is = tns->istream; 312 313 break; 314 315 default: 316 317 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n"); 318 return; 319 320 break; 321 } 322 323 /* Create a basic exception structure 324 */ 325 ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION, 326 (void *)ANTLR3_RECOGNITION_EX_NAME, 327 NULL, 328 ANTLR3_FALSE); 329 330 /* Rest of information depends on the base type of the 331 * input stream. 332 */ 333 switch (is->type & ANTLR3_INPUT_MASK) 334 { 335 case ANTLR3_CHARSTREAM: 336 337 ex->c = is->_LA (is, 1); /* Current input character */ 338 ex->line = ins->getLine (ins); /* Line number comes from stream */ 339 ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */ 340 ex->index = is->index (is); 341 ex->streamName = ins->fileName; 342 ex->message = "Unexpected character"; 343 break; 344 345 case ANTLR3_TOKENSTREAM: 346 347 ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */ 348 ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine (ex->token); 349 ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine (ex->token); 350 ex->index = cts->tstream->istream->index (cts->tstream->istream); 351 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) 352 { 353 ex->streamName = NULL; 354 } 355 else 356 { 357 ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName; 358 } 359 ex->message = "Unexpected token"; 360 break; 361 362 case ANTLR3_COMMONTREENODE: 363 364 ex->token = tns->_LT (tns, 1); /* Current input tree node */ 365 ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token); 366 ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token); 367 ex->index = tns->istream->index (tns->istream); 368 369 // Are you ready for this? Deep breath now... 370 // 371 { 372 pANTLR3_COMMON_TREE tnode; 373 374 tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super)); 375 376 if (tnode->token == NULL) 377 { 378 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-"); 379 } 380 else 381 { 382 if (tnode->token->input == NULL) 383 { 384 ex->streamName = NULL; 385 } 386 else 387 { 388 ex->streamName = tnode->token->input->fileName; 389 } 390 } 391 ex->message = "Unexpected node"; 392 } 393 break; 394 } 395 396 ex->input = is; 397 ex->nextException = recognizer->state->exception; /* So we don't leak the memory */ 398 recognizer->state->exception = ex; 399 recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */ 400 401 return; 402 } 403 404 405 /// Match current input symbol against ttype. Upon error, do one token 406 /// insertion or deletion if possible. 407 /// To turn off single token insertion or deletion error 408 /// recovery, override mismatchRecover() and have it call 409 /// plain mismatch(), which does not recover. Then any error 410 /// in a rule will cause an exception and immediate exit from 411 /// rule. Rule would recover by resynchronizing to the set of 412 /// symbols that can follow rule ref. 413 /// 414 static void * 415 match( pANTLR3_BASE_RECOGNIZER recognizer, 416 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) 417 { 418 pANTLR3_PARSER parser; 419 pANTLR3_TREE_PARSER tparser; 420 pANTLR3_INT_STREAM is; 421 void * matchedSymbol; 422 423 switch (recognizer->type) 424 { 425 case ANTLR3_TYPE_PARSER: 426 427 parser = (pANTLR3_PARSER) (recognizer->super); 428 tparser = NULL; 429 is = parser->tstream->istream; 430 431 break; 432 433 case ANTLR3_TYPE_TREE_PARSER: 434 435 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 436 parser = NULL; 437 is = tparser->ctnstream->tnstream->istream; 438 439 break; 440 441 default: 442 443 ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n"); 444 return ANTLR3_FALSE; 445 446 break; 447 } 448 449 // Pick up the current input token/node for assignment to labels 450 // 451 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); 452 453 if (is->_LA(is, 1) == ttype) 454 { 455 // The token was the one we were told to expect 456 // 457 is->consume(is); // Consume that token from the stream 458 recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were) 459 recognizer->state->failed = ANTLR3_FALSE; // The match was a success 460 return matchedSymbol; // We are done 461 } 462 463 // We did not find the expected token type, if we are backtracking then 464 // we just set the failed flag and return. 465 // 466 if (recognizer->state->backtracking > 0) 467 { 468 // Backtracking is going on 469 // 470 recognizer->state->failed = ANTLR3_TRUE; 471 return matchedSymbol; 472 } 473 474 // We did not find the expected token and there is no backtracking 475 // going on, so we mismatch, which creates an exception in the recognizer exception 476 // stack. 477 // 478 matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow); 479 return matchedSymbol; 480 } 481 482 /// Consumes the next token, whatever it is, and resets the recognizer state 483 /// so that it is not in error. 484 /// 485 /// \param recognizer 486 /// Recognizer context pointer 487 /// 488 static void 489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer) 490 { 491 pANTLR3_PARSER parser; 492 pANTLR3_TREE_PARSER tparser; 493 pANTLR3_INT_STREAM is; 494 495 switch (recognizer->type) 496 { 497 case ANTLR3_TYPE_PARSER: 498 499 parser = (pANTLR3_PARSER) (recognizer->super); 500 tparser = NULL; 501 is = parser->tstream->istream; 502 503 break; 504 505 case ANTLR3_TYPE_TREE_PARSER: 506 507 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 508 parser = NULL; 509 is = tparser->ctnstream->tnstream->istream; 510 511 break; 512 513 default: 514 515 ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n"); 516 return; 517 518 break; 519 } 520 recognizer->state->errorRecovery = ANTLR3_FALSE; 521 recognizer->state->failed = ANTLR3_FALSE; 522 is->consume(is); 523 524 return; 525 } 526 /// 527 /// 528 static ANTLR3_BOOLEAN 529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype) 530 { 531 ANTLR3_UINT32 nextt; 532 533 nextt = is->_LA(is, 2); 534 535 if (nextt == ttype) 536 { 537 if (recognizer->state->exception != NULL) 538 { 539 recognizer->state->exception->expecting = nextt; 540 } 541 return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted 542 } 543 else 544 { 545 return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted 546 } 547 } 548 549 /// 550 /// 551 static ANTLR3_BOOLEAN 552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow) 553 { 554 ANTLR3_BOOLEAN retcode; 555 pANTLR3_BITSET followClone; 556 pANTLR3_BITSET viableTokensFollowingThisRule; 557 558 if (follow == NULL) 559 { 560 // There is no information about the tokens that can follow the last one 561 // hence we must say that the current one we found is not a member of the 562 // follow set and does not indicate a missing token. We will just consume this 563 // single token and see if the parser works it out from there. 564 // 565 return ANTLR3_FALSE; 566 } 567 568 followClone = NULL; 569 viableTokensFollowingThisRule = NULL; 570 571 // The C bitset maps are laid down at compile time by the 572 // C code generation. Hence we cannot remove things from them 573 // and so on. So, in order to remove EOR (if we need to) then 574 // we clone the static bitset. 575 // 576 followClone = antlr3BitsetLoad(follow); 577 if (followClone == NULL) 578 { 579 return ANTLR3_FALSE; 580 } 581 582 // Compute what can follow this grammar reference 583 // 584 if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)) 585 { 586 // EOR can follow, but if we are not the start symbol, we 587 // need to remove it. 588 // 589 if (recognizer->state->following->vector->count >= 0) 590 { 591 followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE); 592 } 593 594 // Now compute the visiable tokens that can follow this rule, according to context 595 // and make them part of the follow set. 596 // 597 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer); 598 followClone->borInPlace(followClone, viableTokensFollowingThisRule); 599 } 600 601 /// if current token is consistent with what could come after set 602 /// then we know we're missing a token; error recovery is free to 603 /// "insert" the missing token 604 /// 605 /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 606 /// in follow set to indicate that the fall of the start symbol is 607 /// in the set (EOF can follow). 608 /// 609 if ( followClone->isMember(followClone, is->_LA(is, 1)) 610 || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE) 611 ) 612 { 613 retcode = ANTLR3_TRUE; 614 } 615 else 616 { 617 retcode = ANTLR3_FALSE; 618 } 619 620 if (viableTokensFollowingThisRule != NULL) 621 { 622 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule); 623 } 624 if (followClone != NULL) 625 { 626 followClone->free(followClone); 627 } 628 629 return retcode; 630 631 } 632 633 /// Factor out what to do upon token mismatch so tree parsers can behave 634 /// differently. Override and call mismatchRecover(input, ttype, follow) 635 /// to get single token insertion and deletion. Use this to turn off 636 /// single token insertion and deletion. Override mismatchRecover 637 /// to call this instead. 638 /// 639 /// \remark mismatch only works for parsers and must be overridden for anything else. 640 /// 641 static void 642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) 643 { 644 pANTLR3_PARSER parser; 645 pANTLR3_TREE_PARSER tparser; 646 pANTLR3_INT_STREAM is; 647 648 // Install a mismatched token exception in the exception stack 649 // 650 antlr3MTExceptionNew(recognizer); 651 recognizer->state->exception->expecting = ttype; 652 653 switch (recognizer->type) 654 { 655 case ANTLR3_TYPE_PARSER: 656 657 parser = (pANTLR3_PARSER) (recognizer->super); 658 tparser = NULL; 659 is = parser->tstream->istream; 660 661 break; 662 663 default: 664 665 ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n"); 666 return; 667 668 break; 669 } 670 671 if (mismatchIsUnwantedToken(recognizer, is, ttype)) 672 { 673 // Create a basic recognition exception structure 674 // 675 antlr3RecognitionExceptionNew(recognizer); 676 677 // Now update it to indicate this is an unwanted token exception 678 // 679 recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; 680 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; 681 682 return; 683 } 684 685 if (mismatchIsMissingToken(recognizer, is, follow)) 686 { 687 // Create a basic recognition exception structure 688 // 689 antlr3RecognitionExceptionNew(recognizer); 690 691 // Now update it to indicate this is an unwanted token exception 692 // 693 recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; 694 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; 695 696 return; 697 } 698 699 // Just a mismatched token is all we can dtermine 700 // 701 antlr3MTExceptionNew(recognizer); 702 703 return; 704 } 705 /// Report a recognition problem. 706 /// 707 /// This method sets errorRecovery to indicate the parser is recovering 708 /// not parsing. Once in recovery mode, no errors are generated. 709 /// To get out of recovery mode, the parser must successfully match 710 /// a token (after a resync). So it will go: 711 /// 712 /// 1. error occurs 713 /// 2. enter recovery mode, report error 714 /// 3. consume until token found in resynch set 715 /// 4. try to resume parsing 716 /// 5. next match() will reset errorRecovery mode 717 /// 718 /// If you override, make sure to update errorCount if you care about that. 719 /// 720 static void 721 reportError (pANTLR3_BASE_RECOGNIZER recognizer) 722 { 723 // Invoke the debugger event if there is a debugger listening to us 724 // 725 if (recognizer->debugger != NULL) 726 { 727 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception); 728 } 729 730 if (recognizer->state->errorRecovery == ANTLR3_TRUE) 731 { 732 // Already in error recovery so don't display another error while doing so 733 // 734 return; 735 } 736 737 // Signal we are in error recovery now 738 // 739 recognizer->state->errorRecovery = ANTLR3_TRUE; 740 741 // Indicate this recognizer had an error while processing. 742 // 743 recognizer->state->errorCount++; 744 745 // Call the error display routine 746 // 747 recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); 748 } 749 750 static void 751 beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level) 752 { 753 if (recognizer->debugger != NULL) 754 { 755 recognizer->debugger->beginBacktrack(recognizer->debugger, level); 756 } 757 } 758 759 static void 760 endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful) 761 { 762 if (recognizer->debugger != NULL) 763 { 764 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful); 765 } 766 } 767 static void 768 beginResync (pANTLR3_BASE_RECOGNIZER recognizer) 769 { 770 if (recognizer->debugger != NULL) 771 { 772 recognizer->debugger->beginResync(recognizer->debugger); 773 } 774 } 775 776 static void 777 endResync (pANTLR3_BASE_RECOGNIZER recognizer) 778 { 779 if (recognizer->debugger != NULL) 780 { 781 recognizer->debugger->endResync(recognizer->debugger); 782 } 783 } 784 785 /// Compute the error recovery set for the current rule. 786 /// Documentation below is from the Java implementation. 787 /// 788 /// During rule invocation, the parser pushes the set of tokens that can 789 /// follow that rule reference on the stack; this amounts to 790 /// computing FIRST of what follows the rule reference in the 791 /// enclosing rule. This local follow set only includes tokens 792 /// from within the rule; i.e., the FIRST computation done by 793 /// ANTLR stops at the end of a rule. 794 // 795 /// EXAMPLE 796 // 797 /// When you find a "no viable alt exception", the input is not 798 /// consistent with any of the alternatives for rule r. The best 799 /// thing to do is to consume tokens until you see something that 800 /// can legally follow a call to r *or* any rule that called r. 801 /// You don't want the exact set of viable next tokens because the 802 /// input might just be missing a token--you might consume the 803 /// rest of the input looking for one of the missing tokens. 804 /// 805 /// Consider grammar: 806 /// 807 /// a : '[' b ']' 808 /// | '(' b ')' 809 /// ; 810 /// b : c '^' INT ; 811 /// c : ID 812 /// | INT 813 /// ; 814 /// 815 /// At each rule invocation, the set of tokens that could follow 816 /// that rule is pushed on a stack. Here are the various "local" 817 /// follow sets: 818 /// 819 /// FOLLOW(b1_in_a) = FIRST(']') = ']' 820 /// FOLLOW(b2_in_a) = FIRST(')') = ')' 821 /// FOLLOW(c_in_b) = FIRST('^') = '^' 822 /// 823 /// Upon erroneous input "[]", the call chain is 824 /// 825 /// a -> b -> c 826 /// 827 /// and, hence, the follow context stack is: 828 /// 829 /// depth local follow set after call to rule 830 /// 0 <EOF> a (from main()) 831 /// 1 ']' b 832 /// 3 '^' c 833 /// 834 /// Notice that ')' is not included, because b would have to have 835 /// been called from a different context in rule a for ')' to be 836 /// included. 837 /// 838 /// For error recovery, we cannot consider FOLLOW(c) 839 /// (context-sensitive or otherwise). We need the combined set of 840 /// all context-sensitive FOLLOW sets--the set of all tokens that 841 /// could follow any reference in the call chain. We need to 842 /// resync to one of those tokens. Note that FOLLOW(c)='^' and if 843 /// we resync'd to that token, we'd consume until EOF. We need to 844 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. 845 /// In this case, for input "[]", LA(1) is in this set so we would 846 /// not consume anything and after printing an error rule c would 847 /// return normally. It would not find the required '^' though. 848 /// At this point, it gets a mismatched token error and throws an 849 /// exception (since LA(1) is not in the viable following token 850 /// set). The rule exception handler tries to recover, but finds 851 /// the same recovery set and doesn't consume anything. Rule b 852 /// exits normally returning to rule a. Now it finds the ']' (and 853 /// with the successful match exits errorRecovery mode). 854 /// 855 /// So, you can see that the parser walks up call chain looking 856 /// for the token that was a member of the recovery set. 857 /// 858 /// Errors are not generated in errorRecovery mode. 859 /// 860 /// ANTLR's error recovery mechanism is based upon original ideas: 861 /// 862 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth 863 /// 864 /// and 865 /// 866 /// "A note on error recovery in recursive descent parsers": 867 /// http://portal.acm.org/citation.cfm?id=947902.947905 868 /// 869 /// Later, Josef Grosch had some good ideas: 870 /// 871 /// "Efficient and Comfortable Error Recovery in Recursive Descent 872 /// Parsers": 873 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip 874 /// 875 /// Like Grosch I implemented local FOLLOW sets that are combined 876 /// at run-time upon error to avoid overhead during parsing. 877 /// 878 static pANTLR3_BITSET 879 computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer) 880 { 881 return recognizer->combineFollows(recognizer, ANTLR3_FALSE); 882 } 883 884 /// Compute the context-sensitive FOLLOW set for current rule. 885 /// Documentation below is from the Java runtime. 886 /// 887 /// This is the set of token types that can follow a specific rule 888 /// reference given a specific call chain. You get the set of 889 /// viable tokens that can possibly come next (look ahead depth 1) 890 /// given the current call chain. Contrast this with the 891 /// definition of plain FOLLOW for rule r: 892 /// 893 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 894 /// 895 /// where x in T* and alpha, beta in V*; T is set of terminals and 896 /// V is the set of terminals and non terminals. In other words, 897 /// FOLLOW(r) is the set of all tokens that can possibly follow 898 /// references to r in///any* sentential form (context). At 899 /// runtime, however, we know precisely which context applies as 900 /// we have the call chain. We may compute the exact (rather 901 /// than covering superset) set of following tokens. 902 /// 903 /// For example, consider grammar: 904 /// 905 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} 906 /// | "return" expr '.' 907 /// ; 908 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} 909 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'} 910 /// | '(' expr ')' 911 /// ; 912 /// 913 /// The FOLLOW sets are all inclusive whereas context-sensitive 914 /// FOLLOW sets are precisely what could follow a rule reference. 915 /// For input input "i=(3);", here is the derivation: 916 /// 917 /// stat => ID '=' expr ';' 918 /// => ID '=' atom ('+' atom)* ';' 919 /// => ID '=' '(' expr ')' ('+' atom)* ';' 920 /// => ID '=' '(' atom ')' ('+' atom)* ';' 921 /// => ID '=' '(' INT ')' ('+' atom)* ';' 922 /// => ID '=' '(' INT ')' ';' 923 /// 924 /// At the "3" token, you'd have a call chain of 925 /// 926 /// stat -> expr -> atom -> expr -> atom 927 /// 928 /// What can follow that specific nested ref to atom? Exactly ')' 929 /// as you can see by looking at the derivation of this specific 930 /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. 931 /// 932 /// You want the exact viable token set when recovering from a 933 /// token mismatch. Upon token mismatch, if LA(1) is member of 934 /// the viable next token set, then you know there is most likely 935 /// a missing token in the input stream. "Insert" one by just not 936 /// throwing an exception. 937 /// 938 static pANTLR3_BITSET 939 computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer) 940 { 941 return recognizer->combineFollows(recognizer, ANTLR3_FALSE); 942 } 943 944 /// Compute the current followset for the input stream. 945 /// 946 static pANTLR3_BITSET 947 combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact) 948 { 949 pANTLR3_BITSET followSet; 950 pANTLR3_BITSET localFollowSet; 951 ANTLR3_UINT32 top; 952 ANTLR3_UINT32 i; 953 954 top = recognizer->state->following->size(recognizer->state->following); 955 956 followSet = antlr3BitsetNew(0); 957 localFollowSet = NULL; 958 959 for (i = top; i>0; i--) 960 { 961 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1)); 962 963 if (localFollowSet != NULL) 964 { 965 followSet->borInPlace(followSet, localFollowSet); 966 967 if (exact == ANTLR3_TRUE) 968 { 969 if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE) 970 { 971 // Only leave EOR in the set if at top (start rule); this lets us know 972 // if we have to include the follow(start rule); I.E., EOF 973 // 974 if (i>1) 975 { 976 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE); 977 } 978 } 979 else 980 { 981 break; // Cannot see End Of Rule from here, just drop out 982 } 983 } 984 localFollowSet->free(localFollowSet); 985 localFollowSet = NULL; 986 } 987 } 988 989 if (localFollowSet != NULL) 990 { 991 localFollowSet->free(localFollowSet); 992 } 993 return followSet; 994 } 995 996 /// Standard/Example error display method. 997 /// No generic error message display funciton coudl possibly do everything correctly 998 /// for all possible parsers. Hence you are provided with this example routine, which 999 /// you should override in your parser/tree parser to do as you will. 1000 /// 1001 /// Here we depart somewhat from the Java runtime as that has now split up a lot 1002 /// of the error display routines into spearate units. However, ther is little advantage 1003 /// to this in the C version as you will probably implement all such routines as a 1004 /// separate translation unit, rather than install them all as pointers to functions 1005 /// in the base recognizer. 1006 /// 1007 static void 1008 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) 1009 { 1010 pANTLR3_PARSER parser; 1011 pANTLR3_TREE_PARSER tparser; 1012 pANTLR3_INT_STREAM is; 1013 pANTLR3_STRING ttext; 1014 pANTLR3_STRING ftext; 1015 pANTLR3_EXCEPTION ex; 1016 pANTLR3_COMMON_TOKEN theToken; 1017 pANTLR3_BASE_TREE theBaseTree; 1018 pANTLR3_COMMON_TREE theCommonTree; 1019 1020 // Retrieve some info for easy reading. 1021 // 1022 ex = recognizer->state->exception; 1023 ttext = NULL; 1024 1025 // See if there is a 'filename' we can use 1026 // 1027 if (ex->streamName == NULL) 1028 { 1029 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) 1030 { 1031 ANTLR3_FPRINTF(stderr, "-end of input-("); 1032 } 1033 else 1034 { 1035 ANTLR3_FPRINTF(stderr, "-unknown source-("); 1036 } 1037 } 1038 else 1039 { 1040 ftext = ex->streamName->to8(ex->streamName); 1041 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); 1042 } 1043 1044 // Next comes the line number 1045 // 1046 1047 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); 1048 ANTLR3_FPRINTF(stderr, " : error %d : %s", 1049 recognizer->state->exception->type, 1050 (pANTLR3_UINT8) (recognizer->state->exception->message)); 1051 1052 1053 // How we determine the next piece is dependent on which thing raised the 1054 // error. 1055 // 1056 switch (recognizer->type) 1057 { 1058 case ANTLR3_TYPE_PARSER: 1059 1060 // Prepare the knowledge we know we have 1061 // 1062 parser = (pANTLR3_PARSER) (recognizer->super); 1063 tparser = NULL; 1064 is = parser->tstream->istream; 1065 theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token); 1066 ttext = theToken->toString(theToken); 1067 1068 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine); 1069 if (theToken != NULL) 1070 { 1071 if (theToken->type == ANTLR3_TOKEN_EOF) 1072 { 1073 ANTLR3_FPRINTF(stderr, ", at <EOF>"); 1074 } 1075 else 1076 { 1077 // Guard against null text in a token 1078 // 1079 ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars); 1080 } 1081 } 1082 break; 1083 1084 case ANTLR3_TYPE_TREE_PARSER: 1085 1086 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1087 parser = NULL; 1088 is = tparser->ctnstream->tnstream->istream; 1089 theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token); 1090 ttext = theBaseTree->toStringTree(theBaseTree); 1091 1092 if (theBaseTree != NULL) 1093 { 1094 theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super; 1095 1096 if (theCommonTree != NULL) 1097 { 1098 theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree); 1099 } 1100 ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree)); 1101 ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars); 1102 } 1103 break; 1104 1105 default: 1106 1107 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n"); 1108 return; 1109 break; 1110 } 1111 1112 // Although this function should generally be provided by the implementation, this one 1113 // should be as helpful as possible for grammar developers and serve as an example 1114 // of what you can do with each exception type. In general, when you make up your 1115 // 'real' handler, you should debug the routine with all possible errors you expect 1116 // which will then let you be as specific as possible about all circumstances. 1117 // 1118 // Note that in the general case, errors thrown by tree parsers indicate a problem 1119 // with the output of the parser or with the tree grammar itself. The job of the parser 1120 // is to produce a perfect (in traversal terms) syntactically correct tree, so errors 1121 // at that stage should really be semantic errors that your own code determines and handles 1122 // in whatever way is appropriate. 1123 // 1124 switch (ex->type) 1125 { 1126 case ANTLR3_UNWANTED_TOKEN_EXCEPTION: 1127 1128 // Indicates that the recognizer was fed a token which seesm to be 1129 // spurious input. We can detect this when the token that follows 1130 // this unwanted token would normally be part of the syntactically 1131 // correct stream. Then we can see that the token we are looking at 1132 // is just something that should not be there and throw this exception. 1133 // 1134 if (tokenNames == NULL) 1135 { 1136 ANTLR3_FPRINTF(stderr, " : Extraneous input..."); 1137 } 1138 else 1139 { 1140 if (ex->expecting == ANTLR3_TOKEN_EOF) 1141 { 1142 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n"); 1143 } 1144 else 1145 { 1146 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]); 1147 } 1148 } 1149 break; 1150 1151 case ANTLR3_MISSING_TOKEN_EXCEPTION: 1152 1153 // Indicates that the recognizer detected that the token we just 1154 // hit would be valid syntactically if preceeded by a particular 1155 // token. Perhaps a missing ';' at line end or a missing ',' in an 1156 // expression list, and such like. 1157 // 1158 if (tokenNames == NULL) 1159 { 1160 ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting); 1161 } 1162 else 1163 { 1164 if (ex->expecting == ANTLR3_TOKEN_EOF) 1165 { 1166 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n"); 1167 } 1168 else 1169 { 1170 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]); 1171 } 1172 } 1173 break; 1174 1175 case ANTLR3_RECOGNITION_EXCEPTION: 1176 1177 // Indicates that the recognizer received a token 1178 // in the input that was not predicted. This is the basic exception type 1179 // from which all others are derived. So we assume it was a syntax error. 1180 // You may get this if there are not more tokens and more are needed 1181 // to complete a parse for instance. 1182 // 1183 ANTLR3_FPRINTF(stderr, " : syntax error...\n"); 1184 break; 1185 1186 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: 1187 1188 // We were expecting to see one thing and got another. This is the 1189 // most common error if we coudl not detect a missing or unwanted token. 1190 // Here you can spend your efforts to 1191 // derive more useful error messages based on the expected 1192 // token set and the last token and so on. The error following 1193 // bitmaps do a good job of reducing the set that we were looking 1194 // for down to something small. Knowing what you are parsing may be 1195 // able to allow you to be even more specific about an error. 1196 // 1197 if (tokenNames == NULL) 1198 { 1199 ANTLR3_FPRINTF(stderr, " : syntax error...\n"); 1200 } 1201 else 1202 { 1203 if (ex->expecting == ANTLR3_TOKEN_EOF) 1204 { 1205 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n"); 1206 } 1207 else 1208 { 1209 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]); 1210 } 1211 } 1212 break; 1213 1214 case ANTLR3_NO_VIABLE_ALT_EXCEPTION: 1215 1216 // We could not pick any alt decision from the input given 1217 // so god knows what happened - however when you examine your grammar, 1218 // you should. It means that at the point where the current token occurred 1219 // that the DFA indicates nowhere to go from here. 1220 // 1221 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n"); 1222 1223 break; 1224 1225 case ANTLR3_MISMATCHED_SET_EXCEPTION: 1226 1227 { 1228 ANTLR3_UINT32 count; 1229 ANTLR3_UINT32 bit; 1230 ANTLR3_UINT32 size; 1231 ANTLR3_UINT32 numbits; 1232 pANTLR3_BITSET errBits; 1233 1234 // This means we were able to deal with one of a set of 1235 // possible tokens at this point, but we did not see any 1236 // member of that set. 1237 // 1238 ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : "); 1239 1240 // What tokens could we have accepted at this point in the 1241 // parse? 1242 // 1243 count = 0; 1244 errBits = antlr3BitsetLoad (ex->expectingSet); 1245 numbits = errBits->numBits (errBits); 1246 size = errBits->size (errBits); 1247 1248 if (size > 0) 1249 { 1250 // However many tokens we could have dealt with here, it is usually 1251 // not useful to print ALL of the set here. I arbitrarily chose 8 1252 // here, but you should do whatever makes sense for you of course. 1253 // No token number 0, so look for bit 1 and on. 1254 // 1255 for (bit = 1; bit < numbits && count < 8 && count < size; bit++) 1256 { 1257 // TODO: This doesn;t look right - should be asking if the bit is set!! 1258 // 1259 if (tokenNames[bit]) 1260 { 1261 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]); 1262 count++; 1263 } 1264 } 1265 ANTLR3_FPRINTF(stderr, "\n"); 1266 } 1267 else 1268 { 1269 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n"); 1270 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n"); 1271 } 1272 } 1273 break; 1274 1275 case ANTLR3_EARLY_EXIT_EXCEPTION: 1276 1277 // We entered a loop requiring a number of token sequences 1278 // but found a token that ended that sequence earlier than 1279 // we should have done. 1280 // 1281 ANTLR3_FPRINTF(stderr, " : missing elements...\n"); 1282 break; 1283 1284 default: 1285 1286 // We don't handle any other exceptions here, but you can 1287 // if you wish. If we get an exception that hits this point 1288 // then we are just going to report what we know about the 1289 // token. 1290 // 1291 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n"); 1292 break; 1293 } 1294 1295 // Here you have the token that was in error which if this is 1296 // the standard implementation will tell you the line and offset 1297 // and also record the address of the start of the line in the 1298 // input stream. You could therefore print the source line and so on. 1299 // Generally though, I would expect that your lexer/parser will keep 1300 // its own map of lines and source pointers or whatever as there 1301 // are a lot of specific things you need to know about the input 1302 // to do something like that. 1303 // Here is where you do it though :-). 1304 // 1305 } 1306 1307 /// Return how many syntax errors were detected by this recognizer 1308 /// 1309 static ANTLR3_UINT32 1310 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer) 1311 { 1312 return recognizer->state->errorCount; 1313 } 1314 1315 /// Recover from an error found on the input stream. Mostly this is 1316 /// NoViableAlt exceptions, but could be a mismatched token that 1317 /// the match() routine could not recover from. 1318 /// 1319 static void 1320 recover (pANTLR3_BASE_RECOGNIZER recognizer) 1321 { 1322 // Used to compute the follow set of tokens 1323 // 1324 pANTLR3_BITSET followSet; 1325 pANTLR3_PARSER parser; 1326 pANTLR3_TREE_PARSER tparser; 1327 pANTLR3_INT_STREAM is; 1328 1329 switch (recognizer->type) 1330 { 1331 case ANTLR3_TYPE_PARSER: 1332 1333 parser = (pANTLR3_PARSER) (recognizer->super); 1334 tparser = NULL; 1335 is = parser->tstream->istream; 1336 1337 break; 1338 1339 case ANTLR3_TYPE_TREE_PARSER: 1340 1341 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1342 parser = NULL; 1343 is = tparser->ctnstream->tnstream->istream; 1344 1345 break; 1346 1347 default: 1348 1349 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); 1350 return; 1351 1352 break; 1353 } 1354 1355 // Are we about to repeat the same error? 1356 // 1357 if (recognizer->state->lastErrorIndex == is->index(is)) 1358 { 1359 // The last error was at the same token index point. This must be a case 1360 // where LT(1) is in the recovery token set so nothing is 1361 // consumed. Consume a single token so at least to prevent 1362 // an infinite loop; this is a failsafe. 1363 // 1364 is->consume(is); 1365 } 1366 1367 // Record error index position 1368 // 1369 recognizer->state->lastErrorIndex = is->index(is); 1370 1371 // Work out the follows set for error recovery 1372 // 1373 followSet = recognizer->computeErrorRecoverySet(recognizer); 1374 1375 // Call resync hook (for debuggers and so on) 1376 // 1377 recognizer->beginResync(recognizer); 1378 1379 // Consume tokens until we have resynced to something in the follows set 1380 // 1381 recognizer->consumeUntilSet(recognizer, followSet); 1382 1383 // End resync hook 1384 // 1385 recognizer->endResync(recognizer); 1386 1387 // Destroy the temporary bitset we produced. 1388 // 1389 followSet->free(followSet); 1390 1391 // Reset the inError flag so we don't re-report the exception 1392 // 1393 recognizer->state->error = ANTLR3_FALSE; 1394 recognizer->state->failed = ANTLR3_FALSE; 1395 } 1396 1397 1398 /// Attempt to recover from a single missing or extra token. 1399 /// 1400 /// EXTRA TOKEN 1401 /// 1402 /// LA(1) is not what we are looking for. If LA(2) has the right token, 1403 /// however, then assume LA(1) is some extra spurious token. Delete it 1404 /// and LA(2) as if we were doing a normal match(), which advances the 1405 /// input. 1406 /// 1407 /// MISSING TOKEN 1408 /// 1409 /// If current token is consistent with what could come after 1410 /// ttype then it is ok to "insert" the missing token, else throw 1411 /// exception For example, Input "i=(3;" is clearly missing the 1412 /// ')'. When the parser returns from the nested call to expr, it 1413 /// will have call chain: 1414 /// 1415 /// stat -> expr -> atom 1416 /// 1417 /// and it will be trying to match the ')' at this point in the 1418 /// derivation: 1419 /// 1420 /// => ID '=' '(' INT ')' ('+' atom)* ';' 1421 /// ^ 1422 /// match() will see that ';' doesn't match ')' and report a 1423 /// mismatched token error. To recover, it sees that LA(1)==';' 1424 /// is in the set of tokens that can follow the ')' token 1425 /// reference in rule atom. It can assume that you forgot the ')'. 1426 /// 1427 /// The exception that was passed in, in the java implementation is 1428 /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the 1429 /// error flag and rules cascade back when this is set. 1430 /// 1431 static void * 1432 recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) 1433 { 1434 pANTLR3_PARSER parser; 1435 pANTLR3_TREE_PARSER tparser; 1436 pANTLR3_INT_STREAM is; 1437 void * matchedSymbol; 1438 1439 1440 1441 switch (recognizer->type) 1442 { 1443 case ANTLR3_TYPE_PARSER: 1444 1445 parser = (pANTLR3_PARSER) (recognizer->super); 1446 tparser = NULL; 1447 is = parser->tstream->istream; 1448 1449 break; 1450 1451 case ANTLR3_TYPE_TREE_PARSER: 1452 1453 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1454 parser = NULL; 1455 is = tparser->ctnstream->tnstream->istream; 1456 1457 break; 1458 1459 default: 1460 1461 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n"); 1462 return NULL; 1463 1464 break; 1465 } 1466 1467 // Create an exception if we need one 1468 // 1469 if (recognizer->state->exception == NULL) 1470 { 1471 antlr3RecognitionExceptionNew(recognizer); 1472 } 1473 1474 // If the next token after the one we are looking at in the input stream 1475 // is what we are looking for then we remove the one we have discovered 1476 // from the stream by consuming it, then consume this next one along too as 1477 // if nothing had happened. 1478 // 1479 if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE) 1480 { 1481 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; 1482 recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; 1483 1484 // Call resync hook (for debuggers and so on) 1485 // 1486 if (recognizer->debugger != NULL) 1487 { 1488 recognizer->debugger->beginResync(recognizer->debugger); 1489 } 1490 1491 // "delete" the extra token 1492 // 1493 recognizer->beginResync(recognizer); 1494 is->consume(is); 1495 recognizer->endResync(recognizer); 1496 // End resync hook 1497 // 1498 if (recognizer->debugger != NULL) 1499 { 1500 recognizer->debugger->endResync(recognizer->debugger); 1501 } 1502 1503 // Print out the error after we consume so that ANTLRWorks sees the 1504 // token in the exception. 1505 // 1506 recognizer->reportError(recognizer); 1507 1508 // Return the token we are actually matching 1509 // 1510 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); 1511 1512 // Consume the token that the rule actually expected to get as if everything 1513 // was hunky dory. 1514 // 1515 is->consume(is); 1516 1517 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more 1518 1519 return matchedSymbol; 1520 } 1521 1522 // Single token deletion (Unwanted above) did not work 1523 // so we see if we can insert a token instead by calculating which 1524 // token would be missing 1525 // 1526 if (mismatchIsMissingToken(recognizer, is, follow)) 1527 { 1528 // We can fake the missing token and proceed 1529 // 1530 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow); 1531 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; 1532 recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; 1533 recognizer->state->exception->token = matchedSymbol; 1534 recognizer->state->exception->expecting = ttype; 1535 1536 // Print out the error after we insert so that ANTLRWorks sees the 1537 // token in the exception. 1538 // 1539 recognizer->reportError(recognizer); 1540 1541 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more 1542 1543 return matchedSymbol; 1544 } 1545 1546 1547 // Neither deleting nor inserting tokens allows recovery 1548 // must just report the exception. 1549 // 1550 recognizer->state->error = ANTLR3_TRUE; 1551 return NULL; 1552 } 1553 1554 static void * 1555 recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow) 1556 { 1557 pANTLR3_PARSER parser; 1558 pANTLR3_TREE_PARSER tparser; 1559 pANTLR3_INT_STREAM is; 1560 pANTLR3_COMMON_TOKEN matchedSymbol; 1561 1562 switch (recognizer->type) 1563 { 1564 case ANTLR3_TYPE_PARSER: 1565 1566 parser = (pANTLR3_PARSER) (recognizer->super); 1567 tparser = NULL; 1568 is = parser->tstream->istream; 1569 1570 break; 1571 1572 case ANTLR3_TYPE_TREE_PARSER: 1573 1574 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1575 parser = NULL; 1576 is = tparser->ctnstream->tnstream->istream; 1577 1578 break; 1579 1580 default: 1581 1582 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n"); 1583 return NULL; 1584 1585 break; 1586 } 1587 1588 if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE) 1589 { 1590 // We can fake the missing token and proceed 1591 // 1592 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow); 1593 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; 1594 recognizer->state->exception->token = matchedSymbol; 1595 1596 // Print out the error after we insert so that ANTLRWorks sees the 1597 // token in the exception. 1598 // 1599 recognizer->reportError(recognizer); 1600 1601 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more 1602 1603 return matchedSymbol; 1604 } 1605 1606 // TODO - Single token deletion like in recoverFromMismatchedToken() 1607 // 1608 recognizer->state->error = ANTLR3_TRUE; 1609 recognizer->state->failed = ANTLR3_TRUE; 1610 return NULL; 1611 } 1612 1613 /// This code is factored out from mismatched token and mismatched set 1614 /// recovery. It handles "single token insertion" error recovery for 1615 /// both. No tokens are consumed to recover from insertions. Return 1616 /// true if recovery was possible else return false. 1617 /// 1618 static ANTLR3_BOOLEAN 1619 recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits) 1620 { 1621 pANTLR3_BITSET viableToksFollowingRule; 1622 pANTLR3_BITSET follow; 1623 pANTLR3_PARSER parser; 1624 pANTLR3_TREE_PARSER tparser; 1625 pANTLR3_INT_STREAM is; 1626 1627 switch (recognizer->type) 1628 { 1629 case ANTLR3_TYPE_PARSER: 1630 1631 parser = (pANTLR3_PARSER) (recognizer->super); 1632 tparser = NULL; 1633 is = parser->tstream->istream; 1634 1635 break; 1636 1637 case ANTLR3_TYPE_TREE_PARSER: 1638 1639 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1640 parser = NULL; 1641 is = tparser->ctnstream->tnstream->istream; 1642 1643 break; 1644 1645 default: 1646 1647 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); 1648 return ANTLR3_FALSE; 1649 1650 break; 1651 } 1652 1653 follow = antlr3BitsetLoad(followBits); 1654 1655 if (follow == NULL) 1656 { 1657 /* The follow set is NULL, which means we don't know what can come 1658 * next, so we "hit and hope" by just signifying that we cannot 1659 * recover, which will just cause the next token to be consumed, 1660 * which might dig us out. 1661 */ 1662 return ANTLR3_FALSE; 1663 } 1664 1665 /* We have a bitmap for the follow set, hence we can compute 1666 * what can follow this grammar element reference. 1667 */ 1668 if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE) 1669 { 1670 /* First we need to know which of the available tokens are viable 1671 * to follow this reference. 1672 */ 1673 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer); 1674 1675 /* Remove the EOR token, which we do not wish to compute with 1676 */ 1677 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE); 1678 viableToksFollowingRule->free(viableToksFollowingRule); 1679 /* We now have the computed set of what can follow the current token 1680 */ 1681 } 1682 1683 /* We can now see if the current token works with the set of tokens 1684 * that could follow the current grammar reference. If it looks like it 1685 * is consistent, then we can "insert" that token by not throwing 1686 * an exception and assuming that we saw it. 1687 */ 1688 if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE) 1689 { 1690 /* report the error, but don't cause any rules to abort and stuff 1691 */ 1692 recognizer->reportError(recognizer); 1693 if (follow != NULL) 1694 { 1695 follow->free(follow); 1696 } 1697 recognizer->state->error = ANTLR3_FALSE; 1698 recognizer->state->failed = ANTLR3_FALSE; 1699 return ANTLR3_TRUE; /* Success in recovery */ 1700 } 1701 1702 if (follow != NULL) 1703 { 1704 follow->free(follow); 1705 } 1706 1707 /* We could not find anything viable to do, so this is going to 1708 * cause an exception. 1709 */ 1710 return ANTLR3_FALSE; 1711 } 1712 1713 /// Eat tokens from the input stream until we get one of JUST the right type 1714 /// 1715 static void 1716 consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType) 1717 { 1718 ANTLR3_UINT32 ttype; 1719 pANTLR3_PARSER parser; 1720 pANTLR3_TREE_PARSER tparser; 1721 pANTLR3_INT_STREAM is; 1722 1723 switch (recognizer->type) 1724 { 1725 case ANTLR3_TYPE_PARSER: 1726 1727 parser = (pANTLR3_PARSER) (recognizer->super); 1728 tparser = NULL; 1729 is = parser->tstream->istream; 1730 1731 break; 1732 1733 case ANTLR3_TYPE_TREE_PARSER: 1734 1735 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1736 parser = NULL; 1737 is = tparser->ctnstream->tnstream->istream; 1738 1739 break; 1740 1741 default: 1742 1743 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n"); 1744 return; 1745 1746 break; 1747 } 1748 1749 // What do have at the moment? 1750 // 1751 ttype = is->_LA(is, 1); 1752 1753 // Start eating tokens until we get to the one we want. 1754 // 1755 while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType) 1756 { 1757 is->consume(is); 1758 ttype = is->_LA(is, 1); 1759 } 1760 } 1761 1762 /// Eat tokens from the input stream until we find one that 1763 /// belongs to the supplied set. 1764 /// 1765 static void 1766 consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set) 1767 { 1768 ANTLR3_UINT32 ttype; 1769 pANTLR3_PARSER parser; 1770 pANTLR3_TREE_PARSER tparser; 1771 pANTLR3_INT_STREAM is; 1772 1773 switch (recognizer->type) 1774 { 1775 case ANTLR3_TYPE_PARSER: 1776 1777 parser = (pANTLR3_PARSER) (recognizer->super); 1778 tparser = NULL; 1779 is = parser->tstream->istream; 1780 1781 break; 1782 1783 case ANTLR3_TYPE_TREE_PARSER: 1784 1785 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1786 parser = NULL; 1787 is = tparser->ctnstream->tnstream->istream; 1788 1789 break; 1790 1791 default: 1792 1793 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n"); 1794 return; 1795 1796 break; 1797 } 1798 1799 // What do have at the moment? 1800 // 1801 ttype = is->_LA(is, 1); 1802 1803 // Start eating tokens until we get to one we want. 1804 // 1805 while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE) 1806 { 1807 is->consume(is); 1808 ttype = is->_LA(is, 1); 1809 } 1810 } 1811 1812 /** Return the rule invocation stack (how we got here in the parse. 1813 * In the java version Ter just asks the JVM for all the information 1814 * but in C we don't get this information, so I am going to do nothing 1815 * right now. 1816 */ 1817 static pANTLR3_STACK 1818 getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer) 1819 { 1820 return NULL; 1821 } 1822 1823 static pANTLR3_STACK 1824 getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name) 1825 { 1826 return NULL; 1827 } 1828 1829 /** Convenience method for template rewrites - NYI. 1830 */ 1831 static pANTLR3_HASH_TABLE 1832 toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens) 1833 { 1834 return NULL; 1835 } 1836 1837 static void ANTLR3_CDECL 1838 freeIntTrie (void * trie) 1839 { 1840 ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie); 1841 } 1842 1843 1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied 1845 * start index before. If the rule has not parsed input starting from the supplied start index, 1846 * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point 1847 * then it will return the point where it last stopped parsing after that start point. 1848 * 1849 * \remark 1850 * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance 1851 * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only 1852 * version of the table. 1853 */ 1854 static ANTLR3_MARKER 1855 getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart) 1856 { 1857 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. 1858 */ 1859 pANTLR3_INT_TRIE ruleList; 1860 ANTLR3_MARKER stopIndex; 1861 pANTLR3_TRIE_ENTRY entry; 1862 1863 /* See if we have a list in the ruleMemos for this rule, and if not, then create one 1864 * as we will need it eventually if we are being asked for the memo here. 1865 */ 1866 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); 1867 1868 if (entry == NULL) 1869 { 1870 /* Did not find it, so create a new one for it, with a bit depth based on the 1871 * size of the input stream. We need the bit depth to incorporate the number if 1872 * bits required to represent the largest possible stop index in the input, which is the 1873 * last character. An int stream is free to return the largest 64 bit offset if it has 1874 * no idea of the size, but you should remember that this will cause the leftmost 1875 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) 1876 */ 1877 ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */ 1878 1879 if (ruleList != NULL) 1880 { 1881 recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie); 1882 } 1883 1884 /* We cannot have a stopIndex in a trie we have just created of course 1885 */ 1886 return MEMO_RULE_UNKNOWN; 1887 } 1888 1889 ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr); 1890 1891 /* See if there is a stop index associated with the supplied start index. 1892 */ 1893 stopIndex = 0; 1894 1895 entry = ruleList->get(ruleList, ruleParseStart); 1896 if (entry != NULL) 1897 { 1898 stopIndex = (ANTLR3_MARKER)(entry->data.intVal); 1899 } 1900 1901 if (stopIndex == 0) 1902 { 1903 return MEMO_RULE_UNKNOWN; 1904 } 1905 1906 return stopIndex; 1907 } 1908 1909 /** Has this rule already parsed input at the current index in the 1910 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE 1911 * if we have not. 1912 * 1913 * This method has a side-effect: if we have seen this input for 1914 * this rule and successfully parsed before, then seek ahead to 1915 * 1 past the stop token matched for this rule last time. 1916 */ 1917 static ANTLR3_BOOLEAN 1918 alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex) 1919 { 1920 ANTLR3_MARKER stopIndex; 1921 pANTLR3_LEXER lexer; 1922 pANTLR3_PARSER parser; 1923 pANTLR3_TREE_PARSER tparser; 1924 pANTLR3_INT_STREAM is; 1925 1926 switch (recognizer->type) 1927 { 1928 case ANTLR3_TYPE_PARSER: 1929 1930 parser = (pANTLR3_PARSER) (recognizer->super); 1931 tparser = NULL; 1932 lexer = NULL; 1933 is = parser->tstream->istream; 1934 1935 break; 1936 1937 case ANTLR3_TYPE_TREE_PARSER: 1938 1939 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 1940 parser = NULL; 1941 lexer = NULL; 1942 is = tparser->ctnstream->tnstream->istream; 1943 1944 break; 1945 1946 case ANTLR3_TYPE_LEXER: 1947 1948 lexer = (pANTLR3_LEXER) (recognizer->super); 1949 parser = NULL; 1950 tparser = NULL; 1951 is = lexer->input->istream; 1952 break; 1953 1954 default: 1955 1956 ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n"); 1957 return ANTLR3_FALSE; 1958 1959 break; 1960 } 1961 1962 /* See if we have a memo marker for this. 1963 */ 1964 stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is)); 1965 1966 if (stopIndex == MEMO_RULE_UNKNOWN) 1967 { 1968 return ANTLR3_FALSE; 1969 } 1970 1971 if (stopIndex == MEMO_RULE_FAILED) 1972 { 1973 recognizer->state->failed = ANTLR3_TRUE; 1974 } 1975 else 1976 { 1977 is->seek(is, stopIndex+1); 1978 } 1979 1980 /* If here then the rule was executed for this input already 1981 */ 1982 return ANTLR3_TRUE; 1983 } 1984 1985 /** Record whether or not this rule parsed the input at this position 1986 * successfully. 1987 */ 1988 static void 1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart) 1990 { 1991 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. 1992 */ 1993 pANTLR3_INT_TRIE ruleList; 1994 pANTLR3_TRIE_ENTRY entry; 1995 ANTLR3_MARKER stopIndex; 1996 pANTLR3_LEXER lexer; 1997 pANTLR3_PARSER parser; 1998 pANTLR3_TREE_PARSER tparser; 1999 pANTLR3_INT_STREAM is; 2000 2001 switch (recognizer->type) 2002 { 2003 case ANTLR3_TYPE_PARSER: 2004 2005 parser = (pANTLR3_PARSER) (recognizer->super); 2006 tparser = NULL; 2007 is = parser->tstream->istream; 2008 2009 break; 2010 2011 case ANTLR3_TYPE_TREE_PARSER: 2012 2013 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 2014 parser = NULL; 2015 is = tparser->ctnstream->tnstream->istream; 2016 2017 break; 2018 2019 case ANTLR3_TYPE_LEXER: 2020 2021 lexer = (pANTLR3_LEXER) (recognizer->super); 2022 parser = NULL; 2023 tparser = NULL; 2024 is = lexer->input->istream; 2025 break; 2026 2027 default: 2028 2029 ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n"); 2030 return; 2031 2032 break; 2033 } 2034 2035 stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1; 2036 2037 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); 2038 2039 if (entry != NULL) 2040 { 2041 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr); 2042 2043 /* If we don't already have this entry, append it. The memoize trie does not 2044 * accept duplicates so it won't add it if already there and we just ignore the 2045 * return code as we don't care if it is there already. 2046 */ 2047 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL); 2048 } 2049 } 2050 /** A syntactic predicate. Returns true/false depending on whether 2051 * the specified grammar fragment matches the current input stream. 2052 * This resets the failed instance var afterwards. 2053 */ 2054 static ANTLR3_BOOLEAN 2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)) 2056 { 2057 ANTLR3_MARKER start; 2058 pANTLR3_PARSER parser; 2059 pANTLR3_TREE_PARSER tparser; 2060 pANTLR3_INT_STREAM is; 2061 2062 switch (recognizer->type) 2063 { 2064 case ANTLR3_TYPE_PARSER: 2065 2066 parser = (pANTLR3_PARSER) (recognizer->super); 2067 tparser = NULL; 2068 is = parser->tstream->istream; 2069 2070 break; 2071 2072 case ANTLR3_TYPE_TREE_PARSER: 2073 2074 tparser = (pANTLR3_TREE_PARSER) (recognizer->super); 2075 parser = NULL; 2076 is = tparser->ctnstream->tnstream->istream; 2077 2078 break; 2079 2080 default: 2081 2082 ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n"); 2083 return ANTLR3_FALSE; 2084 2085 break; 2086 } 2087 2088 /* Begin backtracking so we can get back to where we started after trying out 2089 * the syntactic predicate. 2090 */ 2091 start = is->mark(is); 2092 recognizer->state->backtracking++; 2093 2094 /* Try the syntactical predicate 2095 */ 2096 predicate(ctx); 2097 2098 /* Reset 2099 */ 2100 is->rewind(is, start); 2101 recognizer->state->backtracking--; 2102 2103 if (recognizer->state->failed == ANTLR3_TRUE) 2104 { 2105 /* Predicate failed 2106 */ 2107 recognizer->state->failed = ANTLR3_FALSE; 2108 return ANTLR3_FALSE; 2109 } 2110 else 2111 { 2112 /* Predicate was successful 2113 */ 2114 recognizer->state->failed = ANTLR3_FALSE; 2115 return ANTLR3_TRUE; 2116 } 2117 } 2118 2119 static void 2120 reset(pANTLR3_BASE_RECOGNIZER recognizer) 2121 { 2122 if (recognizer->state->following != NULL) 2123 { 2124 recognizer->state->following->free(recognizer->state->following); 2125 } 2126 2127 // Reset the state flags 2128 // 2129 recognizer->state->errorRecovery = ANTLR3_FALSE; 2130 recognizer->state->lastErrorIndex = -1; 2131 recognizer->state->failed = ANTLR3_FALSE; 2132 recognizer->state->errorCount = 0; 2133 recognizer->state->backtracking = 0; 2134 recognizer->state->following = NULL; 2135 2136 if (recognizer->state != NULL) 2137 { 2138 if (recognizer->state->ruleMemo != NULL) 2139 { 2140 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); 2141 recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */ 2142 } 2143 } 2144 2145 2146 // Install a new following set 2147 // 2148 recognizer->state->following = antlr3StackNew(8); 2149 2150 } 2151 2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime. 2153 // You MAY need override this function if the standard TOKEN_STREAM is not what you are using. 2154 // 2155 static void * 2156 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) 2157 { 2158 return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1); 2159 } 2160 2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime. 2162 // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using. 2163 // 2164 static void * 2165 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 2166 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) 2167 { 2168 pANTLR3_TOKEN_STREAM ts; 2169 pANTLR3_COMMON_TOKEN_STREAM cts; 2170 pANTLR3_COMMON_TOKEN token; 2171 pANTLR3_COMMON_TOKEN current; 2172 pANTLR3_STRING text; 2173 2174 // Dereference the standard pointers 2175 // 2176 ts = (pANTLR3_TOKEN_STREAM)istream->super; 2177 cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; 2178 2179 // Work out what to use as the current symbol to make a line and offset etc 2180 // If we are at EOF, we use the token before EOF 2181 // 2182 current = ts->_LT(ts, 1); 2183 if (current->getType(current) == ANTLR3_TOKEN_EOF) 2184 { 2185 current = ts->_LT(ts, -1); 2186 } 2187 2188 // Create a new empty token 2189 // 2190 if (recognizer->state->tokFactory == NULL) 2191 { 2192 // We don't yet have a token factory for making tokens 2193 // we just need a fake one using the input stream of the current 2194 // token. 2195 // 2196 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input); 2197 } 2198 token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory); 2199 2200 // Set some of the token properties based on the current token 2201 // 2202 token->setLine (token, current->getLine(current)); 2203 token->setCharPositionInLine (token, current->getCharPositionInLine(current)); 2204 token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL); 2205 token->setType (token, expectedTokenType); 2206 token->user1 = current->user1; 2207 token->user2 = current->user2; 2208 token->user3 = current->user3; 2209 token->custom = current->custom; 2210 token->lineStart = current->lineStart; 2211 2212 // Create the token text that shows it has been inserted 2213 // 2214 token->setText8(token, (pANTLR3_UINT8)"<missing "); 2215 text = token->getText(token); 2216 2217 if (text != NULL) 2218 { 2219 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]); 2220 text->append8(text, (const char *)">"); 2221 } 2222 2223 // Finally return the pointer to our new token 2224 // 2225 return token; 2226 } 2227 2228 2229 #ifdef ANTLR3_WINDOWS 2230 #pragma warning( default : 4100 ) 2231 #endif 2232 2233 /// @} 2234 /// 2235 2236