/** \file
 *
 * Base implementation of an antlr 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are lexer specific (the
 * rest keep the default implementation of the base recognizer).
 */

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 40 #include <antlr3lexer.h> 41 42 static void mTokens (pANTLR3_LEXER lexer); 43 static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 44 static void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 45 static void popCharStream (pANTLR3_LEXER lexer); 46 47 static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token); 48 static pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer); 49 static ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string); 50 static ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c); 51 static ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high); 52 static void matchAny (pANTLR3_LEXER lexer); 53 static void recover (pANTLR3_LEXER lexer); 54 static ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer); 55 static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer); 56 static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer); 57 static pANTLR3_STRING getText (pANTLR3_LEXER lexer); 58 static pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource); 59 60 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames); 61 static void reportError (pANTLR3_BASE_RECOGNIZER rec); 62 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); 63 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 64 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); 65 66 static void reset (pANTLR3_BASE_RECOGNIZER rec); 67 68 static void freeLexer (pANTLR3_LEXER lexer); 69 70 71 ANTLR3_API pANTLR3_LEXER 72 antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) 73 { 74 pANTLR3_LEXER lexer; 75 pANTLR3_COMMON_TOKEN specialT; 76 77 /* Allocate memory 78 */ 79 lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER)); 80 81 if (lexer == NULL) 82 { 83 return NULL; 84 } 85 86 /* Now we need to create the 
base recognizer 87 */ 88 lexer->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state); 89 90 if (lexer->rec == NULL) 91 { 92 lexer->free(lexer); 93 return NULL; 94 } 95 lexer->rec->super = lexer; 96 97 lexer->rec->displayRecognitionError = displayRecognitionError; 98 lexer->rec->reportError = reportError; 99 lexer->rec->reset = reset; 100 lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol; 101 lexer->rec->getMissingSymbol = getMissingSymbol; 102 103 /* Now install the token source interface 104 */ 105 if (lexer->rec->state->tokSource == NULL) 106 { 107 lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE)); 108 109 if (lexer->rec->state->tokSource == NULL) 110 { 111 lexer->rec->free(lexer->rec); 112 lexer->free(lexer); 113 114 return NULL; 115 } 116 lexer->rec->state->tokSource->super = lexer; 117 118 /* Install the default nextToken() method, which may be overridden 119 * by generated code, or by anything else in fact. 120 */ 121 lexer->rec->state->tokSource->nextToken = nextToken; 122 lexer->rec->state->tokSource->strFactory = NULL; 123 124 lexer->rec->state->tokFactory = NULL; 125 } 126 127 /* Install the lexer API 128 */ 129 lexer->setCharStream = setCharStream; 130 lexer->mTokens = (void (*)(void *))(mTokens); 131 lexer->setCharStream = setCharStream; 132 lexer->pushCharStream = pushCharStream; 133 lexer->popCharStream = popCharStream; 134 lexer->emit = emit; 135 lexer->emitNew = emitNew; 136 lexer->matchs = matchs; 137 lexer->matchc = matchc; 138 lexer->matchRange = matchRange; 139 lexer->matchAny = matchAny; 140 lexer->recover = recover; 141 lexer->getLine = getLine; 142 lexer->getCharIndex = getCharIndex; 143 lexer->getCharPositionInLine = getCharPositionInLine; 144 lexer->getText = getText; 145 lexer->free = freeLexer; 146 147 /* Initialise the eof token 148 */ 149 specialT = &(lexer->rec->state->tokSource->eofToken); 150 antlr3SetTokenAPI (specialT); 151 specialT->setType (specialT, 
ANTLR3_TOKEN_EOF); 152 specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 153 specialT->strFactory = NULL; 154 specialT->textState = ANTLR3_TEXT_NONE; 155 specialT->custom = NULL; 156 specialT->user1 = 0; 157 specialT->user2 = 0; 158 specialT->user3 = 0; 159 160 // Initialize the skip token. 161 // 162 specialT = &(lexer->rec->state->tokSource->skipToken); 163 antlr3SetTokenAPI (specialT); 164 specialT->setType (specialT, ANTLR3_TOKEN_INVALID); 165 specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 166 specialT->strFactory = NULL; 167 specialT->custom = NULL; 168 specialT->user1 = 0; 169 specialT->user2 = 0; 170 specialT->user3 = 0; 171 return lexer; 172 } 173 174 static void 175 reset (pANTLR3_BASE_RECOGNIZER rec) 176 { 177 pANTLR3_LEXER lexer; 178 179 lexer = rec->super; 180 181 lexer->rec->state->token = NULL; 182 lexer->rec->state->type = ANTLR3_TOKEN_INVALID; 183 lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 184 lexer->rec->state->tokenStartCharIndex = -1; 185 lexer->rec->state->tokenStartCharPositionInLine = -1; 186 lexer->rec->state->tokenStartLine = -1; 187 188 lexer->rec->state->text = NULL; 189 190 // OK - that's all hunky dory, but we may well have had 191 // a token factory that needs a reset. Do that here 192 // 193 if (lexer->rec->state->tokFactory != NULL) 194 { 195 lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory); 196 } 197 } 198 199 /// 200 /// \brief 201 /// Returns the next available token from the current input stream. 202 /// 203 /// \param toksource 204 /// Points to the implementation of a token source. The lexer is 205 /// addressed by the super structure pointer. 206 /// 207 /// \returns 208 /// The next token in the current input stream or the EOF token 209 /// if there are no more tokens. 210 /// 211 /// \remarks 212 /// Write remarks for nextToken here. 
213 /// 214 /// \see nextToken 215 /// 216 ANTLR3_INLINE static pANTLR3_COMMON_TOKEN 217 nextTokenStr (pANTLR3_TOKEN_SOURCE toksource) 218 { 219 pANTLR3_LEXER lexer; 220 pANTLR3_RECOGNIZER_SHARED_STATE state; 221 pANTLR3_INPUT_STREAM input; 222 pANTLR3_INT_STREAM istream; 223 224 lexer = (pANTLR3_LEXER)(toksource->super); 225 state = lexer->rec->state; 226 input = lexer->input; 227 istream = input->istream; 228 229 /// Loop until we get a non skipped token or EOF 230 /// 231 for (;;) 232 { 233 // Get rid of any previous token (token factory takes care of 234 // any de-allocation when this token is finally used up. 235 // 236 state->token = NULL; 237 state->error = ANTLR3_FALSE; // Start out without an exception 238 state->failed = ANTLR3_FALSE; 239 240 // Now call the matching rules and see if we can generate a new token 241 // 242 for (;;) 243 { 244 // Record the start of the token in our input stream. 245 // 246 state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 247 state->tokenStartCharIndex = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar)); 248 state->tokenStartCharPositionInLine = input->charPositionInLine; 249 state->tokenStartLine = input->line; 250 state->text = NULL; 251 state->custom = NULL; 252 state->user1 = 0; 253 state->user2 = 0; 254 state->user3 = 0; 255 256 if (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF) 257 { 258 // Reached the end of the current stream, nothing more to do if this is 259 // the last in the stack. 
260 // 261 pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); 262 263 teof->setStartIndex (teof, lexer->getCharIndex(lexer)); 264 teof->setStopIndex (teof, lexer->getCharIndex(lexer)); 265 teof->setLine (teof, lexer->getLine(lexer)); 266 teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it 267 return teof; 268 } 269 270 state->token = NULL; 271 state->error = ANTLR3_FALSE; // Start out without an exception 272 state->failed = ANTLR3_FALSE; 273 274 // Call the generated lexer, see if it can get a new token together. 275 // 276 lexer->mTokens(lexer->ctx); 277 278 if (state->error == ANTLR3_TRUE) 279 { 280 // Recognition exception, report it and try to recover. 281 // 282 state->failed = ANTLR3_TRUE; 283 lexer->rec->reportError(lexer->rec); 284 lexer->recover(lexer); 285 } 286 else 287 { 288 if (state->token == NULL) 289 { 290 // Emit the real token, which adds it in to the token stream basically 291 // 292 emit(lexer); 293 } 294 else if (state->token == &(toksource->skipToken)) 295 { 296 // A real token could have been generated, but "Computer say's naaaaah" and it 297 // it is just something we need to skip altogether. 298 // 299 continue; 300 } 301 302 // Good token, not skipped, not EOF token 303 // 304 return state->token; 305 } 306 } 307 } 308 } 309 310 /** 311 * \brief 312 * Default implementation of the nextToken() call for a lexer. 313 * 314 * \param toksource 315 * Points to the implementation of a token source. The lexer is 316 * addressed by the super structure pointer. 317 * 318 * \returns 319 * The next token in the current input stream or the EOF token 320 * if there are no more tokens in any input stream in the stack. 321 * 322 * Write detailed description for nextToken here. 323 * 324 * \remarks 325 * Write remarks for nextToken here. 
326 * 327 * \see nextTokenStr 328 */ 329 static pANTLR3_COMMON_TOKEN 330 nextToken (pANTLR3_TOKEN_SOURCE toksource) 331 { 332 pANTLR3_COMMON_TOKEN tok; 333 334 // Find the next token in the current stream 335 // 336 tok = nextTokenStr(toksource); 337 338 // If we got to the EOF token then switch to the previous 339 // input stream if there were any and just return the 340 // EOF if there are none. We must check the next token 341 // in any outstanding input stream we pop into the active 342 // role to see if it was sitting at EOF after PUSHing the 343 // stream we just consumed, otherwise we will return EOF 344 // on the reinstalled input stream, when in actual fact 345 // there might be more input streams to POP before the 346 // real EOF of the whole logical inptu stream. Hence we 347 // use a while loop here until we find somethign in the stream 348 // that isn't EOF or we reach the actual end of the last input 349 // stream on the stack. 350 // 351 while (tok->type == ANTLR3_TOKEN_EOF) 352 { 353 pANTLR3_LEXER lexer; 354 355 lexer = (pANTLR3_LEXER)(toksource->super); 356 357 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 358 { 359 // We have another input stream in the stack so we 360 // need to revert to it, then resume the loop to check 361 // it wasn't sitting at EOF itself. 362 // 363 lexer->popCharStream(lexer); 364 tok = nextTokenStr(toksource); 365 } 366 else 367 { 368 // There were no more streams on the input stack 369 // so this EOF is the 'real' logical EOF for 370 // the input stream. So we just exit the loop and 371 // return the EOF we have found. 
372 // 373 break; 374 } 375 376 } 377 378 // return whatever token we have, which may be EOF 379 // 380 return tok; 381 } 382 383 ANTLR3_API pANTLR3_LEXER 384 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state) 385 { 386 pANTLR3_LEXER lexer; 387 388 // Create a basic lexer first 389 // 390 lexer = antlr3LexerNew(sizeHint, state); 391 392 if (lexer != NULL) 393 { 394 // Install the input stream and reset the lexer 395 // 396 setCharStream(lexer, input); 397 } 398 399 return lexer; 400 } 401 402 static void mTokens (pANTLR3_LEXER lexer) 403 { 404 if (lexer) // Fool compiler, avoid pragmas 405 { 406 ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n"); 407 } 408 } 409 410 static void 411 reportError (pANTLR3_BASE_RECOGNIZER rec) 412 { 413 // Indicate this recognizer had an error while processing. 414 // 415 rec->state->errorCount++; 416 417 rec->displayRecognitionError(rec, rec->state->tokenNames); 418 } 419 420 #ifdef ANTLR3_WINDOWS 421 #pragma warning( disable : 4100 ) 422 #endif 423 424 /** Default lexer error handler (works for 8 bit streams only!!!) 
425 */ 426 static void 427 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) 428 { 429 pANTLR3_LEXER lexer; 430 pANTLR3_EXCEPTION ex; 431 pANTLR3_STRING ftext; 432 433 lexer = (pANTLR3_LEXER)(recognizer->super); 434 ex = lexer->rec->state->exception; 435 436 // See if there is a 'filename' we can use 437 // 438 if (ex->name == NULL) 439 { 440 ANTLR3_FPRINTF(stderr, "-unknown source-("); 441 } 442 else 443 { 444 ftext = ex->streamName->to8(ex->streamName); 445 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); 446 } 447 448 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); 449 ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ", 450 ex->type, 451 (pANTLR3_UINT8) (ex->message), 452 ex->charPositionInLine+1 453 ); 454 { 455 ANTLR3_INT32 width; 456 457 width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index)); 458 459 if (width >= 1) 460 { 461 if (isprint(ex->c)) 462 { 463 ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c); 464 } 465 else 466 { 467 ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c)); 468 } 469 ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index)); 470 } 471 else 472 { 473 ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"); 474 ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ", 475 (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine), 476 (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine) 477 ); 478 width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 479 480 if (width >= 1) 481 { 482 ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 
20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 483 } 484 else 485 { 486 ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n"); 487 } 488 } 489 } 490 } 491 492 static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 493 { 494 /* Install the input interface 495 */ 496 lexer->input = input; 497 498 /* We may need a token factory for the lexer; we don't destroy any existing factory 499 * until the lexer is destroyed, as people may still be using the tokens it produced. 500 * TODO: Later I will provide a dup() method for a token so that it can extract itself 501 * out of the factory. 502 */ 503 if (lexer->rec->state->tokFactory == NULL) 504 { 505 lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input); 506 } 507 else 508 { 509 /* When the input stream is being changed on the fly, rather than 510 * at the start of a new lexer, then we must tell the tokenFactory 511 * which input stream to adorn the tokens with so that when they 512 * are asked to provide their original input strings they can 513 * do so from the correct text stream. 514 */ 515 lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input); 516 } 517 518 /* Propagate the string factory so that we preserve the encoding form from 519 * the input stream. 520 */ 521 if (lexer->rec->state->tokSource->strFactory == NULL) 522 { 523 lexer->rec->state->tokSource->strFactory = input->strFactory; 524 525 // Set the newly acquired string factory up for our pre-made tokens 526 // for EOF. 
527 // 528 if (lexer->rec->state->tokSource->eofToken.strFactory == NULL) 529 { 530 lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory; 531 } 532 } 533 534 /* This is a lexer, install the appropriate exception creator 535 */ 536 lexer->rec->exConstruct = antlr3RecognitionExceptionNew; 537 538 /* Set the current token to nothing 539 */ 540 lexer->rec->state->token = NULL; 541 lexer->rec->state->text = NULL; 542 lexer->rec->state->tokenStartCharIndex = -1; 543 544 /* Copy the name of the char stream to the token source 545 */ 546 lexer->rec->state->tokSource->fileName = input->fileName; 547 } 548 549 /*! 550 * \brief 551 * Change to a new input stream, remembering the old one. 552 * 553 * \param lexer 554 * Pointer to the lexer instance to switch input streams for. 555 * 556 * \param input 557 * New input stream to install as the current one. 558 * 559 * Switches the current character input stream to 560 * a new one, saving the old one, which we will revert to at the end of this 561 * new one. 562 */ 563 static void 564 pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 565 { 566 // Do we need a new input stream stack? 567 // 568 if (lexer->rec->state->streams == NULL) 569 { 570 // This is the first call to stack a new 571 // stream and so we must create the stack first. 572 // 573 lexer->rec->state->streams = antlr3StackNew(0); 574 575 if (lexer->rec->state->streams == NULL) 576 { 577 // Could not do this, we just fail to push it. 578 // TODO: Consider if this is what we want to do, but then 579 // any programmer can override this method to do something else. 580 return; 581 } 582 } 583 584 // We have a stack, so we can save the current input stream 585 // into it. 586 // 587 lexer->input->istream->mark(lexer->input->istream); 588 lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL); 589 590 // And now we can install this new one 591 // 592 lexer->setCharStream(lexer, input); 593 } 594 595 /*! 
596 * \brief 597 * Stops using the current input stream and reverts to any prior 598 * input stream on the stack. 599 * 600 * \param lexer 601 * Description of parameter lexer. 602 * 603 * Pointer to a function that abandons the current input stream, whether it 604 * is empty or not and reverts to the previous stacked input stream. 605 * 606 * \remark 607 * The function fails silently if there are no prior input streams. 608 */ 609 static void 610 popCharStream (pANTLR3_LEXER lexer) 611 { 612 pANTLR3_INPUT_STREAM input; 613 614 // If we do not have a stream stack or we are already at the 615 // stack bottom, then do nothing. 616 // 617 if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 618 { 619 // We just leave the current stream to its fate, we do not close 620 // it or anything as we do not know what the programmer intended 621 // for it. This method can always be overridden of course. 622 // So just find out what was currently saved on the stack and use 623 // that now, then pop it from the stack. 624 // 625 input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top); 626 lexer->rec->state->streams->pop(lexer->rec->state->streams); 627 628 // Now install the stream as the current one. 629 // 630 lexer->setCharStream(lexer, input); 631 lexer->input->istream->rewindLast(lexer->input->istream); 632 } 633 return; 634 } 635 636 static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token) 637 { 638 lexer->rec->state->token = token; /* Voila! */ 639 } 640 641 static pANTLR3_COMMON_TOKEN 642 emit (pANTLR3_LEXER lexer) 643 { 644 pANTLR3_COMMON_TOKEN token; 645 646 /* We could check pointers to token factories and so on, but 647 * we are in code that we want to run as fast as possible 648 * so we are not checking any errors. So make sure you have installed an input stream before 649 * trying to emit a new token. 
650 */ 651 token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); 652 653 /* Install the supplied information, and some other bits we already know 654 * get added automatically, such as the input stream it is associated with 655 * (though it can all be overridden of course) 656 */ 657 token->type = lexer->rec->state->type; 658 token->channel = lexer->rec->state->channel; 659 token->start = lexer->rec->state->tokenStartCharIndex; 660 token->stop = lexer->getCharIndex(lexer) - 1; 661 token->line = lexer->rec->state->tokenStartLine; 662 token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; 663 664 if (lexer->rec->state->text != NULL) 665 { 666 token->textState = ANTLR3_TEXT_STRING; 667 token->tokText.text = lexer->rec->state->text; 668 } 669 else 670 { 671 token->textState = ANTLR3_TEXT_NONE; 672 } 673 token->lineStart = lexer->input->currentLine; 674 token->user1 = lexer->rec->state->user1; 675 token->user2 = lexer->rec->state->user2; 676 token->user3 = lexer->rec->state->user3; 677 token->custom = lexer->rec->state->custom; 678 679 lexer->rec->state->token = token; 680 681 return token; 682 } 683 684 /** 685 * Free the resources allocated by a lexer 686 */ 687 static void 688 freeLexer (pANTLR3_LEXER lexer) 689 { 690 // This may have ben a delegate or delegator lexer, in which case the 691 // state may already have been freed (and set to NULL therefore) 692 // so we ignore the state if we don't have it. 
693 // 694 if (lexer->rec->state != NULL) 695 { 696 if (lexer->rec->state->streams != NULL) 697 { 698 lexer->rec->state->streams->free(lexer->rec->state->streams); 699 } 700 if (lexer->rec->state->tokFactory != NULL) 701 { 702 lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory); 703 lexer->rec->state->tokFactory = NULL; 704 } 705 if (lexer->rec->state->tokSource != NULL) 706 { 707 ANTLR3_FREE(lexer->rec->state->tokSource); 708 lexer->rec->state->tokSource = NULL; 709 } 710 } 711 if (lexer->rec != NULL) 712 { 713 lexer->rec->free(lexer->rec); 714 lexer->rec = NULL; 715 } 716 ANTLR3_FREE(lexer); 717 } 718 719 /** Implementation of matchs for the lexer, overrides any 720 * base implementation in the base recognizer. 721 * 722 * \remark 723 * Note that the generated code lays down arrays of ints for constant 724 * strings so that they are int UTF32 form! 725 */ 726 static ANTLR3_BOOLEAN 727 matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string) 728 { 729 while (*string != ANTLR3_STRING_TERMINATOR) 730 { 731 if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string)) 732 { 733 if (lexer->rec->state->backtracking > 0) 734 { 735 lexer->rec->state->failed = ANTLR3_TRUE; 736 return ANTLR3_FALSE; 737 } 738 739 lexer->rec->exConstruct(lexer->rec); 740 lexer->rec->state->failed = ANTLR3_TRUE; 741 742 /* TODO: Implement exception creation more fully perhaps 743 */ 744 lexer->recover(lexer); 745 return ANTLR3_FALSE; 746 } 747 748 /* Matched correctly, do consume it 749 */ 750 lexer->input->istream->consume(lexer->input->istream); 751 string++; 752 753 /* Reset any failed indicator 754 */ 755 lexer->rec->state->failed = ANTLR3_FALSE; 756 } 757 758 759 return ANTLR3_TRUE; 760 } 761 762 /** Implementation of matchc for the lexer, overrides any 763 * base implementation in the base recognizer. 764 * 765 * \remark 766 * Note that the generated code lays down arrays of ints for constant 767 * strings so that they are int UTF32 form! 
768 */ 769 static ANTLR3_BOOLEAN 770 matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c) 771 { 772 if (lexer->input->istream->_LA(lexer->input->istream, 1) == c) 773 { 774 /* Matched correctly, do consume it 775 */ 776 lexer->input->istream->consume(lexer->input->istream); 777 778 /* Reset any failed indicator 779 */ 780 lexer->rec->state->failed = ANTLR3_FALSE; 781 782 return ANTLR3_TRUE; 783 } 784 785 /* Failed to match, exception and recovery time. 786 */ 787 if (lexer->rec->state->backtracking > 0) 788 { 789 lexer->rec->state->failed = ANTLR3_TRUE; 790 return ANTLR3_FALSE; 791 } 792 793 lexer->rec->exConstruct(lexer->rec); 794 795 /* TODO: Implement exception creation more fully perhaps 796 */ 797 lexer->recover(lexer); 798 799 return ANTLR3_FALSE; 800 } 801 802 /** Implementation of match range for the lexer, overrides any 803 * base implementation in the base recognizer. 804 * 805 * \remark 806 * Note that the generated code lays down arrays of ints for constant 807 * strings so that they are int UTF32 form! 808 */ 809 static ANTLR3_BOOLEAN 810 matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high) 811 { 812 ANTLR3_UCHAR c; 813 814 /* What is in the stream at the moment? 815 */ 816 c = lexer->input->istream->_LA(lexer->input->istream, 1); 817 if ( c >= low && c <= high) 818 { 819 /* Matched correctly, consume it 820 */ 821 lexer->input->istream->consume(lexer->input->istream); 822 823 /* Reset any failed indicator 824 */ 825 lexer->rec->state->failed = ANTLR3_FALSE; 826 827 return ANTLR3_TRUE; 828 } 829 830 /* Failed to match, execption and recovery time. 
831 */ 832 833 if (lexer->rec->state->backtracking > 0) 834 { 835 lexer->rec->state->failed = ANTLR3_TRUE; 836 return ANTLR3_FALSE; 837 } 838 839 lexer->rec->exConstruct(lexer->rec); 840 841 /* TODO: Implement exception creation more fully 842 */ 843 lexer->recover(lexer); 844 845 return ANTLR3_FALSE; 846 } 847 848 static void 849 matchAny (pANTLR3_LEXER lexer) 850 { 851 lexer->input->istream->consume(lexer->input->istream); 852 } 853 854 static void 855 recover (pANTLR3_LEXER lexer) 856 { 857 lexer->input->istream->consume(lexer->input->istream); 858 } 859 860 static ANTLR3_UINT32 861 getLine (pANTLR3_LEXER lexer) 862 { 863 return lexer->input->getLine(lexer->input); 864 } 865 866 static ANTLR3_UINT32 867 getCharPositionInLine (pANTLR3_LEXER lexer) 868 { 869 return lexer->input->charPositionInLine; 870 } 871 872 static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer) 873 { 874 return lexer->input->istream->index(lexer->input->istream); 875 } 876 877 static pANTLR3_STRING 878 getText (pANTLR3_LEXER lexer) 879 { 880 if (lexer->rec->state->text) 881 { 882 return lexer->rec->state->text; 883 884 } 885 return lexer->input->substr( 886 lexer->input, 887 lexer->rec->state->tokenStartCharIndex, 888 lexer->getCharIndex(lexer) - lexer->input->charByteSize 889 ); 890 891 } 892 893 static void * 894 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) 895 { 896 return NULL; 897 } 898 899 static void * 900 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 901 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) 902 { 903 return NULL; 904 } 905