1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org) 3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. 4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca) 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #include "config.h" 24 #include "Lexer.h" 25 26 #include "JSFunction.h" 27 #include "JSGlobalObjectFunctions.h" 28 #include "NodeInfo.h" 29 #include "Nodes.h" 30 #include "dtoa.h" 31 #include <ctype.h> 32 #include <limits.h> 33 #include <string.h> 34 #include <wtf/Assertions.h> 35 36 using namespace WTF; 37 using namespace Unicode; 38 39 // We can't specify the namespace in yacc's C output, so do it here instead. 40 using namespace JSC; 41 42 #include "Grammar.h" 43 #include "Lookup.h" 44 #include "Lexer.lut.h" 45 46 namespace JSC { 47 48 static const UChar byteOrderMark = 0xFEFF; 49 50 Lexer::Lexer(JSGlobalData* globalData) 51 : m_isReparsing(false) 52 , m_globalData(globalData) 53 , m_keywordTable(JSC::mainTable) 54 { 55 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); 56 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity); 57 } 58 59 Lexer::~Lexer() 60 { 61 m_keywordTable.deleteTable(); 62 } 63 64 inline const UChar* Lexer::currentCharacter() const 65 { 66 return m_code - 4; 67 } 68 69 inline int Lexer::currentOffset() const 70 { 71 return currentCharacter() - m_codeStart; 72 } 73 74 ALWAYS_INLINE void Lexer::shift1() 75 { 76 m_current = m_next1; 77 m_next1 = m_next2; 78 m_next2 = m_next3; 79 if (LIKELY(m_code < m_codeEnd)) 80 m_next3 = m_code[0]; 81 else 82 m_next3 = -1; 83 84 ++m_code; 85 } 86 87 ALWAYS_INLINE void Lexer::shift2() 88 { 89 m_current = m_next2; 90 m_next1 = m_next3; 91 if (LIKELY(m_code + 1 < m_codeEnd)) { 92 m_next2 = m_code[0]; 93 m_next3 = m_code[1]; 94 } else { 95 m_next2 = m_code < m_codeEnd ? m_code[0] : -1; 96 m_next3 = -1; 97 } 98 99 m_code += 2; 100 } 101 102 ALWAYS_INLINE void Lexer::shift3() 103 { 104 m_current = m_next3; 105 if (LIKELY(m_code + 2 < m_codeEnd)) { 106 m_next1 = m_code[0]; 107 m_next2 = m_code[1]; 108 m_next3 = m_code[2]; 109 } else { 110 m_next1 = m_code < m_codeEnd ? m_code[0] : -1; 111 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1; 112 m_next3 = -1; 113 } 114 115 m_code += 3; 116 } 117 118 ALWAYS_INLINE void Lexer::shift4() 119 { 120 if (LIKELY(m_code + 3 < m_codeEnd)) { 121 m_current = m_code[0]; 122 m_next1 = m_code[1]; 123 m_next2 = m_code[2]; 124 m_next3 = m_code[3]; 125 } else { 126 m_current = m_code < m_codeEnd ? m_code[0] : -1; 127 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1; 128 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1; 129 m_next3 = -1; 130 } 131 132 m_code += 4; 133 } 134 135 void Lexer::setCode(const SourceCode& source, ParserArena& arena) 136 { 137 m_arena = &arena.identifierArena(); 138 139 m_lineNumber = source.firstLine(); 140 m_delimited = false; 141 m_lastToken = -1; 142 143 const UChar* data = source.provider()->data(); 144 145 m_source = &source; 146 m_codeStart = data; 147 m_code = data + source.startOffset(); 148 m_codeEnd = data + source.endOffset(); 149 m_error = false; 150 m_atLineStart = true; 151 152 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. 153 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. 154 if (source.provider()->hasBOMs()) { 155 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { 156 if (UNLIKELY(*p == byteOrderMark)) { 157 copyCodeWithoutBOMs(); 158 break; 159 } 160 } 161 } 162 163 // Read the first characters into the 4-character buffer. 164 shift4(); 165 ASSERT(currentOffset() == source.startOffset()); 166 } 167 168 void Lexer::copyCodeWithoutBOMs() 169 { 170 // Note: In this case, the character offset data for debugging will be incorrect. 171 // If it's important to correctly debug code with extraneous BOMs, then the caller 172 // should strip the BOMs when creating the SourceProvider object and do its own 173 // mapping of offsets within the stripped text to original text offset. 174 175 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); 176 for (const UChar* p = m_code; p < m_codeEnd; ++p) { 177 UChar c = *p; 178 if (c != byteOrderMark) 179 m_codeWithoutBOMs.append(c); 180 } 181 ptrdiff_t startDelta = m_codeStart - m_code; 182 m_code = m_codeWithoutBOMs.data(); 183 m_codeStart = m_code + startDelta; 184 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); 185 } 186 187 void Lexer::shiftLineTerminator() 188 { 189 ASSERT(isLineTerminator(m_current)); 190 191 // Allow both CRLF and LFCR. 192 if (m_current + m_next1 == '\n' + '\r') 193 shift2(); 194 else 195 shift1(); 196 197 ++m_lineNumber; 198 } 199 200 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length) 201 { 202 return &m_arena->makeIdentifier(m_globalData, characters, length); 203 } 204 205 inline bool Lexer::lastTokenWasRestrKeyword() const 206 { 207 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; 208 } 209 210 static NEVER_INLINE bool isNonASCIIIdentStart(int c) 211 { 212 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); 213 } 214 215 static inline bool isIdentStart(int c) 216 { 217 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); 218 } 219 220 static NEVER_INLINE bool isNonASCIIIdentPart(int c) 221 { 222 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other 223 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); 224 } 225 226 static inline bool isIdentPart(int c) 227 { 228 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); 229 } 230 231 static inline int singleEscape(int c) 232 { 233 switch (c) { 234 case 'b': 235 return 0x08; 236 case 't': 237 return 0x09; 238 case 'n': 239 return 0x0A; 240 case 'v': 241 return 0x0B; 242 case 'f': 243 return 0x0C; 244 case 'r': 245 return 0x0D; 246 default: 247 return c; 248 } 249 } 250 251 inline void Lexer::record8(int c) 252 { 253 ASSERT(c >= 0); 254 ASSERT(c <= 0xFF); 255 m_buffer8.append(static_cast<char>(c)); 256 } 257 258 inline void Lexer::record16(UChar c) 259 { 260 m_buffer16.append(c); 261 } 262 263 inline void Lexer::record16(int c) 264 { 265 ASSERT(c >= 0); 266 ASSERT(c <= USHRT_MAX); 267 record16(UChar(static_cast<unsigned short>(c))); 268 } 269 270 int Lexer::lex(void* p1, void* p2) 271 { 272 ASSERT(!m_error); 273 ASSERT(m_buffer8.isEmpty()); 274 ASSERT(m_buffer16.isEmpty()); 275 276 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); 277 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); 278 int token = 0; 279 m_terminator = false; 280 281 start: 282 while (isWhiteSpace(m_current)) 283 shift1(); 284 285 int startOffset = currentOffset(); 286 287 if (m_current == -1) { 288 if (!m_terminator && !m_delimited && !m_isReparsing) { 289 // automatic semicolon insertion if program incomplete 290 token = ';'; 291 goto doneSemicolon; 292 } 293 return 0; 294 } 295 296 m_delimited = false; 297 switch (m_current) { 298 case '>': 299 if (m_next1 == '>' && m_next2 == '>') { 300 if (m_next3 == '=') { 301 shift4(); 302 token = URSHIFTEQUAL; 303 break; 304 } 305 shift3(); 306 token = URSHIFT; 307 break; 308 } 309 if (m_next1 == '>') { 310 if (m_next2 == '=') { 311 shift3(); 312 token = RSHIFTEQUAL; 313 break; 314 } 315 shift2(); 316 token = RSHIFT; 317 break; 318 } 319 if (m_next1 == '=') { 320 shift2(); 321 token = GE; 322 break; 323 } 324 shift1(); 325 token = '>'; 326 break; 327 case '=': 328 if (m_next1 == '=') { 329 if (m_next2 == '=') { 330 shift3(); 331 token = STREQ; 332 break; 333 } 334 shift2(); 335 token = EQEQ; 336 break; 337 } 338 shift1(); 339 token = '='; 340 break; 341 case '!': 342 if (m_next1 == '=') { 343 if (m_next2 == '=') { 344 shift3(); 345 token = STRNEQ; 346 break; 347 } 348 shift2(); 349 token = NE; 350 break; 351 } 352 shift1(); 353 token = '!'; 354 break; 355 case '<': 356 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { 357 // <!-- marks the beginning of a line comment (for www usage) 358 shift4(); 359 goto inSingleLineComment; 360 } 361 if (m_next1 == '<') { 362 if (m_next2 == '=') { 363 shift3(); 364 token = LSHIFTEQUAL; 365 break; 366 } 367 shift2(); 368 token = LSHIFT; 369 break; 370 } 371 if (m_next1 == '=') { 372 shift2(); 373 token = LE; 374 break; 375 } 376 shift1(); 377 token = '<'; 378 break; 379 case '+': 380 if (m_next1 == '+') { 381 shift2(); 382 if (m_terminator) { 383 token = AUTOPLUSPLUS; 384 break; 385 } 386 token = PLUSPLUS; 387 break; 388 } 389 if (m_next1 == '=') { 390 shift2(); 391 token = PLUSEQUAL; 392 break; 393 } 394 shift1(); 395 token = '+'; 396 break; 397 case '-': 398 if (m_next1 == '-') { 399 if (m_atLineStart && m_next2 == '>') { 400 shift3(); 401 goto inSingleLineComment; 402 } 403 shift2(); 404 if (m_terminator) { 405 token = AUTOMINUSMINUS; 406 break; 407 } 408 token = MINUSMINUS; 409 break; 410 } 411 if (m_next1 == '=') { 412 shift2(); 413 token = MINUSEQUAL; 414 break; 415 } 416 shift1(); 417 token = '-'; 418 break; 419 case '*': 420 if (m_next1 == '=') { 421 shift2(); 422 token = MULTEQUAL; 423 break; 424 } 425 shift1(); 426 token = '*'; 427 break; 428 case '/': 429 if (m_next1 == '/') { 430 shift2(); 431 goto inSingleLineComment; 432 } 433 if (m_next1 == '*') 434 goto inMultiLineComment; 435 if (m_next1 == '=') { 436 shift2(); 437 token = DIVEQUAL; 438 break; 439 } 440 shift1(); 441 token = '/'; 442 break; 443 case '&': 444 if (m_next1 == '&') { 445 shift2(); 446 token = AND; 447 break; 448 } 449 if (m_next1 == '=') { 450 shift2(); 451 token = ANDEQUAL; 452 break; 453 } 454 shift1(); 455 token = '&'; 456 break; 457 case '^': 458 if (m_next1 == '=') { 459 shift2(); 460 token = XOREQUAL; 461 break; 462 } 463 shift1(); 464 token = '^'; 465 break; 466 case '%': 467 if (m_next1 == '=') { 468 shift2(); 469 token = MODEQUAL; 470 break; 471 } 472 shift1(); 473 token = '%'; 474 break; 475 case '|': 476 if (m_next1 == '=') { 477 shift2(); 478 token = OREQUAL; 479 break; 480 } 481 if (m_next1 == '|') { 482 shift2(); 483 token = OR; 484 break; 485 } 486 shift1(); 487 token = '|'; 488 break; 489 case '.': 490 if (isASCIIDigit(m_next1)) { 491 record8('.'); 492 shift1(); 493 goto inNumberAfterDecimalPoint; 494 } 495 token = '.'; 496 shift1(); 497 break; 498 case ',': 499 case '~': 500 case '?': 501 case ':': 502 case '(': 503 case ')': 504 case '[': 505 case ']': 506 token = m_current; 507 shift1(); 508 break; 509 case ';': 510 shift1(); 511 m_delimited = true; 512 token = ';'; 513 break; 514 case '{': 515 lvalp->intValue = currentOffset(); 516 shift1(); 517 token = OPENBRACE; 518 break; 519 case '}': 520 lvalp->intValue = currentOffset(); 521 shift1(); 522 m_delimited = true; 523 token = CLOSEBRACE; 524 break; 525 case '\\': 526 goto startIdentifierWithBackslash; 527 case '0': 528 goto startNumberWithZeroDigit; 529 case '1': 530 case '2': 531 case '3': 532 case '4': 533 case '5': 534 case '6': 535 case '7': 536 case '8': 537 case '9': 538 goto startNumber; 539 case '"': 540 case '\'': 541 goto startString; 542 default: 543 if (isIdentStart(m_current)) 544 goto startIdentifierOrKeyword; 545 if (isLineTerminator(m_current)) { 546 shiftLineTerminator(); 547 m_atLineStart = true; 548 m_terminator = true; 549 if (lastTokenWasRestrKeyword()) { 550 token = ';'; 551 goto doneSemicolon; 552 } 553 goto start; 554 } 555 goto returnError; 556 } 557 558 m_atLineStart = false; 559 goto returnToken; 560 561 startString: { 562 int stringQuoteCharacter = m_current; 563 shift1(); 564 565 const UChar* stringStart = currentCharacter(); 566 while (m_current != stringQuoteCharacter) { 567 // Fast check for characters that require special handling. 568 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently 569 // as possible, and lets through all common ASCII characters. 570 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { 571 m_buffer16.append(stringStart, currentCharacter() - stringStart); 572 goto inString; 573 } 574 shift1(); 575 } 576 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart); 577 shift1(); 578 m_atLineStart = false; 579 m_delimited = false; 580 token = STRING; 581 goto returnToken; 582 583 inString: 584 while (m_current != stringQuoteCharacter) { 585 if (m_current == '\\') 586 goto inStringEscapeSequence; 587 if (UNLIKELY(isLineTerminator(m_current))) 588 goto returnError; 589 if (UNLIKELY(m_current == -1)) 590 goto returnError; 591 record16(m_current); 592 shift1(); 593 } 594 goto doneString; 595 596 inStringEscapeSequence: 597 shift1(); 598 if (m_current == 'x') { 599 shift1(); 600 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { 601 record16(convertHex(m_current, m_next1)); 602 shift2(); 603 goto inString; 604 } 605 record16('x'); 606 if (m_current == stringQuoteCharacter) 607 goto doneString; 608 goto inString; 609 } 610 if (m_current == 'u') { 611 shift1(); 612 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { 613 record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); 614 shift4(); 615 goto inString; 616 } 617 if (m_current == stringQuoteCharacter) { 618 record16('u'); 619 goto doneString; 620 } 621 goto returnError; 622 } 623 if (isASCIIOctalDigit(m_current)) { 624 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { 625 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); 626 shift3(); 627 goto inString; 628 } 629 if (isASCIIOctalDigit(m_next1)) { 630 record16((m_current - '0') * 8 + m_next1 - '0'); 631 shift2(); 632 goto inString; 633 } 634 record16(m_current - '0'); 635 shift1(); 636 goto inString; 637 } 638 if (isLineTerminator(m_current)) { 639 shiftLineTerminator(); 640 goto inString; 641 } 642 if (m_current == -1) 643 goto returnError; 644 record16(singleEscape(m_current)); 645 shift1(); 646 goto inString; 647 } 648 649 startIdentifierWithBackslash: 650 shift1(); 651 if (UNLIKELY(m_current != 'u')) 652 goto returnError; 653 shift1(); 654 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) 655 goto returnError; 656 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 657 if (UNLIKELY(!isIdentStart(token))) 658 goto returnError; 659 goto inIdentifierAfterCharacterCheck; 660 661 startIdentifierOrKeyword: { 662 const UChar* identifierStart = currentCharacter(); 663 shift1(); 664 while (isIdentPart(m_current)) 665 shift1(); 666 if (LIKELY(m_current != '\\')) { 667 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart); 668 goto doneIdentifierOrKeyword; 669 } 670 m_buffer16.append(identifierStart, currentCharacter() - identifierStart); 671 } 672 673 do { 674 shift1(); 675 if (UNLIKELY(m_current != 'u')) 676 goto returnError; 677 shift1(); 678 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) 679 goto returnError; 680 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 681 if (UNLIKELY(!isIdentPart(token))) 682 goto returnError; 683 inIdentifierAfterCharacterCheck: 684 record16(token); 685 shift4(); 686 687 while (isIdentPart(m_current)) { 688 record16(m_current); 689 shift1(); 690 } 691 } while (UNLIKELY(m_current == '\\')); 692 goto doneIdentifier; 693 694 inSingleLineComment: 695 while (!isLineTerminator(m_current)) { 696 if (UNLIKELY(m_current == -1)) 697 return 0; 698 shift1(); 699 } 700 shiftLineTerminator(); 701 m_atLineStart = true; 702 m_terminator = true; 703 if (lastTokenWasRestrKeyword()) 704 goto doneSemicolon; 705 goto start; 706 707 inMultiLineComment: 708 shift2(); 709 while (m_current != '*' || m_next1 != '/') { 710 if (isLineTerminator(m_current)) 711 shiftLineTerminator(); 712 else { 713 shift1(); 714 if (UNLIKELY(m_current == -1)) 715 goto returnError; 716 } 717 } 718 shift2(); 719 m_atLineStart = false; 720 goto start; 721 722 startNumberWithZeroDigit: 723 shift1(); 724 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) { 725 shift1(); 726 goto inHex; 727 } 728 if (m_current == '.') { 729 record8('0'); 730 record8('.'); 731 shift1(); 732 goto inNumberAfterDecimalPoint; 733 } 734 if ((m_current | 0x20) == 'e') { 735 record8('0'); 736 record8('e'); 737 shift1(); 738 goto inExponentIndicator; 739 } 740 if (isASCIIOctalDigit(m_current)) 741 goto inOctal; 742 if (isASCIIDigit(m_current)) 743 goto startNumber; 744 lvalp->doubleValue = 0; 745 goto doneNumeric; 746 747 inNumberAfterDecimalPoint: 748 while (isASCIIDigit(m_current)) { 749 record8(m_current); 750 shift1(); 751 } 752 if ((m_current | 0x20) == 'e') { 753 record8('e'); 754 shift1(); 755 goto inExponentIndicator; 756 } 757 goto doneNumber; 758 759 inExponentIndicator: 760 if (m_current == '+' || m_current == '-') { 761 record8(m_current); 762 shift1(); 763 } 764 if (!isASCIIDigit(m_current)) 765 goto returnError; 766 do { 767 record8(m_current); 768 shift1(); 769 } while (isASCIIDigit(m_current)); 770 goto doneNumber; 771 772 inOctal: { 773 do { 774 record8(m_current); 775 shift1(); 776 } while (isASCIIOctalDigit(m_current)); 777 if (isASCIIDigit(m_current)) 778 goto startNumber; 779 780 double dval = 0; 781 782 const char* end = m_buffer8.end(); 783 for (const char* p = m_buffer8.data(); p < end; ++p) { 784 dval *= 8; 785 dval += *p - '0'; 786 } 787 if (dval >= mantissaOverflowLowerBound) 788 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8); 789 790 m_buffer8.resize(0); 791 792 lvalp->doubleValue = dval; 793 goto doneNumeric; 794 } 795 796 inHex: { 797 do { 798 record8(m_current); 799 shift1(); 800 } while (isASCIIHexDigit(m_current)); 801 802 double dval = 0; 803 804 const char* end = m_buffer8.end(); 805 for (const char* p = m_buffer8.data(); p < end; ++p) { 806 dval *= 16; 807 dval += toASCIIHexValue(*p); 808 } 809 if (dval >= mantissaOverflowLowerBound) 810 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16); 811 812 m_buffer8.resize(0); 813 814 lvalp->doubleValue = dval; 815 goto doneNumeric; 816 } 817 818 startNumber: 819 record8(m_current); 820 shift1(); 821 while (isASCIIDigit(m_current)) { 822 record8(m_current); 823 shift1(); 824 } 825 if (m_current == '.') { 826 record8('.'); 827 shift1(); 828 goto inNumberAfterDecimalPoint; 829 } 830 if ((m_current | 0x20) == 'e') { 831 record8('e'); 832 shift1(); 833 goto inExponentIndicator; 834 } 835 836 // Fall through into doneNumber. 837 838 doneNumber: 839 // Null-terminate string for strtod. 840 m_buffer8.append('\0'); 841 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0); 842 m_buffer8.resize(0); 843 844 // Fall through into doneNumeric. 845 846 doneNumeric: 847 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. 848 if (UNLIKELY(isIdentStart(m_current))) 849 goto returnError; 850 851 m_atLineStart = false; 852 m_delimited = false; 853 token = NUMBER; 854 goto returnToken; 855 856 doneSemicolon: 857 token = ';'; 858 m_delimited = true; 859 goto returnToken; 860 861 doneIdentifier: 862 m_atLineStart = false; 863 m_delimited = false; 864 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 865 m_buffer16.resize(0); 866 token = IDENT; 867 goto returnToken; 868 869 doneIdentifierOrKeyword: { 870 m_atLineStart = false; 871 m_delimited = false; 872 m_buffer16.resize(0); 873 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); 874 token = entry ? entry->lexerValue() : IDENT; 875 goto returnToken; 876 } 877 878 doneString: 879 // Atomize constant strings in case they're later used in property lookup. 880 shift1(); 881 m_atLineStart = false; 882 m_delimited = false; 883 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 884 m_buffer16.resize(0); 885 token = STRING; 886 887 // Fall through into returnToken. 888 889 returnToken: { 890 int lineNumber = m_lineNumber; 891 llocp->first_line = lineNumber; 892 llocp->last_line = lineNumber; 893 llocp->first_column = startOffset; 894 llocp->last_column = currentOffset(); 895 896 m_lastToken = token; 897 return token; 898 } 899 900 returnError: 901 m_error = true; 902 return -1; 903 } 904 905 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) 906 { 907 ASSERT(m_buffer16.isEmpty()); 908 909 bool lastWasEscape = false; 910 bool inBrackets = false; 911 912 if (patternPrefix) { 913 ASSERT(!isLineTerminator(patternPrefix)); 914 ASSERT(patternPrefix != '/'); 915 ASSERT(patternPrefix != '['); 916 record16(patternPrefix); 917 } 918 919 while (true) { 920 int current = m_current; 921 922 if (isLineTerminator(current) || current == -1) { 923 m_buffer16.resize(0); 924 return false; 925 } 926 927 shift1(); 928 929 if (current == '/' && !lastWasEscape && !inBrackets) 930 break; 931 932 record16(current); 933 934 if (lastWasEscape) { 935 lastWasEscape = false; 936 continue; 937 } 938 939 switch (current) { 940 case '[': 941 inBrackets = true; 942 break; 943 case ']': 944 inBrackets = false; 945 break; 946 case '\\': 947 lastWasEscape = true; 948 break; 949 } 950 } 951 952 pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 953 m_buffer16.resize(0); 954 955 while (isIdentPart(m_current)) { 956 record16(m_current); 957 shift1(); 958 } 959 960 flags = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 961 m_buffer16.resize(0); 962 963 return true; 964 } 965 966 bool Lexer::skipRegExp() 967 { 968 bool lastWasEscape = false; 969 bool inBrackets = false; 970 971 while (true) { 972 int current = m_current; 973 974 if (isLineTerminator(current) || current == -1) 975 return false; 976 977 shift1(); 978 979 if (current == '/' && !lastWasEscape && !inBrackets) 980 break; 981 982 if (lastWasEscape) { 983 lastWasEscape = false; 984 continue; 985 } 986 987 switch (current) { 988 case '[': 989 inBrackets = true; 990 break; 991 case ']': 992 inBrackets = false; 993 break; 994 case '\\': 995 lastWasEscape = true; 996 break; 997 } 998 } 999 1000 while (isIdentPart(m_current)) 1001 shift1(); 1002 1003 return true; 1004 } 1005 1006 void Lexer::clear() 1007 { 1008 m_arena = 0; 1009 m_codeWithoutBOMs.clear(); 1010 1011 Vector<char> newBuffer8; 1012 newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); 1013 m_buffer8.swap(newBuffer8); 1014 1015 Vector<UChar> newBuffer16; 1016 newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); 1017 m_buffer16.swap(newBuffer16); 1018 1019 m_isReparsing = false; 1020 } 1021 1022 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) 1023 { 1024 if (m_codeWithoutBOMs.isEmpty()) 1025 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); 1026 1027 const UChar* data = m_source->provider()->data(); 1028 1029 ASSERT(openBrace < closeBrace); 1030 1031 int numBOMsBeforeOpenBrace = 0; 1032 int numBOMsBetweenBraces = 0; 1033 1034 int i; 1035 for (i = m_source->startOffset(); i < openBrace; ++i) 1036 numBOMsBeforeOpenBrace += data[i] == byteOrderMark; 1037 for (; i < closeBrace; ++i) 1038 numBOMsBetweenBraces += data[i] == byteOrderMark; 1039 1040 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, 1041 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine); 1042 } 1043 1044 } // namespace JSC 1045