Home | History | Annotate | Download | only in parser
      1 /*
      2  *  Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org)
      3  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
      4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca)
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Library General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Library General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Library General Public License
     17  *  along with this library; see the file COPYING.LIB.  If not, write to
     18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  *  Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "Lexer.h"
     25 
     26 #include "JSFunction.h"
     27 #include "JSGlobalObjectFunctions.h"
     28 #include "NodeInfo.h"
     29 #include "Nodes.h"
     30 #include "dtoa.h"
     31 #include <ctype.h>
     32 #include <limits.h>
     33 #include <string.h>
     34 #include <wtf/Assertions.h>
     35 
     36 using namespace WTF;
     37 using namespace Unicode;
     38 
     39 // We can't specify the namespace in yacc's C output, so do it here instead.
     40 using namespace JSC;
     41 
     42 #include "Grammar.h"
     43 #include "Lookup.h"
     44 #include "Lexer.lut.h"
     45 
     46 namespace JSC {
     47 
        // U+FEFF, the byte order mark. copyCodeWithoutBOMs() drops every occurrence of
        // this character from the text to be lexed, and sourceCode() counts occurrences
        // to translate offsets back to the original text.
     48 static const UChar byteOrderMark = 0xFEFF;
     49 
     50 Lexer::Lexer(JSGlobalData* globalData)
     51     : m_isReparsing(false)
     52     , m_globalData(globalData)
     53     , m_keywordTable(JSC::mainTable)
     54 {
     55     m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
     56     m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
     57 }
     58 
     59 Lexer::~Lexer()
     60 {
     61     m_keywordTable.deleteTable();
     62 }
     63 
     64 inline const UChar* Lexer::currentCharacter() const
     65 {
     66     return m_code - 4;
     67 }
     68 
     69 inline int Lexer::currentOffset() const
     70 {
     71     return currentCharacter() - m_codeStart;
     72 }
     73 
     74 ALWAYS_INLINE void Lexer::shift1()
     75 {
     76     m_current = m_next1;
     77     m_next1 = m_next2;
     78     m_next2 = m_next3;
     79     if (LIKELY(m_code < m_codeEnd))
     80         m_next3 = m_code[0];
     81     else
     82         m_next3 = -1;
     83 
     84     ++m_code;
     85 }
     86 
     87 ALWAYS_INLINE void Lexer::shift2()
     88 {
     89     m_current = m_next2;
     90     m_next1 = m_next3;
     91     if (LIKELY(m_code + 1 < m_codeEnd)) {
     92         m_next2 = m_code[0];
     93         m_next3 = m_code[1];
     94     } else {
     95         m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
     96         m_next3 = -1;
     97     }
     98 
     99     m_code += 2;
    100 }
    101 
    102 ALWAYS_INLINE void Lexer::shift3()
    103 {
    104     m_current = m_next3;
    105     if (LIKELY(m_code + 2 < m_codeEnd)) {
    106         m_next1 = m_code[0];
    107         m_next2 = m_code[1];
    108         m_next3 = m_code[2];
    109     } else {
    110         m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
    111         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
    112         m_next3 = -1;
    113     }
    114 
    115     m_code += 3;
    116 }
    117 
    118 ALWAYS_INLINE void Lexer::shift4()
    119 {
    120     if (LIKELY(m_code + 3 < m_codeEnd)) {
    121         m_current = m_code[0];
    122         m_next1 = m_code[1];
    123         m_next2 = m_code[2];
    124         m_next3 = m_code[3];
    125     } else {
    126         m_current = m_code < m_codeEnd ? m_code[0] : -1;
    127         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
    128         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
    129         m_next3 = -1;
    130     }
    131 
    132     m_code += 4;
    133 }
    134 
    135 void Lexer::setCode(const SourceCode& source, ParserArena& arena)
    136 {
    137     m_arena = &arena.identifierArena();
    138 
    139     m_lineNumber = source.firstLine();
    140     m_delimited = false;
    141     m_lastToken = -1;
    142 
    143     const UChar* data = source.provider()->data();
    144 
    145     m_source = &source;
    146     m_codeStart = data;
    147     m_code = data + source.startOffset();
    148     m_codeEnd = data + source.endOffset();
    149     m_error = false;
    150     m_atLineStart = true;
    151 
    152     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
    153     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
    154     if (source.provider()->hasBOMs()) {
    155         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
    156             if (UNLIKELY(*p == byteOrderMark)) {
    157                 copyCodeWithoutBOMs();
    158                 break;
    159             }
    160         }
    161     }
    162 
    163     // Read the first characters into the 4-character buffer.
    164     shift4();
    165     ASSERT(currentOffset() == source.startOffset());
    166 }
    167 
    168 void Lexer::copyCodeWithoutBOMs()
    169 {
    170     // Note: In this case, the character offset data for debugging will be incorrect.
    171     // If it's important to correctly debug code with extraneous BOMs, then the caller
    172     // should strip the BOMs when creating the SourceProvider object and do its own
    173     // mapping of offsets within the stripped text to original text offset.
    174 
    175     m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
    176     for (const UChar* p = m_code; p < m_codeEnd; ++p) {
    177         UChar c = *p;
    178         if (c != byteOrderMark)
    179             m_codeWithoutBOMs.append(c);
    180     }
    181     ptrdiff_t startDelta = m_codeStart - m_code;
    182     m_code = m_codeWithoutBOMs.data();
    183     m_codeStart = m_code + startDelta;
    184     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
    185 }
    186 
    187 void Lexer::shiftLineTerminator()
    188 {
    189     ASSERT(isLineTerminator(m_current));
    190 
    191     // Allow both CRLF and LFCR.
    192     if (m_current + m_next1 == '\n' + '\r')
    193         shift2();
    194     else
    195         shift1();
    196 
    197     ++m_lineNumber;
    198 }
    199 
    200 ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
    201 {
    202     return &m_arena->makeIdentifier(m_globalData, characters, length);
    203 }
    204 
    205 inline bool Lexer::lastTokenWasRestrKeyword() const
    206 {
    207     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
    208 }
    209 
    210 static NEVER_INLINE bool isNonASCIIIdentStart(int c)
    211 {
    212     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
    213 }
    214 
    215 static inline bool isIdentStart(int c)
    216 {
    217     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
    218 }
    219 
    220 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
    221 {
    222     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
    223         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
    224 }
    225 
    226 static inline bool isIdentPart(int c)
    227 {
    228     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
    229 }
    230 
    231 static inline int singleEscape(int c)
    232 {
    233     switch (c) {
    234         case 'b':
    235             return 0x08;
    236         case 't':
    237             return 0x09;
    238         case 'n':
    239             return 0x0A;
    240         case 'v':
    241             return 0x0B;
    242         case 'f':
    243             return 0x0C;
    244         case 'r':
    245             return 0x0D;
    246         default:
    247             return c;
    248     }
    249 }
    250 
    251 inline void Lexer::record8(int c)
    252 {
    253     ASSERT(c >= 0);
    254     ASSERT(c <= 0xFF);
    255     m_buffer8.append(static_cast<char>(c));
    256 }
    257 
    258 inline void Lexer::record16(UChar c)
    259 {
    260     m_buffer16.append(c);
    261 }
    262 
    263 inline void Lexer::record16(int c)
    264 {
    265     ASSERT(c >= 0);
    266     ASSERT(c <= USHRT_MAX);
    267     record16(UChar(static_cast<unsigned short>(c)));
    268 }
    269 
        // Scans and returns the next token.
        //
        // p1 is treated as a YYSTYPE* and receives the token's value where there is one:
        // ident for identifiers, keywords and strings, doubleValue for numeric literals,
        // intValue (the character offset) for '{' and '}'.
        // p2 is treated as a YYLTYPE* and receives the line number and the start/end
        // character offsets of the token.
        //
        // Returns the token code, 0 when the end of the input is reached, or -1 (after
        // setting m_error) when the input cannot be tokenized.
    270 int Lexer::lex(void* p1, void* p2)
    271 {
    272     ASSERT(!m_error);
    273     ASSERT(m_buffer8.isEmpty());
    274     ASSERT(m_buffer16.isEmpty());
    275 
    276     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
    277     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
    278     int token = 0;
            // m_terminator records whether a line terminator was consumed during this call.
    279     m_terminator = false;
    280 
            // Re-entered after whitespace, line terminators and comments have been skipped.
    281 start:
    282     while (isWhiteSpace(m_current))
    283         shift1();
    284 
    285     int startOffset = currentOffset();
    286 
            // -1 is the value the shift functions load once the input is exhausted.
    287     if (m_current == -1) {
    288         if (!m_terminator && !m_delimited && !m_isReparsing) {
    289             // automatic semicolon insertion if program incomplete
    290             token = ';';
    291             goto doneSemicolon;
    292         }
    293         return 0;
    294     }
    295 
    296     m_delimited = false;
            // Punctuators are matched longest-first using the three lookahead characters.
    297     switch (m_current) {
    298         case '>':
    299             if (m_next1 == '>' && m_next2 == '>') {
    300                 if (m_next3 == '=') {
    301                     shift4();
    302                     token = URSHIFTEQUAL;
    303                     break;
    304                 }
    305                 shift3();
    306                 token = URSHIFT;
    307                 break;
    308             }
    309             if (m_next1 == '>') {
    310                 if (m_next2 == '=') {
    311                     shift3();
    312                     token = RSHIFTEQUAL;
    313                     break;
    314                 }
    315                 shift2();
    316                 token = RSHIFT;
    317                 break;
    318             }
    319             if (m_next1 == '=') {
    320                 shift2();
    321                 token = GE;
    322                 break;
    323             }
    324             shift1();
    325             token = '>';
    326             break;
    327         case '=':
    328             if (m_next1 == '=') {
    329                 if (m_next2 == '=') {
    330                     shift3();
    331                     token = STREQ;
    332                     break;
    333                 }
    334                 shift2();
    335                 token = EQEQ;
    336                 break;
    337             }
    338             shift1();
    339             token = '=';
    340             break;
    341         case '!':
    342             if (m_next1 == '=') {
    343                 if (m_next2 == '=') {
    344                     shift3();
    345                     token = STRNEQ;
    346                     break;
    347                 }
    348                 shift2();
    349                 token = NE;
    350                 break;
    351             }
    352             shift1();
    353             token = '!';
    354             break;
    355         case '<':
    356             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
    357                 // <!-- marks the beginning of a line comment (for www usage)
    358                 shift4();
    359                 goto inSingleLineComment;
    360             }
    361             if (m_next1 == '<') {
    362                 if (m_next2 == '=') {
    363                     shift3();
    364                     token = LSHIFTEQUAL;
    365                     break;
    366                 }
    367                 shift2();
    368                 token = LSHIFT;
    369                 break;
    370             }
    371             if (m_next1 == '=') {
    372                 shift2();
    373                 token = LE;
    374                 break;
    375             }
    376             shift1();
    377             token = '<';
    378             break;
    379         case '+':
    380             if (m_next1 == '+') {
    381                 shift2();
                        // A ++ that follows a line terminator consumed in this call gets its own token.
    382                 if (m_terminator) {
    383                     token = AUTOPLUSPLUS;
    384                     break;
    385                 }
    386                 token = PLUSPLUS;
    387                 break;
    388             }
    389             if (m_next1 == '=') {
    390                 shift2();
    391                 token = PLUSEQUAL;
    392                 break;
    393             }
    394             shift1();
    395             token = '+';
    396             break;
    397         case '-':
    398             if (m_next1 == '-') {
                        // --> is treated as the start of a line comment, but only when nothing
                        // but whitespace and comments precedes it on the line.
    399                 if (m_atLineStart && m_next2 == '>') {
    400                     shift3();
    401                     goto inSingleLineComment;
    402                 }
    403                 shift2();
                        // Same distinction as for ++ above.
    404                 if (m_terminator) {
    405                     token = AUTOMINUSMINUS;
    406                     break;
    407                 }
    408                 token = MINUSMINUS;
    409                 break;
    410             }
    411             if (m_next1 == '=') {
    412                 shift2();
    413                 token = MINUSEQUAL;
    414                 break;
    415             }
    416             shift1();
    417             token = '-';
    418             break;
    419         case '*':
    420             if (m_next1 == '=') {
    421                 shift2();
    422                 token = MULTEQUAL;
    423                 break;
    424             }
    425             shift1();
    426             token = '*';
    427             break;
    428         case '/':
    429             if (m_next1 == '/') {
    430                 shift2();
    431                 goto inSingleLineComment;
    432             }
                    // The two characters of the opener are consumed at inMultiLineComment.
    433             if (m_next1 == '*')
    434                 goto inMultiLineComment;
    435             if (m_next1 == '=') {
    436                 shift2();
    437                 token = DIVEQUAL;
    438                 break;
    439             }
    440             shift1();
    441             token = '/';
    442             break;
    443         case '&':
    444             if (m_next1 == '&') {
    445                 shift2();
    446                 token = AND;
    447                 break;
    448             }
    449             if (m_next1 == '=') {
    450                 shift2();
    451                 token = ANDEQUAL;
    452                 break;
    453             }
    454             shift1();
    455             token = '&';
    456             break;
    457         case '^':
    458             if (m_next1 == '=') {
    459                 shift2();
    460                 token = XOREQUAL;
    461                 break;
    462             }
    463             shift1();
    464             token = '^';
    465             break;
    466         case '%':
    467             if (m_next1 == '=') {
    468                 shift2();
    469                 token = MODEQUAL;
    470                 break;
    471             }
    472             shift1();
    473             token = '%';
    474             break;
    475         case '|':
    476             if (m_next1 == '=') {
    477                 shift2();
    478                 token = OREQUAL;
    479                 break;
    480             }
    481             if (m_next1 == '|') {
    482                 shift2();
    483                 token = OR;
    484                 break;
    485             }
    486             shift1();
    487             token = '|';
    488             break;
    489         case '.':
                    // A dot followed by a digit starts a numeric literal such as .5.
    490             if (isASCIIDigit(m_next1)) {
    491                 record8('.');
    492                 shift1();
    493                 goto inNumberAfterDecimalPoint;
    494             }
    495             token = '.';
    496             shift1();
    497             break;
    498         case ',':
    499         case '~':
    500         case '?':
    501         case ':':
    502         case '(':
    503         case ')':
    504         case '[':
    505         case ']':
                    // These characters are their own token codes.
    506             token = m_current;
    507             shift1();
    508             break;
    509         case ';':
    510             shift1();
    511             m_delimited = true;
    512             token = ';';
    513             break;
    514         case '{':
                    // The brace's character offset is handed to the parser as the token value.
    515             lvalp->intValue = currentOffset();
    516             shift1();
    517             token = OPENBRACE;
    518             break;
    519         case '}':
    520             lvalp->intValue = currentOffset();
    521             shift1();
    522             m_delimited = true;
    523             token = CLOSEBRACE;
    524             break;
    525         case '\\':
    526             goto startIdentifierWithBackslash;
    527         case '0':
    528             goto startNumberWithZeroDigit;
    529         case '1':
    530         case '2':
    531         case '3':
    532         case '4':
    533         case '5':
    534         case '6':
    535         case '7':
    536         case '8':
    537         case '9':
    538             goto startNumber;
    539         case '"':
    540         case '\'':
    541             goto startString;
    542         default:
    543             if (isIdentStart(m_current))
    544                 goto startIdentifierOrKeyword;
    545             if (isLineTerminator(m_current)) {
    546                 shiftLineTerminator();
    547                 m_atLineStart = true;
    548                 m_terminator = true;
                        // A line terminator directly after continue/break/return/throw
                        // produces a semicolon token.
    549                 if (lastTokenWasRestrKeyword()) {
    550                     token = ';';
    551                     goto doneSemicolon;
    552                 }
    553                 goto start;
    554             }
    555             goto returnError;
    556     }
    557 
            // Reached via break from the switch above: a punctuator token was recognised.
    558     m_atLineStart = false;
    559     goto returnToken;
    560 
    561 startString: {
    562     int stringQuoteCharacter = m_current;
    563     shift1();
    564 
    565     const UChar* stringStart = currentCharacter();
    566     while (m_current != stringQuoteCharacter) {
    567         // Fast check for characters that require special handling.
    568         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
    569         // as possible, and lets through all common ASCII characters.
    570         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
                    // Switch to the slow path, carrying over what has been scanned so far.
    571             m_buffer16.append(stringStart, currentCharacter() - stringStart);
    572             goto inString;
    573         }
    574         shift1();
    575     }
            // Fast path: nothing needed special handling, so the identifier is built
            // directly from the characters in the code buffer.
    576     lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
    577     shift1();
    578     m_atLineStart = false;
    579     m_delimited = false;
    580     token = STRING;
    581     goto returnToken;
    582 
            // Slow path: the string's characters are accumulated in m_buffer16.
    583 inString:
    584     while (m_current != stringQuoteCharacter) {
    585         if (m_current == '\\')
    586             goto inStringEscapeSequence;
    587         if (UNLIKELY(isLineTerminator(m_current)))
    588             goto returnError;
    589         if (UNLIKELY(m_current == -1))
    590             goto returnError;
    591         record16(m_current);
    592         shift1();
    593     }
    594     goto doneString;
    595 
    596 inStringEscapeSequence:
    597     shift1();
    598     if (m_current == 'x') {
    599         shift1();
    600         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
    601             record16(convertHex(m_current, m_next1));
    602             shift2();
    603             goto inString;
    604         }
                // \x without two hex digits after it: a literal 'x' is kept.
    605         record16('x');
    606         if (m_current == stringQuoteCharacter)
    607             goto doneString;
    608         goto inString;
    609     }
    610     if (m_current == 'u') {
    611         shift1();
    612         if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
    613             record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
    614             shift4();
    615             goto inString;
    616         }
                // \u without four hex digits is an error, except directly before the
                // closing quote, where a literal 'u' is kept.
    617         if (m_current == stringQuoteCharacter) {
    618             record16('u');
    619             goto doneString;
    620         }
    621         goto returnError;
    622     }
            // Octal escapes: up to three digits when the first is 0-3, otherwise up to two.
    623     if (isASCIIOctalDigit(m_current)) {
    624         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
    625             record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
    626             shift3();
    627             goto inString;
    628         }
    629         if (isASCIIOctalDigit(m_next1)) {
    630             record16((m_current - '0') * 8 + m_next1 - '0');
    631             shift2();
    632             goto inString;
    633         }
    634         record16(m_current - '0');
    635         shift1();
    636         goto inString;
    637     }
            // Backslash followed by a line terminator: the terminator is consumed and
            // nothing is added to the string.
    638     if (isLineTerminator(m_current)) {
    639         shiftLineTerminator();
    640         goto inString;
    641     }
    642     if (m_current == -1)
    643         goto returnError;
    644     record16(singleEscape(m_current));
    645     shift1();
    646     goto inString;
    647 }
    648 
            // An identifier whose first character is written as a \uXXXX escape.
    649 startIdentifierWithBackslash:
    650     shift1();
    651     if (UNLIKELY(m_current != 'u'))
    652         goto returnError;
    653     shift1();
    654     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
    655         goto returnError;
            // token is used as scratch storage for the decoded character here.
    656     token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    657     if (UNLIKELY(!isIdentStart(token)))
    658         goto returnError;
    659     goto inIdentifierAfterCharacterCheck;
    660 
    661 startIdentifierOrKeyword: {
    662     const UChar* identifierStart = currentCharacter();
    663     shift1();
    664     while (isIdentPart(m_current))
    665         shift1();
            // Fast path: no escape follows, so the identifier is built directly from the
            // characters in the code buffer.
    666     if (LIKELY(m_current != '\\')) {
    667         lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
    668         goto doneIdentifierOrKeyword;
    669     }
    670     m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
    671 }
    672 
            // Slow path for identifiers containing \uXXXX escapes: the decoded text is
            // accumulated in m_buffer16. Each iteration handles one escape plus the plain
            // identifier characters that follow it.
    673     do {
    674         shift1();
    675         if (UNLIKELY(m_current != 'u'))
    676             goto returnError;
    677         shift1();
    678         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
    679             goto returnError;
    680         token = convertUnicode(m_current, m_next1, m_next2, m_next3);
    681         if (UNLIKELY(!isIdentPart(token)))
    682             goto returnError;
    683 inIdentifierAfterCharacterCheck:
    684         record16(token);
    685         shift4();
    686 
    687         while (isIdentPart(m_current)) {
    688             record16(m_current);
    689             shift1();
    690         }
    691     } while (UNLIKELY(m_current == '\\'));
    692     goto doneIdentifier;
    693 
    694 inSingleLineComment:
    695     while (!isLineTerminator(m_current)) {
                // End of input inside the comment: end of input is reported directly; no
                // semicolon is inserted on this path.
    696         if (UNLIKELY(m_current == -1))
    697             return 0;
    698         shift1();
    699     }
    700     shiftLineTerminator();
    701     m_atLineStart = true;
    702     m_terminator = true;
    703     if (lastTokenWasRestrKeyword())
    704         goto doneSemicolon;
    705     goto start;
    706 
    707 inMultiLineComment:
            // Skip the opening slash and star.
    708     shift2();
            // NOTE(review): line terminators inside the comment advance m_lineNumber (via
            // shiftLineTerminator) but m_terminator is left unchanged, so they do not take
            // part in the ++/-- and restricted-keyword handling above. Confirm that this
            // is intended.
    709     while (m_current != '*' || m_next1 != '/') {
    710         if (isLineTerminator(m_current))
    711             shiftLineTerminator();
    712         else {
    713             shift1();
                    // The input ended before the comment was closed.
    714             if (UNLIKELY(m_current == -1))
    715                 goto returnError;
    716         }
    717     }
    718     shift2();
    719     m_atLineStart = false;
    720     goto start;
    721 
    722 startNumberWithZeroDigit:
    723     shift1();
            // OR-ing with 0x20 makes the comparison accept both letter cases.
    724     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
    725         shift1();
    726         goto inHex;
    727     }
    728     if (m_current == '.') {
    729         record8('0');
    730         record8('.');
    731         shift1();
    732         goto inNumberAfterDecimalPoint;
    733     }
    734     if ((m_current | 0x20) == 'e') {
    735         record8('0');
    736         record8('e');
    737         shift1();
    738         goto inExponentIndicator;
    739     }
    740     if (isASCIIOctalDigit(m_current))
    741         goto inOctal;
            // 08 and 09 are read as decimal.
    742     if (isASCIIDigit(m_current))
    743         goto startNumber;
            // A lone zero.
    744     lvalp->doubleValue = 0;
    745     goto doneNumeric;
    746 
    747 inNumberAfterDecimalPoint:
    748     while (isASCIIDigit(m_current)) {
    749         record8(m_current);
    750         shift1();
    751     }
    752     if ((m_current | 0x20) == 'e') {
    753         record8('e');
    754         shift1();
    755         goto inExponentIndicator;
    756     }
    757     goto doneNumber;
    758 
    759 inExponentIndicator:
    760     if (m_current == '+' || m_current == '-') {
    761         record8(m_current);
    762         shift1();
    763     }
            // At least one exponent digit is required.
    764     if (!isASCIIDigit(m_current))
    765         goto returnError;
    766     do {
    767         record8(m_current);
    768         shift1();
    769     } while (isASCIIDigit(m_current));
    770     goto doneNumber;
    771 
    772 inOctal: {
    773     do {
    774         record8(m_current);
    775         shift1();
    776     } while (isASCIIOctalDigit(m_current));
            // An 8 or 9 after the octal digits: continue as a decimal literal, keeping the
            // digits recorded so far.
    777     if (isASCIIDigit(m_current))
    778         goto startNumber;
    779 
    780     double dval = 0;
    781 
    782     const char* end = m_buffer8.end();
    783     for (const char* p = m_buffer8.data(); p < end; ++p) {
    784         dval *= 8;
    785         dval += *p - '0';
    786     }
            // Values at or above mantissaOverflowLowerBound are recomputed by
            // parseIntOverflow (presumably because the loop above is no longer exact there).
    787     if (dval >= mantissaOverflowLowerBound)
    788         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
    789 
    790     m_buffer8.resize(0);
    791 
    792     lvalp->doubleValue = dval;
    793     goto doneNumeric;
    794 }
    795 
    796 inHex: {
            // Entered with m_current on the first hex digit; the 0x prefix is not recorded.
    797     do {
    798         record8(m_current);
    799         shift1();
    800     } while (isASCIIHexDigit(m_current));
    801 
    802     double dval = 0;
    803 
    804     const char* end = m_buffer8.end();
    805     for (const char* p = m_buffer8.data(); p < end; ++p) {
    806         dval *= 16;
    807         dval += toASCIIHexValue(*p);
    808     }
            // Same large-value handling as in inOctal.
    809     if (dval >= mantissaOverflowLowerBound)
    810         dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
    811 
    812     m_buffer8.resize(0);
    813 
    814     lvalp->doubleValue = dval;
    815     goto doneNumeric;
    816 }
    817 
    818 startNumber:
    819     record8(m_current);
    820     shift1();
    821     while (isASCIIDigit(m_current)) {
    822         record8(m_current);
    823         shift1();
    824     }
    825     if (m_current == '.') {
    826         record8('.');
    827         shift1();
    828         goto inNumberAfterDecimalPoint;
    829     }
    830     if ((m_current | 0x20) == 'e') {
    831         record8('e');
    832         shift1();
    833         goto inExponentIndicator;
    834     }
    835 
    836     // Fall through into doneNumber.
    837 
            // The decimal literal text collected in m_buffer8 is converted here.
    838 doneNumber:
    839     // Null-terminate string for strtod.
    840     m_buffer8.append('\0');
    841     lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
    842     m_buffer8.resize(0);
    843 
    844     // Fall through into doneNumeric.
    845 
    846 doneNumeric:
    847     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
    848     if (UNLIKELY(isIdentStart(m_current)))
    849         goto returnError;
    850 
    851     m_atLineStart = false;
    852     m_delimited = false;
    853     token = NUMBER;
    854     goto returnToken;
    855 
            // Shared exit for explicit and automatically inserted semicolons.
    856 doneSemicolon:
    857     token = ';';
    858     m_delimited = true;
    859     goto returnToken;
    860 
            // Identifiers that contained an escape: always IDENT, with no keyword lookup.
    861 doneIdentifier:
    862     m_atLineStart = false;
    863     m_delimited = false;
    864     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    865     m_buffer16.resize(0);
    866     token = IDENT;
    867     goto returnToken;
    868 
            // Identifiers without escapes: the keyword table decides between a keyword
            // token and IDENT.
    869 doneIdentifierOrKeyword: {
    870     m_atLineStart = false;
    871     m_delimited = false;
    872     m_buffer16.resize(0);
    873     const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
    874     token = entry ? entry->lexerValue() : IDENT;
    875     goto returnToken;
    876 }
    877 
    878 doneString:
    879     // Atomize constant strings in case they're later used in property lookup.
            // The shift below consumes the closing quote.
    880     shift1();
    881     m_atLineStart = false;
    882     m_delimited = false;
    883     lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    884     m_buffer16.resize(0);
    885     token = STRING;
    886 
    887     // Fall through into returnToken.
    888 
    889 returnToken: {
            // Both line fields receive the line number current at the end of the token.
            // The column fields carry character offsets (token start and the offset just
            // past the token), not columns.
    890     int lineNumber = m_lineNumber;
    891     llocp->first_line = lineNumber;
    892     llocp->last_line = lineNumber;
    893     llocp->first_column = startOffset;
    894     llocp->last_column = currentOffset();
    895 
    896     m_lastToken = token;
    897     return token;
    898 }
    899 
            // Marks the lexer as failed; the ASSERT at the top of this function requires
            // that lex() is not called again while m_error is set.
    900 returnError:
    901     m_error = true;
    902     return -1;
    903 }
    904 
    905 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
    906 {
    907     ASSERT(m_buffer16.isEmpty());
    908 
    909     bool lastWasEscape = false;
    910     bool inBrackets = false;
    911 
    912     if (patternPrefix) {
    913         ASSERT(!isLineTerminator(patternPrefix));
    914         ASSERT(patternPrefix != '/');
    915         ASSERT(patternPrefix != '[');
    916         record16(patternPrefix);
    917     }
    918 
    919     while (true) {
    920         int current = m_current;
    921 
    922         if (isLineTerminator(current) || current == -1) {
    923             m_buffer16.resize(0);
    924             return false;
    925         }
    926 
    927         shift1();
    928 
    929         if (current == '/' && !lastWasEscape && !inBrackets)
    930             break;
    931 
    932         record16(current);
    933 
    934         if (lastWasEscape) {
    935             lastWasEscape = false;
    936             continue;
    937         }
    938 
    939         switch (current) {
    940         case '[':
    941             inBrackets = true;
    942             break;
    943         case ']':
    944             inBrackets = false;
    945             break;
    946         case '\\':
    947             lastWasEscape = true;
    948             break;
    949         }
    950     }
    951 
    952     pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    953     m_buffer16.resize(0);
    954 
    955     while (isIdentPart(m_current)) {
    956         record16(m_current);
    957         shift1();
    958     }
    959 
    960     flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
    961     m_buffer16.resize(0);
    962 
    963     return true;
    964 }
    965 
    966 bool Lexer::skipRegExp()
    967 {
    968     bool lastWasEscape = false;
    969     bool inBrackets = false;
    970 
    971     while (true) {
    972         int current = m_current;
    973 
    974         if (isLineTerminator(current) || current == -1)
    975             return false;
    976 
    977         shift1();
    978 
    979         if (current == '/' && !lastWasEscape && !inBrackets)
    980             break;
    981 
    982         if (lastWasEscape) {
    983             lastWasEscape = false;
    984             continue;
    985         }
    986 
    987         switch (current) {
    988         case '[':
    989             inBrackets = true;
    990             break;
    991         case ']':
    992             inBrackets = false;
    993             break;
    994         case '\\':
    995             lastWasEscape = true;
    996             break;
    997         }
    998     }
    999 
   1000     while (isIdentPart(m_current))
   1001         shift1();
   1002 
   1003     return true;
   1004 }
   1005 
   1006 void Lexer::clear()
   1007 {
   1008     m_arena = 0;
   1009     m_codeWithoutBOMs.clear();
   1010 
   1011     Vector<char> newBuffer8;
   1012     newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);
   1013     m_buffer8.swap(newBuffer8);
   1014 
   1015     Vector<UChar> newBuffer16;
   1016     newBuffer16.reserveInitialCapacity(initialReadBufferCapacity);
   1017     m_buffer16.swap(newBuffer16);
   1018 
   1019     m_isReparsing = false;
   1020 }
   1021 
   1022 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
   1023 {
   1024     if (m_codeWithoutBOMs.isEmpty())
   1025         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
   1026 
   1027     const UChar* data = m_source->provider()->data();
   1028 
   1029     ASSERT(openBrace < closeBrace);
   1030 
   1031     int numBOMsBeforeOpenBrace = 0;
   1032     int numBOMsBetweenBraces = 0;
   1033 
   1034     int i;
   1035     for (i = m_source->startOffset(); i < openBrace; ++i)
   1036         numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
   1037     for (; i < closeBrace; ++i)
   1038         numBOMsBetweenBraces += data[i] == byteOrderMark;
   1039 
   1040     return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
   1041         closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
   1042 }
   1043 
   1044 } // namespace JSC
   1045