Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "LiteralParser.h"
     28 
     29 #include "JSArray.h"
     30 #include "JSString.h"
     31 #include "Lexer.h"
     32 #include "StringBuilder.h"
     33 #include <wtf/ASCIICType.h>
     34 #include <wtf/dtoa.h>
     35 
     36 namespace JSC {
     37 
     38 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
     39 {
     40     while (m_ptr < m_end && isASCIISpace(*m_ptr))
     41         ++m_ptr;
     42 
     43     ASSERT(m_ptr <= m_end);
     44     if (m_ptr >= m_end) {
     45         token.type = TokEnd;
     46         token.start = token.end = m_ptr;
     47         return TokEnd;
     48     }
     49     token.type = TokError;
     50     token.start = m_ptr;
     51     switch (*m_ptr) {
     52         case '[':
     53             token.type = TokLBracket;
     54             token.end = ++m_ptr;
     55             return TokLBracket;
     56         case ']':
     57             token.type = TokRBracket;
     58             token.end = ++m_ptr;
     59             return TokRBracket;
     60         case '(':
     61             token.type = TokLParen;
     62             token.end = ++m_ptr;
     63             return TokLBracket;
     64         case ')':
     65             token.type = TokRParen;
     66             token.end = ++m_ptr;
     67             return TokRBracket;
     68         case '{':
     69             token.type = TokLBrace;
     70             token.end = ++m_ptr;
     71             return TokLBrace;
     72         case '}':
     73             token.type = TokRBrace;
     74             token.end = ++m_ptr;
     75             return TokRBrace;
     76         case ',':
     77             token.type = TokComma;
     78             token.end = ++m_ptr;
     79             return TokComma;
     80         case ':':
     81             token.type = TokColon;
     82             token.end = ++m_ptr;
     83             return TokColon;
     84         case '"':
     85             if (m_mode == StrictJSON)
     86                 return lexString<StrictJSON>(token);
     87             return lexString<NonStrictJSON>(token);
     88         case 't':
     89             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
     90                 m_ptr += 4;
     91                 token.type = TokTrue;
     92                 token.end = m_ptr;
     93                 return TokTrue;
     94             }
     95             break;
     96         case 'f':
     97             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
     98                 m_ptr += 5;
     99                 token.type = TokFalse;
    100                 token.end = m_ptr;
    101                 return TokFalse;
    102             }
    103             break;
    104         case 'n':
    105             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
    106                 m_ptr += 4;
    107                 token.type = TokNull;
    108                 token.end = m_ptr;
    109                 return TokNull;
    110             }
    111             break;
    112         case '-':
    113         case '0':
    114         case '1':
    115         case '2':
    116         case '3':
    117         case '4':
    118         case '5':
    119         case '6':
    120         case '7':
    121         case '8':
    122         case '9':
    123             return lexNumber(token);
    124     }
    125     return TokError;
    126 }
    127 
    128 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
    129 {
    130     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
    131 }
    132 
    133 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
    134 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
    135 {
    136     ++m_ptr;
    137     const UChar* runStart;
    138     StringBuilder builder;
    139     do {
    140         runStart = m_ptr;
    141         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
    142             ++m_ptr;
    143         if (runStart < m_ptr)
    144             builder.append(runStart, m_ptr - runStart);
    145         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
    146             ++m_ptr;
    147             if (m_ptr >= m_end)
    148                 return TokError;
    149             switch (*m_ptr) {
    150                 case '"':
    151                     builder.append('"');
    152                     m_ptr++;
    153                     break;
    154                 case '\\':
    155                     builder.append('\\');
    156                     m_ptr++;
    157                     break;
    158                 case '/':
    159                     builder.append('/');
    160                     m_ptr++;
    161                     break;
    162                 case 'b':
    163                     builder.append('\b');
    164                     m_ptr++;
    165                     break;
    166                 case 'f':
    167                     builder.append('\f');
    168                     m_ptr++;
    169                     break;
    170                 case 'n':
    171                     builder.append('\n');
    172                     m_ptr++;
    173                     break;
    174                 case 'r':
    175                     builder.append('\r');
    176                     m_ptr++;
    177                     break;
    178                 case 't':
    179                     builder.append('\t');
    180                     m_ptr++;
    181                     break;
    182 
    183                 case 'u':
    184                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
    185                         return TokError;
    186                     for (int i = 1; i < 5; i++) {
    187                         if (!isASCIIHexDigit(m_ptr[i]))
    188                             return TokError;
    189                     }
    190                     builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
    191                     m_ptr += 5;
    192                     break;
    193 
    194                 default:
    195                     return TokError;
    196             }
    197         }
    198     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
    199 
    200     if (m_ptr >= m_end || *m_ptr != '"')
    201         return TokError;
    202 
    203     token.stringToken = builder.build();
    204     token.type = TokString;
    205     token.end = ++m_ptr;
    206     return TokString;
    207 }
    208 
    209 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
    210 {
    211     // ES5 and json.org define numbers as
    212     // number
    213     //     int
    214     //     int frac? exp?
    215     //
    216     // int
    217     //     -? 0
    218     //     -? digit1-9 digits?
    219     //
    220     // digits
    221     //     digit digits?
    222     //
    223     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
    224 
    225     if (m_ptr < m_end && *m_ptr == '-') // -?
    226         ++m_ptr;
    227 
    228     // (0 | [1-9][0-9]*)
    229     if (m_ptr < m_end && *m_ptr == '0') // 0
    230         ++m_ptr;
    231     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
    232         ++m_ptr;
    233         // [0-9]*
    234         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    235             ++m_ptr;
    236     } else
    237         return TokError;
    238 
    239     // ('.' [0-9]+)?
    240     if (m_ptr < m_end && *m_ptr == '.') {
    241         ++m_ptr;
    242         // [0-9]+
    243         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
    244             return TokError;
    245 
    246         ++m_ptr;
    247         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    248             ++m_ptr;
    249     }
    250 
    251     //  ([eE][+-]? [0-9]+)?
    252     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
    253         ++m_ptr;
    254 
    255         // [-+]?
    256         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
    257             ++m_ptr;
    258 
    259         // [0-9]+
    260         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
    261             return TokError;
    262 
    263         ++m_ptr;
    264         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    265             ++m_ptr;
    266     }
    267 
    268     token.type = TokNumber;
    269     token.end = m_ptr;
    270     Vector<char, 64> buffer(token.end - token.start + 1);
    271     int i;
    272     for (i = 0; i < token.end - token.start; i++) {
    273         ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
    274         buffer[i] = static_cast<char>(token.start[i]);
    275     }
    276     buffer[i] = 0;
    277     char* end;
    278     token.numberToken = WTF::strtod(buffer.data(), &end);
    279     ASSERT(buffer.data() + (token.end - token.start) == end);
    280     return TokNumber;
    281 }
    282 
    283 JSValue LiteralParser::parse(ParserState initialState)
    284 {
    285     ParserState state = initialState;
    286     MarkedArgumentBuffer objectStack;
    287     JSValue lastValue;
    288     Vector<ParserState, 16> stateStack;
    289     Vector<Identifier, 16> identifierStack;
    290     while (1) {
    291         switch(state) {
    292             startParseArray:
    293             case StartParseArray: {
    294                 JSArray* array = constructEmptyArray(m_exec);
    295                 objectStack.append(array);
    296                 // fallthrough
    297             }
    298             doParseArrayStartExpression:
    299             case DoParseArrayStartExpression: {
    300                 TokenType lastToken = m_lexer.currentToken().type;
    301                 if (m_lexer.next() == TokRBracket) {
    302                     if (lastToken == TokComma)
    303                         return JSValue();
    304                     m_lexer.next();
    305                     lastValue = objectStack.last();
    306                     objectStack.removeLast();
    307                     break;
    308                 }
    309 
    310                 stateStack.append(DoParseArrayEndExpression);
    311                 goto startParseExpression;
    312             }
    313             case DoParseArrayEndExpression: {
    314                  asArray(objectStack.last())->push(m_exec, lastValue);
    315 
    316                 if (m_lexer.currentToken().type == TokComma)
    317                     goto doParseArrayStartExpression;
    318 
    319                 if (m_lexer.currentToken().type != TokRBracket)
    320                     return JSValue();
    321 
    322                 m_lexer.next();
    323                 lastValue = objectStack.last();
    324                 objectStack.removeLast();
    325                 break;
    326             }
    327             startParseObject:
    328             case StartParseObject: {
    329                 JSObject* object = constructEmptyObject(m_exec);
    330                 objectStack.append(object);
    331 
    332                 TokenType type = m_lexer.next();
    333                 if (type == TokString) {
    334                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
    335 
    336                     // Check for colon
    337                     if (m_lexer.next() != TokColon)
    338                         return JSValue();
    339 
    340                     m_lexer.next();
    341                     identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
    342                     stateStack.append(DoParseObjectEndExpression);
    343                     goto startParseExpression;
    344                 } else if (type != TokRBrace)
    345                     return JSValue();
    346                 m_lexer.next();
    347                 lastValue = objectStack.last();
    348                 objectStack.removeLast();
    349                 break;
    350             }
    351             doParseObjectStartExpression:
    352             case DoParseObjectStartExpression: {
    353                 TokenType type = m_lexer.next();
    354                 if (type != TokString)
    355                     return JSValue();
    356                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
    357 
    358                 // Check for colon
    359                 if (m_lexer.next() != TokColon)
    360                     return JSValue();
    361 
    362                 m_lexer.next();
    363                 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
    364                 stateStack.append(DoParseObjectEndExpression);
    365                 goto startParseExpression;
    366             }
    367             case DoParseObjectEndExpression:
    368             {
    369                 asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
    370                 identifierStack.removeLast();
    371                 if (m_lexer.currentToken().type == TokComma)
    372                     goto doParseObjectStartExpression;
    373                 if (m_lexer.currentToken().type != TokRBrace)
    374                     return JSValue();
    375                 m_lexer.next();
    376                 lastValue = objectStack.last();
    377                 objectStack.removeLast();
    378                 break;
    379             }
    380             startParseExpression:
    381             case StartParseExpression: {
    382                 switch (m_lexer.currentToken().type) {
    383                     case TokLBracket:
    384                         goto startParseArray;
    385                     case TokLBrace:
    386                         goto startParseObject;
    387                     case TokString: {
    388                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
    389                         m_lexer.next();
    390                         lastValue = jsString(m_exec, stringToken.stringToken);
    391                         break;
    392                     }
    393                     case TokNumber: {
    394                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
    395                         m_lexer.next();
    396                         lastValue = jsNumber(m_exec, numberToken.numberToken);
    397                         break;
    398                     }
    399                     case TokNull:
    400                         m_lexer.next();
    401                         lastValue = jsNull();
    402                         break;
    403 
    404                     case TokTrue:
    405                         m_lexer.next();
    406                         lastValue = jsBoolean(true);
    407                         break;
    408 
    409                     case TokFalse:
    410                         m_lexer.next();
    411                         lastValue = jsBoolean(false);
    412                         break;
    413 
    414                     default:
    415                         // Error
    416                         return JSValue();
    417                 }
    418                 break;
    419             }
    420             case StartParseStatement: {
    421                 switch (m_lexer.currentToken().type) {
    422                     case TokLBracket:
    423                     case TokNumber:
    424                     case TokString:
    425                         goto startParseExpression;
    426 
    427                     case TokLParen: {
    428                         m_lexer.next();
    429                         stateStack.append(StartParseStatementEndStatement);
    430                         goto startParseExpression;
    431                     }
    432                     default:
    433                         return JSValue();
    434                 }
    435             }
    436             case StartParseStatementEndStatement: {
    437                 ASSERT(stateStack.isEmpty());
    438                 if (m_lexer.currentToken().type != TokRParen)
    439                     return JSValue();
    440                 if (m_lexer.next() == TokEnd)
    441                     return lastValue;
    442                 return JSValue();
    443             }
    444             default:
    445                 ASSERT_NOT_REACHED();
    446         }
    447         if (stateStack.isEmpty())
    448             return lastValue;
    449         state = stateStack.last();
    450         stateStack.removeLast();
    451         continue;
    452     }
    453 }
    454 
    455 }
    456