Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #include "config.h"
     27 #include "LiteralParser.h"
     28 
     29 #include "JSArray.h"
     30 #include "JSString.h"
     31 #include "Lexer.h"
     32 #include "UStringBuilder.h"
     33 #include <wtf/ASCIICType.h>
     34 #include <wtf/dtoa.h>
     35 
     36 namespace JSC {
     37 
     38 static inline bool isJSONWhiteSpace(const UChar& c)
     39 {
     40     // The JSON RFC 4627 defines a list of allowed characters to be considered
     41     // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar).
     42     return c == ' ' || c == 0x9 || c == 0xA || c == 0xD;
     43 }
     44 
     45 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
     46 {
     47     while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr))
     48         ++m_ptr;
     49 
     50     ASSERT(m_ptr <= m_end);
     51     if (m_ptr >= m_end) {
     52         token.type = TokEnd;
     53         token.start = token.end = m_ptr;
     54         return TokEnd;
     55     }
     56     token.type = TokError;
     57     token.start = m_ptr;
     58     switch (*m_ptr) {
     59         case '[':
     60             token.type = TokLBracket;
     61             token.end = ++m_ptr;
     62             return TokLBracket;
     63         case ']':
     64             token.type = TokRBracket;
     65             token.end = ++m_ptr;
     66             return TokRBracket;
     67         case '(':
     68             token.type = TokLParen;
     69             token.end = ++m_ptr;
     70             return TokLBracket;
     71         case ')':
     72             token.type = TokRParen;
     73             token.end = ++m_ptr;
     74             return TokRBracket;
     75         case '{':
     76             token.type = TokLBrace;
     77             token.end = ++m_ptr;
     78             return TokLBrace;
     79         case '}':
     80             token.type = TokRBrace;
     81             token.end = ++m_ptr;
     82             return TokRBrace;
     83         case ',':
     84             token.type = TokComma;
     85             token.end = ++m_ptr;
     86             return TokComma;
     87         case ':':
     88             token.type = TokColon;
     89             token.end = ++m_ptr;
     90             return TokColon;
     91         case '"':
     92             if (m_mode == StrictJSON)
     93                 return lexString<StrictJSON>(token);
     94             return lexString<NonStrictJSON>(token);
     95         case 't':
     96             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
     97                 m_ptr += 4;
     98                 token.type = TokTrue;
     99                 token.end = m_ptr;
    100                 return TokTrue;
    101             }
    102             break;
    103         case 'f':
    104             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
    105                 m_ptr += 5;
    106                 token.type = TokFalse;
    107                 token.end = m_ptr;
    108                 return TokFalse;
    109             }
    110             break;
    111         case 'n':
    112             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
    113                 m_ptr += 4;
    114                 token.type = TokNull;
    115                 token.end = m_ptr;
    116                 return TokNull;
    117             }
    118             break;
    119         case '-':
    120         case '0':
    121         case '1':
    122         case '2':
    123         case '3':
    124         case '4':
    125         case '5':
    126         case '6':
    127         case '7':
    128         case '8':
    129         case '9':
    130             return lexNumber(token);
    131     }
    132     return TokError;
    133 }
    134 
    135 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
    136 {
    137     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
    138 }
    139 
    140 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
    141 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
    142 {
    143     ++m_ptr;
    144     const UChar* runStart;
    145     UStringBuilder builder;
    146     do {
    147         runStart = m_ptr;
    148         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
    149             ++m_ptr;
    150         if (runStart < m_ptr)
    151             builder.append(runStart, m_ptr - runStart);
    152         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
    153             ++m_ptr;
    154             if (m_ptr >= m_end)
    155                 return TokError;
    156             switch (*m_ptr) {
    157                 case '"':
    158                     builder.append('"');
    159                     m_ptr++;
    160                     break;
    161                 case '\\':
    162                     builder.append('\\');
    163                     m_ptr++;
    164                     break;
    165                 case '/':
    166                     builder.append('/');
    167                     m_ptr++;
    168                     break;
    169                 case 'b':
    170                     builder.append('\b');
    171                     m_ptr++;
    172                     break;
    173                 case 'f':
    174                     builder.append('\f');
    175                     m_ptr++;
    176                     break;
    177                 case 'n':
    178                     builder.append('\n');
    179                     m_ptr++;
    180                     break;
    181                 case 'r':
    182                     builder.append('\r');
    183                     m_ptr++;
    184                     break;
    185                 case 't':
    186                     builder.append('\t');
    187                     m_ptr++;
    188                     break;
    189 
    190                 case 'u':
    191                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
    192                         return TokError;
    193                     for (int i = 1; i < 5; i++) {
    194                         if (!isASCIIHexDigit(m_ptr[i]))
    195                             return TokError;
    196                     }
    197                     builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
    198                     m_ptr += 5;
    199                     break;
    200 
    201                 default:
    202                     return TokError;
    203             }
    204         }
    205     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');
    206 
    207     if (m_ptr >= m_end || *m_ptr != '"')
    208         return TokError;
    209 
    210     token.stringToken = builder.toUString();
    211     token.type = TokString;
    212     token.end = ++m_ptr;
    213     return TokString;
    214 }
    215 
    216 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
    217 {
    218     // ES5 and json.org define numbers as
    219     // number
    220     //     int
    221     //     int frac? exp?
    222     //
    223     // int
    224     //     -? 0
    225     //     -? digit1-9 digits?
    226     //
    227     // digits
    228     //     digit digits?
    229     //
    230     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
    231 
    232     if (m_ptr < m_end && *m_ptr == '-') // -?
    233         ++m_ptr;
    234 
    235     // (0 | [1-9][0-9]*)
    236     if (m_ptr < m_end && *m_ptr == '0') // 0
    237         ++m_ptr;
    238     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
    239         ++m_ptr;
    240         // [0-9]*
    241         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    242             ++m_ptr;
    243     } else
    244         return TokError;
    245 
    246     // ('.' [0-9]+)?
    247     if (m_ptr < m_end && *m_ptr == '.') {
    248         ++m_ptr;
    249         // [0-9]+
    250         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
    251             return TokError;
    252 
    253         ++m_ptr;
    254         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    255             ++m_ptr;
    256     }
    257 
    258     //  ([eE][+-]? [0-9]+)?
    259     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
    260         ++m_ptr;
    261 
    262         // [-+]?
    263         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
    264             ++m_ptr;
    265 
    266         // [0-9]+
    267         if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
    268             return TokError;
    269 
    270         ++m_ptr;
    271         while (m_ptr < m_end && isASCIIDigit(*m_ptr))
    272             ++m_ptr;
    273     }
    274 
    275     token.type = TokNumber;
    276     token.end = m_ptr;
    277     Vector<char, 64> buffer(token.end - token.start + 1);
    278     int i;
    279     for (i = 0; i < token.end - token.start; i++) {
    280         ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
    281         buffer[i] = static_cast<char>(token.start[i]);
    282     }
    283     buffer[i] = 0;
    284     char* end;
    285     token.numberToken = WTF::strtod(buffer.data(), &end);
    286     ASSERT(buffer.data() + (token.end - token.start) == end);
    287     return TokNumber;
    288 }
    289 
    290 JSValue LiteralParser::parse(ParserState initialState)
    291 {
    292     ParserState state = initialState;
    293     MarkedArgumentBuffer objectStack;
    294     JSValue lastValue;
    295     Vector<ParserState, 16> stateStack;
    296     Vector<Identifier, 16> identifierStack;
    297     while (1) {
    298         switch(state) {
    299             startParseArray:
    300             case StartParseArray: {
    301                 JSArray* array = constructEmptyArray(m_exec);
    302                 objectStack.append(array);
    303                 // fallthrough
    304             }
    305             doParseArrayStartExpression:
    306             case DoParseArrayStartExpression: {
    307                 TokenType lastToken = m_lexer.currentToken().type;
    308                 if (m_lexer.next() == TokRBracket) {
    309                     if (lastToken == TokComma)
    310                         return JSValue();
    311                     m_lexer.next();
    312                     lastValue = objectStack.last();
    313                     objectStack.removeLast();
    314                     break;
    315                 }
    316 
    317                 stateStack.append(DoParseArrayEndExpression);
    318                 goto startParseExpression;
    319             }
    320             case DoParseArrayEndExpression: {
    321                  asArray(objectStack.last())->push(m_exec, lastValue);
    322 
    323                 if (m_lexer.currentToken().type == TokComma)
    324                     goto doParseArrayStartExpression;
    325 
    326                 if (m_lexer.currentToken().type != TokRBracket)
    327                     return JSValue();
    328 
    329                 m_lexer.next();
    330                 lastValue = objectStack.last();
    331                 objectStack.removeLast();
    332                 break;
    333             }
    334             startParseObject:
    335             case StartParseObject: {
    336                 JSObject* object = constructEmptyObject(m_exec);
    337                 objectStack.append(object);
    338 
    339                 TokenType type = m_lexer.next();
    340                 if (type == TokString) {
    341                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
    342 
    343                     // Check for colon
    344                     if (m_lexer.next() != TokColon)
    345                         return JSValue();
    346 
    347                     m_lexer.next();
    348                     identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
    349                     stateStack.append(DoParseObjectEndExpression);
    350                     goto startParseExpression;
    351                 } else if (type != TokRBrace)
    352                     return JSValue();
    353                 m_lexer.next();
    354                 lastValue = objectStack.last();
    355                 objectStack.removeLast();
    356                 break;
    357             }
    358             doParseObjectStartExpression:
    359             case DoParseObjectStartExpression: {
    360                 TokenType type = m_lexer.next();
    361                 if (type != TokString)
    362                     return JSValue();
    363                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
    364 
    365                 // Check for colon
    366                 if (m_lexer.next() != TokColon)
    367                     return JSValue();
    368 
    369                 m_lexer.next();
    370                 identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
    371                 stateStack.append(DoParseObjectEndExpression);
    372                 goto startParseExpression;
    373             }
    374             case DoParseObjectEndExpression:
    375             {
    376                 asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue);
    377                 identifierStack.removeLast();
    378                 if (m_lexer.currentToken().type == TokComma)
    379                     goto doParseObjectStartExpression;
    380                 if (m_lexer.currentToken().type != TokRBrace)
    381                     return JSValue();
    382                 m_lexer.next();
    383                 lastValue = objectStack.last();
    384                 objectStack.removeLast();
    385                 break;
    386             }
    387             startParseExpression:
    388             case StartParseExpression: {
    389                 switch (m_lexer.currentToken().type) {
    390                     case TokLBracket:
    391                         goto startParseArray;
    392                     case TokLBrace:
    393                         goto startParseObject;
    394                     case TokString: {
    395                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
    396                         m_lexer.next();
    397                         lastValue = jsString(m_exec, stringToken.stringToken);
    398                         break;
    399                     }
    400                     case TokNumber: {
    401                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
    402                         m_lexer.next();
    403                         lastValue = jsNumber(numberToken.numberToken);
    404                         break;
    405                     }
    406                     case TokNull:
    407                         m_lexer.next();
    408                         lastValue = jsNull();
    409                         break;
    410 
    411                     case TokTrue:
    412                         m_lexer.next();
    413                         lastValue = jsBoolean(true);
    414                         break;
    415 
    416                     case TokFalse:
    417                         m_lexer.next();
    418                         lastValue = jsBoolean(false);
    419                         break;
    420 
    421                     default:
    422                         // Error
    423                         return JSValue();
    424                 }
    425                 break;
    426             }
    427             case StartParseStatement: {
    428                 switch (m_lexer.currentToken().type) {
    429                     case TokLBracket:
    430                     case TokNumber:
    431                     case TokString:
    432                         goto startParseExpression;
    433 
    434                     case TokLParen: {
    435                         m_lexer.next();
    436                         stateStack.append(StartParseStatementEndStatement);
    437                         goto startParseExpression;
    438                     }
    439                     default:
    440                         return JSValue();
    441                 }
    442             }
    443             case StartParseStatementEndStatement: {
    444                 ASSERT(stateStack.isEmpty());
    445                 if (m_lexer.currentToken().type != TokRParen)
    446                     return JSValue();
    447                 if (m_lexer.next() == TokEnd)
    448                     return lastValue;
    449                 return JSValue();
    450             }
    451             default:
    452                 ASSERT_NOT_REACHED();
    453         }
    454         if (stateStack.isEmpty())
    455             return lastValue;
    456         state = stateStack.last();
    457         stateStack.removeLast();
    458         continue;
    459     }
    460 }
    461 
    462 }
    463