1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "LiteralParser.h" 28 29 #include "JSArray.h" 30 #include "JSString.h" 31 #include "Lexer.h" 32 #include "UStringBuilder.h" 33 #include <wtf/ASCIICType.h> 34 #include <wtf/dtoa.h> 35 36 namespace JSC { 37 38 static inline bool isJSONWhiteSpace(const UChar& c) 39 { 40 // The JSON RFC 4627 defines a list of allowed characters to be considered 41 // insignificant white space: http://www.ietf.org/rfc/rfc4627.txt (2. JSON Grammar). 42 return c == ' ' || c == 0x9 || c == 0xA || c == 0xD; 43 } 44 45 LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) 46 { 47 while (m_ptr < m_end && isJSONWhiteSpace(*m_ptr)) 48 ++m_ptr; 49 50 ASSERT(m_ptr <= m_end); 51 if (m_ptr >= m_end) { 52 token.type = TokEnd; 53 token.start = token.end = m_ptr; 54 return TokEnd; 55 } 56 token.type = TokError; 57 token.start = m_ptr; 58 switch (*m_ptr) { 59 case '[': 60 token.type = TokLBracket; 61 token.end = ++m_ptr; 62 return TokLBracket; 63 case ']': 64 token.type = TokRBracket; 65 token.end = ++m_ptr; 66 return TokRBracket; 67 case '(': 68 token.type = TokLParen; 69 token.end = ++m_ptr; 70 return TokLBracket; 71 case ')': 72 token.type = TokRParen; 73 token.end = ++m_ptr; 74 return TokRBracket; 75 case '{': 76 token.type = TokLBrace; 77 token.end = ++m_ptr; 78 return TokLBrace; 79 case '}': 80 token.type = TokRBrace; 81 token.end = ++m_ptr; 82 return TokRBrace; 83 case ',': 84 token.type = TokComma; 85 token.end = ++m_ptr; 86 return TokComma; 87 case ':': 88 token.type = TokColon; 89 token.end = ++m_ptr; 90 return TokColon; 91 case '"': 92 if (m_mode == StrictJSON) 93 return lexString<StrictJSON>(token); 94 return lexString<NonStrictJSON>(token); 95 case 't': 96 if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { 97 m_ptr += 4; 98 token.type = TokTrue; 99 token.end = m_ptr; 100 return TokTrue; 101 } 102 break; 103 case 'f': 104 if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { 105 m_ptr += 5; 106 token.type = TokFalse; 107 token.end = m_ptr; 108 return TokFalse; 109 } 110 break; 111 case 'n': 112 if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { 113 m_ptr += 4; 114 token.type = TokNull; 115 token.end = m_ptr; 116 return TokNull; 117 } 118 break; 119 case '-': 120 case '0': 121 case '1': 122 case '2': 123 case '3': 124 case '4': 125 case '5': 126 case '6': 127 case '7': 128 case '8': 129 case '9': 130 return lexNumber(token); 131 } 132 return TokError; 133 } 134 135 template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) 136 { 137 return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; 138 } 139 140 // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. 141 template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) 142 { 143 ++m_ptr; 144 const UChar* runStart; 145 UStringBuilder builder; 146 do { 147 runStart = m_ptr; 148 while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) 149 ++m_ptr; 150 if (runStart < m_ptr) 151 builder.append(runStart, m_ptr - runStart); 152 if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { 153 ++m_ptr; 154 if (m_ptr >= m_end) 155 return TokError; 156 switch (*m_ptr) { 157 case '"': 158 builder.append('"'); 159 m_ptr++; 160 break; 161 case '\\': 162 builder.append('\\'); 163 m_ptr++; 164 break; 165 case '/': 166 builder.append('/'); 167 m_ptr++; 168 break; 169 case 'b': 170 builder.append('\b'); 171 m_ptr++; 172 break; 173 case 'f': 174 builder.append('\f'); 175 m_ptr++; 176 break; 177 case 'n': 178 builder.append('\n'); 179 m_ptr++; 180 break; 181 case 'r': 182 builder.append('\r'); 183 m_ptr++; 184 break; 185 case 't': 186 builder.append('\t'); 187 m_ptr++; 188 break; 189 190 case 'u': 191 if ((m_end - m_ptr) < 5) // uNNNN == 5 characters 192 return TokError; 193 for (int i = 1; i < 5; i++) { 194 if (!isASCIIHexDigit(m_ptr[i])) 195 return TokError; 196 } 197 builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4])); 198 m_ptr += 5; 199 break; 200 201 default: 202 return TokError; 203 } 204 } 205 } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); 206 207 if (m_ptr >= m_end || *m_ptr != '"') 208 return TokError; 209 210 token.stringToken = builder.toUString(); 211 token.type = TokString; 212 token.end = ++m_ptr; 213 return TokString; 214 } 215 216 LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) 217 { 218 // ES5 and json.org define numbers as 219 // number 220 // int 221 // int frac? exp? 222 // 223 // int 224 // -? 0 225 // -? digit1-9 digits? 226 // 227 // digits 228 // digit digits? 229 // 230 // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? 231 232 if (m_ptr < m_end && *m_ptr == '-') // -? 233 ++m_ptr; 234 235 // (0 | [1-9][0-9]*) 236 if (m_ptr < m_end && *m_ptr == '0') // 0 237 ++m_ptr; 238 else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] 239 ++m_ptr; 240 // [0-9]* 241 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 242 ++m_ptr; 243 } else 244 return TokError; 245 246 // ('.' [0-9]+)? 247 if (m_ptr < m_end && *m_ptr == '.') { 248 ++m_ptr; 249 // [0-9]+ 250 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) 251 return TokError; 252 253 ++m_ptr; 254 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 255 ++m_ptr; 256 } 257 258 // ([eE][+-]? [0-9]+)? 259 if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] 260 ++m_ptr; 261 262 // [-+]? 263 if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) 264 ++m_ptr; 265 266 // [0-9]+ 267 if (m_ptr >= m_end || !isASCIIDigit(*m_ptr)) 268 return TokError; 269 270 ++m_ptr; 271 while (m_ptr < m_end && isASCIIDigit(*m_ptr)) 272 ++m_ptr; 273 } 274 275 token.type = TokNumber; 276 token.end = m_ptr; 277 Vector<char, 64> buffer(token.end - token.start + 1); 278 int i; 279 for (i = 0; i < token.end - token.start; i++) { 280 ASSERT(static_cast<char>(token.start[i]) == token.start[i]); 281 buffer[i] = static_cast<char>(token.start[i]); 282 } 283 buffer[i] = 0; 284 char* end; 285 token.numberToken = WTF::strtod(buffer.data(), &end); 286 ASSERT(buffer.data() + (token.end - token.start) == end); 287 return TokNumber; 288 } 289 290 JSValue LiteralParser::parse(ParserState initialState) 291 { 292 ParserState state = initialState; 293 MarkedArgumentBuffer objectStack; 294 JSValue lastValue; 295 Vector<ParserState, 16> stateStack; 296 Vector<Identifier, 16> identifierStack; 297 while (1) { 298 switch(state) { 299 startParseArray: 300 case StartParseArray: { 301 JSArray* array = constructEmptyArray(m_exec); 302 objectStack.append(array); 303 // fallthrough 304 } 305 doParseArrayStartExpression: 306 case DoParseArrayStartExpression: { 307 TokenType lastToken = m_lexer.currentToken().type; 308 if (m_lexer.next() == TokRBracket) { 309 if (lastToken == TokComma) 310 return JSValue(); 311 m_lexer.next(); 312 lastValue = objectStack.last(); 313 objectStack.removeLast(); 314 break; 315 } 316 317 stateStack.append(DoParseArrayEndExpression); 318 goto startParseExpression; 319 } 320 case DoParseArrayEndExpression: { 321 asArray(objectStack.last())->push(m_exec, lastValue); 322 323 if (m_lexer.currentToken().type == TokComma) 324 goto doParseArrayStartExpression; 325 326 if (m_lexer.currentToken().type != TokRBracket) 327 return JSValue(); 328 329 m_lexer.next(); 330 lastValue = objectStack.last(); 331 objectStack.removeLast(); 332 break; 333 } 334 startParseObject: 335 case StartParseObject: { 336 JSObject* object = constructEmptyObject(m_exec); 337 objectStack.append(object); 338 339 TokenType type = m_lexer.next(); 340 if (type == TokString) { 341 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); 342 343 // Check for colon 344 if (m_lexer.next() != TokColon) 345 return JSValue(); 346 347 m_lexer.next(); 348 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); 349 stateStack.append(DoParseObjectEndExpression); 350 goto startParseExpression; 351 } else if (type != TokRBrace) 352 return JSValue(); 353 m_lexer.next(); 354 lastValue = objectStack.last(); 355 objectStack.removeLast(); 356 break; 357 } 358 doParseObjectStartExpression: 359 case DoParseObjectStartExpression: { 360 TokenType type = m_lexer.next(); 361 if (type != TokString) 362 return JSValue(); 363 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); 364 365 // Check for colon 366 if (m_lexer.next() != TokColon) 367 return JSValue(); 368 369 m_lexer.next(); 370 identifierStack.append(Identifier(m_exec, identifierToken.stringToken)); 371 stateStack.append(DoParseObjectEndExpression); 372 goto startParseExpression; 373 } 374 case DoParseObjectEndExpression: 375 { 376 asObject(objectStack.last())->putDirect(m_exec->globalData(), identifierStack.last(), lastValue); 377 identifierStack.removeLast(); 378 if (m_lexer.currentToken().type == TokComma) 379 goto doParseObjectStartExpression; 380 if (m_lexer.currentToken().type != TokRBrace) 381 return JSValue(); 382 m_lexer.next(); 383 lastValue = objectStack.last(); 384 objectStack.removeLast(); 385 break; 386 } 387 startParseExpression: 388 case StartParseExpression: { 389 switch (m_lexer.currentToken().type) { 390 case TokLBracket: 391 goto startParseArray; 392 case TokLBrace: 393 goto startParseObject; 394 case TokString: { 395 Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); 396 m_lexer.next(); 397 lastValue = jsString(m_exec, stringToken.stringToken); 398 break; 399 } 400 case TokNumber: { 401 Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); 402 m_lexer.next(); 403 lastValue = jsNumber(numberToken.numberToken); 404 break; 405 } 406 case TokNull: 407 m_lexer.next(); 408 lastValue = jsNull(); 409 break; 410 411 case TokTrue: 412 m_lexer.next(); 413 lastValue = jsBoolean(true); 414 break; 415 416 case TokFalse: 417 m_lexer.next(); 418 lastValue = jsBoolean(false); 419 break; 420 421 default: 422 // Error 423 return JSValue(); 424 } 425 break; 426 } 427 case StartParseStatement: { 428 switch (m_lexer.currentToken().type) { 429 case TokLBracket: 430 case TokNumber: 431 case TokString: 432 goto startParseExpression; 433 434 case TokLParen: { 435 m_lexer.next(); 436 stateStack.append(StartParseStatementEndStatement); 437 goto startParseExpression; 438 } 439 default: 440 return JSValue(); 441 } 442 } 443 case StartParseStatementEndStatement: { 444 ASSERT(stateStack.isEmpty()); 445 if (m_lexer.currentToken().type != TokRParen) 446 return JSValue(); 447 if (m_lexer.next() == TokEnd) 448 return lastValue; 449 return JSValue(); 450 } 451 default: 452 ASSERT_NOT_REACHED(); 453 } 454 if (stateStack.isEmpty()) 455 return lastValue; 456 state = stateStack.last(); 457 stateStack.removeLast(); 458 continue; 459 } 460 } 461 462 } 463