1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 #ifndef V8_PREPARSER_H 29 #define V8_PREPARSER_H 30 31 #include "hashmap.h" 32 #include "token.h" 33 #include "scanner.h" 34 35 namespace v8 { 36 37 namespace internal { 38 class UnicodeCache; 39 } 40 41 namespace preparser { 42 43 typedef uint8_t byte; 44 45 // Preparsing checks a JavaScript program and emits preparse-data that helps 46 // a later parsing to be faster. 47 // See preparse-data-format.h for the data format. 48 49 // The PreParser checks that the syntax follows the grammar for JavaScript, 50 // and collects some information about the program along the way. 51 // The grammar check is only performed in order to understand the program 52 // sufficiently to deduce some information about it, that can be used 53 // to speed up later parsing. Finding errors is not the goal of pre-parsing, 54 // rather it is to speed up properly written and correct programs. 55 // That means that contextual checks (like a label being declared where 56 // it is used) are generally omitted. 57 58 namespace i = v8::internal; 59 60 class DuplicateFinder { 61 public: 62 explicit DuplicateFinder(i::UnicodeCache* constants) 63 : unicode_constants_(constants), 64 backing_store_(16), 65 map_(&Match) { } 66 67 int AddAsciiSymbol(i::Vector<const char> key, int value); 68 int AddUtf16Symbol(i::Vector<const uint16_t> key, int value); 69 // Add a a number literal by converting it (if necessary) 70 // to the string that ToString(ToNumber(literal)) would generate. 71 // and then adding that string with AddAsciiSymbol. 72 // This string is the actual value used as key in an object literal, 73 // and the one that must be different from the other keys. 74 int AddNumber(i::Vector<const char> key, int value); 75 76 private: 77 int AddSymbol(i::Vector<const byte> key, bool is_ascii, int value); 78 // Backs up the key and its length in the backing store. 79 // The backup is stored with a base 127 encoding of the 80 // length (plus a bit saying whether the string is ASCII), 81 // followed by the bytes of the key. 82 byte* BackupKey(i::Vector<const byte> key, bool is_ascii); 83 84 // Compare two encoded keys (both pointing into the backing store) 85 // for having the same base-127 encoded lengths and ASCII-ness, 86 // and then having the same 'length' bytes following. 87 static bool Match(void* first, void* second); 88 // Creates a hash from a sequence of bytes. 89 static uint32_t Hash(i::Vector<const byte> key, bool is_ascii); 90 // Checks whether a string containing a JS number is its canonical 91 // form. 92 static bool IsNumberCanonical(i::Vector<const char> key); 93 94 // Size of buffer. Sufficient for using it to call DoubleToCString in 95 // from conversions.h. 96 static const int kBufferSize = 100; 97 98 i::UnicodeCache* unicode_constants_; 99 // Backing store used to store strings used as hashmap keys. 100 i::SequenceCollector<unsigned char> backing_store_; 101 i::HashMap map_; 102 // Buffer used for string->number->canonical string conversions. 103 char number_buffer_[kBufferSize]; 104 }; 105 106 107 class PreParser { 108 public: 109 enum PreParseResult { 110 kPreParseStackOverflow, 111 kPreParseSuccess 112 }; 113 114 115 PreParser(i::Scanner* scanner, 116 i::ParserRecorder* log, 117 uintptr_t stack_limit, 118 bool allow_lazy, 119 bool allow_natives_syntax, 120 bool allow_modules) 121 : scanner_(scanner), 122 log_(log), 123 scope_(NULL), 124 stack_limit_(stack_limit), 125 strict_mode_violation_location_(i::Scanner::Location::invalid()), 126 strict_mode_violation_type_(NULL), 127 stack_overflow_(false), 128 allow_lazy_(allow_lazy), 129 allow_modules_(allow_modules), 130 allow_natives_syntax_(allow_natives_syntax), 131 parenthesized_function_(false), 132 harmony_scoping_(scanner->HarmonyScoping()) { } 133 134 ~PreParser() {} 135 136 // Pre-parse the program from the character stream; returns true on 137 // success (even if parsing failed, the pre-parse data successfully 138 // captured the syntax error), and false if a stack-overflow happened 139 // during parsing. 140 static PreParseResult PreParseProgram(i::Scanner* scanner, 141 i::ParserRecorder* log, 142 int flags, 143 uintptr_t stack_limit) { 144 bool allow_lazy = (flags & i::kAllowLazy) != 0; 145 bool allow_natives_syntax = (flags & i::kAllowNativesSyntax) != 0; 146 bool allow_modules = (flags & i::kAllowModules) != 0; 147 return PreParser(scanner, log, stack_limit, allow_lazy, 148 allow_natives_syntax, allow_modules).PreParse(); 149 } 150 151 // Parses a single function literal, from the opening parentheses before 152 // parameters to the closing brace after the body. 153 // Returns a FunctionEntry describing the body of the funciton in enough 154 // detail that it can be lazily compiled. 155 // The scanner is expected to have matched the "function" keyword and 156 // parameters, and have consumed the initial '{'. 157 // At return, unless an error occured, the scanner is positioned before the 158 // the final '}'. 159 PreParseResult PreParseLazyFunction(i::LanguageMode mode, 160 i::ParserRecorder* log); 161 162 private: 163 // Used to detect duplicates in object literals. Each of the values 164 // kGetterProperty, kSetterProperty and kValueProperty represents 165 // a type of object literal property. When parsing a property, its 166 // type value is stored in the DuplicateFinder for the property name. 167 // Values are chosen so that having intersection bits means the there is 168 // an incompatibility. 169 // I.e., you can add a getter to a property that already has a setter, since 170 // kGetterProperty and kSetterProperty doesn't intersect, but not if it 171 // already has a getter or a value. Adding the getter to an existing 172 // setter will store the value (kGetterProperty | kSetterProperty), which 173 // is incompatible with adding any further properties. 174 enum PropertyType { 175 kNone = 0, 176 // Bit patterns representing different object literal property types. 177 kGetterProperty = 1, 178 kSetterProperty = 2, 179 kValueProperty = 7, 180 // Helper constants. 181 kValueFlag = 4 182 }; 183 184 // Checks the type of conflict based on values coming from PropertyType. 185 bool HasConflict(int type1, int type2) { return (type1 & type2) != 0; } 186 bool IsDataDataConflict(int type1, int type2) { 187 return ((type1 & type2) & kValueFlag) != 0; 188 } 189 bool IsDataAccessorConflict(int type1, int type2) { 190 return ((type1 ^ type2) & kValueFlag) != 0; 191 } 192 bool IsAccessorAccessorConflict(int type1, int type2) { 193 return ((type1 | type2) & kValueFlag) == 0; 194 } 195 196 197 void CheckDuplicate(DuplicateFinder* finder, 198 i::Token::Value property, 199 int type, 200 bool* ok); 201 202 // These types form an algebra over syntactic categories that is just 203 // rich enough to let us recognize and propagate the constructs that 204 // are either being counted in the preparser data, or is important 205 // to throw the correct syntax error exceptions. 206 207 enum ScopeType { 208 kTopLevelScope, 209 kFunctionScope 210 }; 211 212 enum VariableDeclarationContext { 213 kSourceElement, 214 kStatement, 215 kForStatement 216 }; 217 218 // If a list of variable declarations includes any initializers. 219 enum VariableDeclarationProperties { 220 kHasInitializers, 221 kHasNoInitializers 222 }; 223 224 class Expression; 225 226 class Identifier { 227 public: 228 static Identifier Default() { 229 return Identifier(kUnknownIdentifier); 230 } 231 static Identifier Eval() { 232 return Identifier(kEvalIdentifier); 233 } 234 static Identifier Arguments() { 235 return Identifier(kArgumentsIdentifier); 236 } 237 static Identifier FutureReserved() { 238 return Identifier(kFutureReservedIdentifier); 239 } 240 static Identifier FutureStrictReserved() { 241 return Identifier(kFutureStrictReservedIdentifier); 242 } 243 bool IsEval() { return type_ == kEvalIdentifier; } 244 bool IsArguments() { return type_ == kArgumentsIdentifier; } 245 bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; } 246 bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; } 247 bool IsFutureStrictReserved() { 248 return type_ == kFutureStrictReservedIdentifier; 249 } 250 bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; } 251 252 private: 253 enum Type { 254 kUnknownIdentifier, 255 kFutureReservedIdentifier, 256 kFutureStrictReservedIdentifier, 257 kEvalIdentifier, 258 kArgumentsIdentifier 259 }; 260 explicit Identifier(Type type) : type_(type) { } 261 Type type_; 262 263 friend class Expression; 264 }; 265 266 // Bits 0 and 1 are used to identify the type of expression: 267 // If bit 0 is set, it's an identifier. 268 // if bit 1 is set, it's a string literal. 269 // If neither is set, it's no particular type, and both set isn't 270 // use yet. 271 // Bit 2 is used to mark the expression as being parenthesized, 272 // so "(foo)" isn't recognized as a pure identifier (and possible label). 273 class Expression { 274 public: 275 static Expression Default() { 276 return Expression(kUnknownExpression); 277 } 278 279 static Expression FromIdentifier(Identifier id) { 280 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift)); 281 } 282 283 static Expression StringLiteral() { 284 return Expression(kUnknownStringLiteral); 285 } 286 287 static Expression UseStrictStringLiteral() { 288 return Expression(kUseStrictString); 289 } 290 291 static Expression This() { 292 return Expression(kThisExpression); 293 } 294 295 static Expression ThisProperty() { 296 return Expression(kThisPropertyExpression); 297 } 298 299 static Expression StrictFunction() { 300 return Expression(kStrictFunctionExpression); 301 } 302 303 bool IsIdentifier() { 304 return (code_ & kIdentifierFlag) != 0; 305 } 306 307 // Only works corretly if it is actually an identifier expression. 308 PreParser::Identifier AsIdentifier() { 309 return PreParser::Identifier( 310 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift)); 311 } 312 313 bool IsParenthesized() { 314 // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized. 315 return (code_ & 7) > 4; 316 } 317 318 bool IsRawIdentifier() { 319 return !IsParenthesized() && IsIdentifier(); 320 } 321 322 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; } 323 324 bool IsRawStringLiteral() { 325 return !IsParenthesized() && IsStringLiteral(); 326 } 327 328 bool IsUseStrictLiteral() { 329 return (code_ & kStringLiteralMask) == kUseStrictString; 330 } 331 332 bool IsThis() { 333 return code_ == kThisExpression; 334 } 335 336 bool IsThisProperty() { 337 return code_ == kThisPropertyExpression; 338 } 339 340 bool IsStrictFunction() { 341 return code_ == kStrictFunctionExpression; 342 } 343 344 Expression Parenthesize() { 345 int type = code_ & 3; 346 if (type != 0) { 347 // Identifiers and string literals can be parenthesized. 348 // They no longer work as labels or directive prologues, 349 // but are still recognized in other contexts. 350 return Expression(code_ | kParentesizedExpressionFlag); 351 } 352 // For other types of expressions, it's not important to remember 353 // the parentheses. 354 return *this; 355 } 356 357 private: 358 // First two/three bits are used as flags. 359 // Bit 0 and 1 represent identifiers or strings literals, and are 360 // mutually exclusive, but can both be absent. 361 // If bit 0 or 1 are set, bit 2 marks that the expression has 362 // been wrapped in parentheses (a string literal can no longer 363 // be a directive prologue, and an identifier can no longer be 364 // a label. 365 enum { 366 kUnknownExpression = 0, 367 // Identifiers 368 kIdentifierFlag = 1, // Used to detect labels. 369 kIdentifierShift = 3, 370 371 kStringLiteralFlag = 2, // Used to detect directive prologue. 372 kUnknownStringLiteral = kStringLiteralFlag, 373 kUseStrictString = kStringLiteralFlag | 8, 374 kStringLiteralMask = kUseStrictString, 375 376 kParentesizedExpressionFlag = 4, // Only if identifier or string literal. 377 378 // Below here applies if neither identifier nor string literal. 379 kThisExpression = 4, 380 kThisPropertyExpression = 8, 381 kStrictFunctionExpression = 12 382 }; 383 384 explicit Expression(int expression_code) : code_(expression_code) { } 385 386 int code_; 387 }; 388 389 class Statement { 390 public: 391 static Statement Default() { 392 return Statement(kUnknownStatement); 393 } 394 395 static Statement FunctionDeclaration() { 396 return Statement(kFunctionDeclaration); 397 } 398 399 // Creates expression statement from expression. 400 // Preserves being an unparenthesized string literal, possibly 401 // "use strict". 402 static Statement ExpressionStatement(Expression expression) { 403 if (!expression.IsParenthesized()) { 404 if (expression.IsUseStrictLiteral()) { 405 return Statement(kUseStrictExpressionStatement); 406 } 407 if (expression.IsStringLiteral()) { 408 return Statement(kStringLiteralExpressionStatement); 409 } 410 } 411 return Default(); 412 } 413 414 bool IsStringLiteral() { 415 return code_ != kUnknownStatement; 416 } 417 418 bool IsUseStrictLiteral() { 419 return code_ == kUseStrictExpressionStatement; 420 } 421 422 bool IsFunctionDeclaration() { 423 return code_ == kFunctionDeclaration; 424 } 425 426 private: 427 enum Type { 428 kUnknownStatement, 429 kStringLiteralExpressionStatement, 430 kUseStrictExpressionStatement, 431 kFunctionDeclaration 432 }; 433 434 explicit Statement(Type code) : code_(code) {} 435 Type code_; 436 }; 437 438 enum SourceElements { 439 kUnknownSourceElements 440 }; 441 442 typedef int Arguments; 443 444 class Scope { 445 public: 446 Scope(Scope** variable, ScopeType type) 447 : variable_(variable), 448 prev_(*variable), 449 type_(type), 450 materialized_literal_count_(0), 451 expected_properties_(0), 452 with_nesting_count_(0), 453 language_mode_( 454 (prev_ != NULL) ? prev_->language_mode() : i::CLASSIC_MODE) { 455 *variable = this; 456 } 457 ~Scope() { *variable_ = prev_; } 458 void NextMaterializedLiteralIndex() { materialized_literal_count_++; } 459 void AddProperty() { expected_properties_++; } 460 ScopeType type() { return type_; } 461 int expected_properties() { return expected_properties_; } 462 int materialized_literal_count() { return materialized_literal_count_; } 463 bool IsInsideWith() { return with_nesting_count_ != 0; } 464 bool is_classic_mode() { 465 return language_mode_ == i::CLASSIC_MODE; 466 } 467 i::LanguageMode language_mode() { 468 return language_mode_; 469 } 470 void set_language_mode(i::LanguageMode language_mode) { 471 language_mode_ = language_mode; 472 } 473 void EnterWith() { with_nesting_count_++; } 474 void LeaveWith() { with_nesting_count_--; } 475 476 private: 477 Scope** const variable_; 478 Scope* const prev_; 479 const ScopeType type_; 480 int materialized_literal_count_; 481 int expected_properties_; 482 int with_nesting_count_; 483 i::LanguageMode language_mode_; 484 }; 485 486 // Preparse the program. Only called in PreParseProgram after creating 487 // the instance. 488 PreParseResult PreParse() { 489 Scope top_scope(&scope_, kTopLevelScope); 490 bool ok = true; 491 int start_position = scanner_->peek_location().beg_pos; 492 ParseSourceElements(i::Token::EOS, &ok); 493 if (stack_overflow_) return kPreParseStackOverflow; 494 if (!ok) { 495 ReportUnexpectedToken(scanner_->current_token()); 496 } else if (!scope_->is_classic_mode()) { 497 CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok); 498 } 499 return kPreParseSuccess; 500 } 501 502 // Report syntax error 503 void ReportUnexpectedToken(i::Token::Value token); 504 void ReportMessageAt(i::Scanner::Location location, 505 const char* type, 506 const char* name_opt) { 507 log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt); 508 } 509 void ReportMessageAt(int start_pos, 510 int end_pos, 511 const char* type, 512 const char* name_opt) { 513 log_->LogMessage(start_pos, end_pos, type, name_opt); 514 } 515 516 void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok); 517 518 // All ParseXXX functions take as the last argument an *ok parameter 519 // which is set to false if parsing failed; it is unchanged otherwise. 520 // By making the 'exception handling' explicit, we are forced to check 521 // for failure at the call sites. 522 Statement ParseSourceElement(bool* ok); 523 SourceElements ParseSourceElements(int end_token, bool* ok); 524 Statement ParseStatement(bool* ok); 525 Statement ParseFunctionDeclaration(bool* ok); 526 Statement ParseBlock(bool* ok); 527 Statement ParseVariableStatement(VariableDeclarationContext var_context, 528 bool* ok); 529 Statement ParseVariableDeclarations(VariableDeclarationContext var_context, 530 VariableDeclarationProperties* decl_props, 531 int* num_decl, 532 bool* ok); 533 Statement ParseExpressionOrLabelledStatement(bool* ok); 534 Statement ParseIfStatement(bool* ok); 535 Statement ParseContinueStatement(bool* ok); 536 Statement ParseBreakStatement(bool* ok); 537 Statement ParseReturnStatement(bool* ok); 538 Statement ParseWithStatement(bool* ok); 539 Statement ParseSwitchStatement(bool* ok); 540 Statement ParseDoWhileStatement(bool* ok); 541 Statement ParseWhileStatement(bool* ok); 542 Statement ParseForStatement(bool* ok); 543 Statement ParseThrowStatement(bool* ok); 544 Statement ParseTryStatement(bool* ok); 545 Statement ParseDebuggerStatement(bool* ok); 546 547 Expression ParseExpression(bool accept_IN, bool* ok); 548 Expression ParseAssignmentExpression(bool accept_IN, bool* ok); 549 Expression ParseConditionalExpression(bool accept_IN, bool* ok); 550 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok); 551 Expression ParseUnaryExpression(bool* ok); 552 Expression ParsePostfixExpression(bool* ok); 553 Expression ParseLeftHandSideExpression(bool* ok); 554 Expression ParseNewExpression(bool* ok); 555 Expression ParseMemberExpression(bool* ok); 556 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok); 557 Expression ParsePrimaryExpression(bool* ok); 558 Expression ParseArrayLiteral(bool* ok); 559 Expression ParseObjectLiteral(bool* ok); 560 Expression ParseRegExpLiteral(bool seen_equal, bool* ok); 561 Expression ParseV8Intrinsic(bool* ok); 562 563 Arguments ParseArguments(bool* ok); 564 Expression ParseFunctionLiteral(bool* ok); 565 void ParseLazyFunctionLiteralBody(bool* ok); 566 567 Identifier ParseIdentifier(bool* ok); 568 Identifier ParseIdentifierName(bool* ok); 569 Identifier ParseIdentifierNameOrGetOrSet(bool* is_get, 570 bool* is_set, 571 bool* ok); 572 573 // Logs the currently parsed literal as a symbol in the preparser data. 574 void LogSymbol(); 575 // Log the currently parsed identifier. 576 Identifier GetIdentifierSymbol(); 577 // Log the currently parsed string literal. 578 Expression GetStringSymbol(); 579 580 i::Token::Value peek() { 581 if (stack_overflow_) return i::Token::ILLEGAL; 582 return scanner_->peek(); 583 } 584 585 i::Token::Value Next() { 586 if (stack_overflow_) return i::Token::ILLEGAL; 587 { 588 int marker; 589 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) { 590 // Further calls to peek/Next will return illegal token. 591 // The current one will still be returned. It might already 592 // have been seen using peek. 593 stack_overflow_ = true; 594 } 595 } 596 return scanner_->Next(); 597 } 598 599 bool peek_any_identifier(); 600 601 void set_language_mode(i::LanguageMode language_mode) { 602 scope_->set_language_mode(language_mode); 603 } 604 605 bool is_classic_mode() { 606 return scope_->language_mode() == i::CLASSIC_MODE; 607 } 608 609 bool is_extended_mode() { 610 return scope_->language_mode() == i::EXTENDED_MODE; 611 } 612 613 i::LanguageMode language_mode() { return scope_->language_mode(); } 614 615 void Consume(i::Token::Value token) { Next(); } 616 617 void Expect(i::Token::Value token, bool* ok) { 618 if (Next() != token) { 619 *ok = false; 620 } 621 } 622 623 bool Check(i::Token::Value token) { 624 i::Token::Value next = peek(); 625 if (next == token) { 626 Consume(next); 627 return true; 628 } 629 return false; 630 } 631 void ExpectSemicolon(bool* ok); 632 633 static int Precedence(i::Token::Value tok, bool accept_IN); 634 635 void SetStrictModeViolation(i::Scanner::Location, 636 const char* type, 637 bool* ok); 638 639 void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok); 640 641 void StrictModeIdentifierViolation(i::Scanner::Location, 642 const char* eval_args_type, 643 Identifier identifier, 644 bool* ok); 645 646 i::Scanner* scanner_; 647 i::ParserRecorder* log_; 648 Scope* scope_; 649 uintptr_t stack_limit_; 650 i::Scanner::Location strict_mode_violation_location_; 651 const char* strict_mode_violation_type_; 652 bool stack_overflow_; 653 bool allow_lazy_; 654 bool allow_modules_; 655 bool allow_natives_syntax_; 656 bool parenthesized_function_; 657 bool harmony_scoping_; 658 }; 659 } } // v8::preparser 660 661 #endif // V8_PREPARSER_H 662