// Copyright 2006-2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "v8.h" 29 30 #include "api.h" 31 #include "ast.h" 32 #include "bootstrapper.h" 33 #include "compiler.h" 34 #include "platform.h" 35 #include "runtime.h" 36 #include "parser.h" 37 #include "scopes.h" 38 #include "string-stream.h" 39 40 namespace v8 { 41 namespace internal { 42 43 class ParserFactory; 44 class ParserLog; 45 class TemporaryScope; 46 class Target; 47 48 template <typename T> class ZoneListWrapper; 49 50 51 // PositionStack is used for on-stack allocation of token positions for 52 // new expressions. Please look at ParseNewExpression. 53 54 class PositionStack { 55 public: 56 explicit PositionStack(bool* ok) : top_(NULL), ok_(ok) {} 57 ~PositionStack() { ASSERT(!*ok_ || is_empty()); } 58 59 class Element { 60 public: 61 Element(PositionStack* stack, int value) { 62 previous_ = stack->top(); 63 value_ = value; 64 stack->set_top(this); 65 } 66 67 private: 68 Element* previous() { return previous_; } 69 int value() { return value_; } 70 friend class PositionStack; 71 Element* previous_; 72 int value_; 73 }; 74 75 bool is_empty() { return top_ == NULL; } 76 int pop() { 77 ASSERT(!is_empty()); 78 int result = top_->value(); 79 top_ = top_->previous(); 80 return result; 81 } 82 83 private: 84 Element* top() { return top_; } 85 void set_top(Element* value) { top_ = value; } 86 Element* top_; 87 bool* ok_; 88 }; 89 90 91 class Parser { 92 public: 93 Parser(Handle<Script> script, bool allow_natives_syntax, 94 v8::Extension* extension, ParserMode is_pre_parsing, 95 ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data); 96 virtual ~Parser() { } 97 98 // Pre-parse the program from the character stream; returns true on 99 // success, false if a stack-overflow happened during parsing. 
100 bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream); 101 102 void ReportMessage(const char* message, Vector<const char*> args); 103 virtual void ReportMessageAt(Scanner::Location loc, 104 const char* message, 105 Vector<const char*> args) = 0; 106 107 108 // Returns NULL if parsing failed. 109 FunctionLiteral* ParseProgram(Handle<String> source, 110 unibrow::CharacterStream* stream, 111 bool in_global_context); 112 FunctionLiteral* ParseLazy(Handle<String> source, 113 Handle<String> name, 114 int start_position, bool is_expression); 115 FunctionLiteral* ParseJson(Handle<String> source, 116 unibrow::CharacterStream* stream); 117 118 // The minimum number of contiguous assignment that will 119 // be treated as an initialization block. Benchmarks show that 120 // the overhead exceeds the savings below this limit. 121 static const int kMinInitializationBlock = 3; 122 123 protected: 124 125 enum Mode { 126 PARSE_LAZILY, 127 PARSE_EAGERLY 128 }; 129 130 // Report syntax error 131 void ReportUnexpectedToken(Token::Value token); 132 133 Handle<Script> script_; 134 Scanner scanner_; 135 136 Scope* top_scope_; 137 int with_nesting_level_; 138 139 TemporaryScope* temp_scope_; 140 Mode mode_; 141 142 Target* target_stack_; // for break, continue statements 143 bool allow_natives_syntax_; 144 v8::Extension* extension_; 145 ParserFactory* factory_; 146 ParserLog* log_; 147 bool is_pre_parsing_; 148 ScriptDataImpl* pre_data_; 149 150 bool inside_with() const { return with_nesting_level_ > 0; } 151 ParserFactory* factory() const { return factory_; } 152 ParserLog* log() const { return log_; } 153 Scanner& scanner() { return scanner_; } 154 Mode mode() const { return mode_; } 155 ScriptDataImpl* pre_data() const { return pre_data_; } 156 157 // All ParseXXX functions take as the last argument an *ok parameter 158 // which is set to false if parsing failed; it is unchanged otherwise. 
159 // By making the 'exception handling' explicit, we are forced to check 160 // for failure at the call sites. 161 void* ParseSourceElements(ZoneListWrapper<Statement>* processor, 162 int end_token, bool* ok); 163 Statement* ParseStatement(ZoneStringList* labels, bool* ok); 164 Statement* ParseFunctionDeclaration(bool* ok); 165 Statement* ParseNativeDeclaration(bool* ok); 166 Block* ParseBlock(ZoneStringList* labels, bool* ok); 167 Block* ParseVariableStatement(bool* ok); 168 Block* ParseVariableDeclarations(bool accept_IN, Expression** var, bool* ok); 169 Statement* ParseExpressionOrLabelledStatement(ZoneStringList* labels, 170 bool* ok); 171 IfStatement* ParseIfStatement(ZoneStringList* labels, bool* ok); 172 Statement* ParseContinueStatement(bool* ok); 173 Statement* ParseBreakStatement(ZoneStringList* labels, bool* ok); 174 Statement* ParseReturnStatement(bool* ok); 175 Block* WithHelper(Expression* obj, 176 ZoneStringList* labels, 177 bool is_catch_block, 178 bool* ok); 179 Statement* ParseWithStatement(ZoneStringList* labels, bool* ok); 180 CaseClause* ParseCaseClause(bool* default_seen_ptr, bool* ok); 181 SwitchStatement* ParseSwitchStatement(ZoneStringList* labels, bool* ok); 182 DoWhileStatement* ParseDoWhileStatement(ZoneStringList* labels, bool* ok); 183 WhileStatement* ParseWhileStatement(ZoneStringList* labels, bool* ok); 184 Statement* ParseForStatement(ZoneStringList* labels, bool* ok); 185 Statement* ParseThrowStatement(bool* ok); 186 Expression* MakeCatchContext(Handle<String> id, VariableProxy* value); 187 TryStatement* ParseTryStatement(bool* ok); 188 DebuggerStatement* ParseDebuggerStatement(bool* ok); 189 190 Expression* ParseExpression(bool accept_IN, bool* ok); 191 Expression* ParseAssignmentExpression(bool accept_IN, bool* ok); 192 Expression* ParseConditionalExpression(bool accept_IN, bool* ok); 193 Expression* ParseBinaryExpression(int prec, bool accept_IN, bool* ok); 194 Expression* ParseUnaryExpression(bool* ok); 195 Expression* 
ParsePostfixExpression(bool* ok); 196 Expression* ParseLeftHandSideExpression(bool* ok); 197 Expression* ParseNewExpression(bool* ok); 198 Expression* ParseMemberExpression(bool* ok); 199 Expression* ParseNewPrefix(PositionStack* stack, bool* ok); 200 Expression* ParseMemberWithNewPrefixesExpression(PositionStack* stack, 201 bool* ok); 202 Expression* ParsePrimaryExpression(bool* ok); 203 Expression* ParseArrayLiteral(bool* ok); 204 Expression* ParseObjectLiteral(bool* ok); 205 Expression* ParseRegExpLiteral(bool seen_equal, bool* ok); 206 207 // Populate the constant properties fixed array for a materialized object 208 // literal. 209 void BuildObjectLiteralConstantProperties( 210 ZoneList<ObjectLiteral::Property*>* properties, 211 Handle<FixedArray> constants, 212 bool* is_simple, 213 int* depth); 214 215 // Populate the literals fixed array for a materialized array literal. 216 void BuildArrayLiteralBoilerplateLiterals(ZoneList<Expression*>* properties, 217 Handle<FixedArray> constants, 218 bool* is_simple, 219 int* depth); 220 221 // Decide if a property should be in the object boilerplate. 222 bool IsBoilerplateProperty(ObjectLiteral::Property* property); 223 // If the expression is a literal, return the literal value; 224 // if the expression is a materialized literal and is simple return a 225 // compile time value as encoded by CompileTimeValue::GetValue(). 226 // Otherwise, return undefined literal as the placeholder 227 // in the object literal boilerplate. 228 Handle<Object> GetBoilerplateValue(Expression* expression); 229 230 enum FunctionLiteralType { 231 EXPRESSION, 232 DECLARATION, 233 NESTED 234 }; 235 236 ZoneList<Expression*>* ParseArguments(bool* ok); 237 FunctionLiteral* ParseFunctionLiteral(Handle<String> var_name, 238 int function_token_position, 239 FunctionLiteralType type, 240 bool* ok); 241 242 243 // Magical syntax support. 
244 Expression* ParseV8Intrinsic(bool* ok); 245 246 INLINE(Token::Value peek()) { return scanner_.peek(); } 247 INLINE(Token::Value Next()) { return scanner_.Next(); } 248 INLINE(void Consume(Token::Value token)); 249 void Expect(Token::Value token, bool* ok); 250 bool Check(Token::Value token); 251 void ExpectSemicolon(bool* ok); 252 253 // Get odd-ball literals. 254 Literal* GetLiteralUndefined(); 255 Literal* GetLiteralTheHole(); 256 Literal* GetLiteralNumber(double value); 257 258 Handle<String> ParseIdentifier(bool* ok); 259 Handle<String> ParseIdentifierOrGetOrSet(bool* is_get, 260 bool* is_set, 261 bool* ok); 262 263 // Parser support 264 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode, 265 FunctionLiteral* fun, 266 bool resolve, 267 bool* ok) = 0; 268 269 bool TargetStackContainsLabel(Handle<String> label); 270 BreakableStatement* LookupBreakTarget(Handle<String> label, bool* ok); 271 IterationStatement* LookupContinueTarget(Handle<String> label, bool* ok); 272 273 void RegisterTargetUse(BreakTarget* target, Target* stop); 274 275 // Create a number literal. 276 Literal* NewNumberLiteral(double value); 277 278 // Generate AST node that throw a ReferenceError with the given type. 279 Expression* NewThrowReferenceError(Handle<String> type); 280 281 // Generate AST node that throw a SyntaxError with the given 282 // type. The first argument may be null (in the handle sense) in 283 // which case no arguments are passed to the constructor. 284 Expression* NewThrowSyntaxError(Handle<String> type, Handle<Object> first); 285 286 // Generate AST node that throw a TypeError with the given 287 // type. Both arguments must be non-null (in the handle sense). 288 Expression* NewThrowTypeError(Handle<String> type, 289 Handle<Object> first, 290 Handle<Object> second); 291 292 // Generic AST generator for throwing errors from compiled code. 
293 Expression* NewThrowError(Handle<String> constructor, 294 Handle<String> type, 295 Vector< Handle<Object> > arguments); 296 297 // JSON is a subset of JavaScript, as specified in, e.g., the ECMAScript 5 298 // specification section 15.12.1 (and appendix A.8). 299 // The grammar is given section 15.12.1.2 (and appendix A.8.2). 300 301 // Parse JSON input as a single JSON value. 302 Expression* ParseJson(bool* ok); 303 304 // Parse a single JSON value from input (grammar production JSONValue). 305 // A JSON value is either a (double-quoted) string literal, a number literal, 306 // one of "true", "false", or "null", or an object or array literal. 307 Expression* ParseJsonValue(bool* ok); 308 // Parse a JSON object literal (grammar production JSONObject). 309 // An object literal is a squiggly-braced and comma separated sequence 310 // (possibly empty) of key/value pairs, where the key is a JSON string 311 // literal, the value is a JSON value, and the two are spearated by a colon. 312 // A JavaScript object also allows numbers and identifiers as keys. 313 Expression* ParseJsonObject(bool* ok); 314 // Parses a JSON array literal (grammar production JSONArray). An array 315 // literal is a square-bracketed and comma separated sequence (possibly empty) 316 // of JSON values. 317 // A JavaScript array allows leaving out values from the sequence. 318 Expression* ParseJsonArray(bool* ok); 319 320 friend class Target; 321 friend class TargetScope; 322 friend class LexicalScope; 323 friend class TemporaryScope; 324 }; 325 326 327 template <typename T, int initial_size> 328 class BufferedZoneList { 329 public: 330 331 BufferedZoneList() : 332 list_(NULL), last_(NULL) {} 333 334 // Adds element at end of list. This element is buffered and can 335 // be read using last() or removed using RemoveLast until a new Add or until 336 // RemoveLast or GetList has been called. 
337 void Add(T* value) { 338 if (last_ != NULL) { 339 if (list_ == NULL) { 340 list_ = new ZoneList<T*>(initial_size); 341 } 342 list_->Add(last_); 343 } 344 last_ = value; 345 } 346 347 T* last() { 348 ASSERT(last_ != NULL); 349 return last_; 350 } 351 352 T* RemoveLast() { 353 ASSERT(last_ != NULL); 354 T* result = last_; 355 if (list_ != NULL && list_->length() > 0) 356 last_ = list_->RemoveLast(); 357 else 358 last_ = NULL; 359 return result; 360 } 361 362 T* Get(int i) { 363 ASSERT(0 <= i && i < length()); 364 if (list_ == NULL) { 365 ASSERT_EQ(0, i); 366 return last_; 367 } else { 368 if (i == list_->length()) { 369 ASSERT(last_ != NULL); 370 return last_; 371 } else { 372 return list_->at(i); 373 } 374 } 375 } 376 377 void Clear() { 378 list_ = NULL; 379 last_ = NULL; 380 } 381 382 int length() { 383 int length = (list_ == NULL) ? 0 : list_->length(); 384 return length + ((last_ == NULL) ? 0 : 1); 385 } 386 387 ZoneList<T*>* GetList() { 388 if (list_ == NULL) { 389 list_ = new ZoneList<T*>(initial_size); 390 } 391 if (last_ != NULL) { 392 list_->Add(last_); 393 last_ = NULL; 394 } 395 return list_; 396 } 397 398 private: 399 ZoneList<T*>* list_; 400 T* last_; 401 }; 402 403 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 404 class RegExpBuilder: public ZoneObject { 405 public: 406 RegExpBuilder(); 407 void AddCharacter(uc16 character); 408 // "Adds" an empty expression. 
Does nothing except consume a 409 // following quantifier 410 void AddEmpty(); 411 void AddAtom(RegExpTree* tree); 412 void AddAssertion(RegExpTree* tree); 413 void NewAlternative(); // '|' 414 void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type); 415 RegExpTree* ToRegExp(); 416 private: 417 void FlushCharacters(); 418 void FlushText(); 419 void FlushTerms(); 420 bool pending_empty_; 421 ZoneList<uc16>* characters_; 422 BufferedZoneList<RegExpTree, 2> terms_; 423 BufferedZoneList<RegExpTree, 2> text_; 424 BufferedZoneList<RegExpTree, 2> alternatives_; 425 #ifdef DEBUG 426 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 427 #define LAST(x) last_added_ = x; 428 #else 429 #define LAST(x) 430 #endif 431 }; 432 433 434 RegExpBuilder::RegExpBuilder() 435 : pending_empty_(false), 436 characters_(NULL), 437 terms_(), 438 alternatives_() 439 #ifdef DEBUG 440 , last_added_(ADD_NONE) 441 #endif 442 {} 443 444 445 void RegExpBuilder::FlushCharacters() { 446 pending_empty_ = false; 447 if (characters_ != NULL) { 448 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); 449 characters_ = NULL; 450 text_.Add(atom); 451 LAST(ADD_ATOM); 452 } 453 } 454 455 456 void RegExpBuilder::FlushText() { 457 FlushCharacters(); 458 int num_text = text_.length(); 459 if (num_text == 0) { 460 return; 461 } else if (num_text == 1) { 462 terms_.Add(text_.last()); 463 } else { 464 RegExpText* text = new RegExpText(); 465 for (int i = 0; i < num_text; i++) 466 text_.Get(i)->AppendToText(text); 467 terms_.Add(text); 468 } 469 text_.Clear(); 470 } 471 472 473 void RegExpBuilder::AddCharacter(uc16 c) { 474 pending_empty_ = false; 475 if (characters_ == NULL) { 476 characters_ = new ZoneList<uc16>(4); 477 } 478 characters_->Add(c); 479 LAST(ADD_CHAR); 480 } 481 482 483 void RegExpBuilder::AddEmpty() { 484 pending_empty_ = true; 485 } 486 487 488 void RegExpBuilder::AddAtom(RegExpTree* term) { 489 if (term->IsEmpty()) { 490 AddEmpty(); 491 return; 492 
} 493 if (term->IsTextElement()) { 494 FlushCharacters(); 495 text_.Add(term); 496 } else { 497 FlushText(); 498 terms_.Add(term); 499 } 500 LAST(ADD_ATOM); 501 } 502 503 504 void RegExpBuilder::AddAssertion(RegExpTree* assert) { 505 FlushText(); 506 terms_.Add(assert); 507 LAST(ADD_ASSERT); 508 } 509 510 511 void RegExpBuilder::NewAlternative() { 512 FlushTerms(); 513 } 514 515 516 void RegExpBuilder::FlushTerms() { 517 FlushText(); 518 int num_terms = terms_.length(); 519 RegExpTree* alternative; 520 if (num_terms == 0) { 521 alternative = RegExpEmpty::GetInstance(); 522 } else if (num_terms == 1) { 523 alternative = terms_.last(); 524 } else { 525 alternative = new RegExpAlternative(terms_.GetList()); 526 } 527 alternatives_.Add(alternative); 528 terms_.Clear(); 529 LAST(ADD_NONE); 530 } 531 532 533 RegExpTree* RegExpBuilder::ToRegExp() { 534 FlushTerms(); 535 int num_alternatives = alternatives_.length(); 536 if (num_alternatives == 0) { 537 return RegExpEmpty::GetInstance(); 538 } 539 if (num_alternatives == 1) { 540 return alternatives_.last(); 541 } 542 return new RegExpDisjunction(alternatives_.GetList()); 543 } 544 545 546 void RegExpBuilder::AddQuantifierToAtom(int min, 547 int max, 548 RegExpQuantifier::Type type) { 549 if (pending_empty_) { 550 pending_empty_ = false; 551 return; 552 } 553 RegExpTree* atom; 554 if (characters_ != NULL) { 555 ASSERT(last_added_ == ADD_CHAR); 556 // Last atom was character. 
557 Vector<const uc16> char_vector = characters_->ToConstVector(); 558 int num_chars = char_vector.length(); 559 if (num_chars > 1) { 560 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); 561 text_.Add(new RegExpAtom(prefix)); 562 char_vector = char_vector.SubVector(num_chars - 1, num_chars); 563 } 564 characters_ = NULL; 565 atom = new RegExpAtom(char_vector); 566 FlushText(); 567 } else if (text_.length() > 0) { 568 ASSERT(last_added_ == ADD_ATOM); 569 atom = text_.RemoveLast(); 570 FlushText(); 571 } else if (terms_.length() > 0) { 572 ASSERT(last_added_ == ADD_ATOM); 573 atom = terms_.RemoveLast(); 574 if (atom->max_match() == 0) { 575 // Guaranteed to only match an empty string. 576 LAST(ADD_TERM); 577 if (min == 0) { 578 return; 579 } 580 terms_.Add(atom); 581 return; 582 } 583 } else { 584 // Only call immediately after adding an atom or character! 585 UNREACHABLE(); 586 return; 587 } 588 terms_.Add(new RegExpQuantifier(min, max, type, atom)); 589 LAST(ADD_TERM); 590 } 591 592 593 class RegExpParser { 594 public: 595 RegExpParser(FlatStringReader* in, 596 Handle<String>* error, 597 bool multiline_mode); 598 RegExpTree* ParsePattern(); 599 RegExpTree* ParseDisjunction(); 600 RegExpTree* ParseGroup(); 601 RegExpTree* ParseCharacterClass(); 602 603 // Parses a {...,...} quantifier and stores the range in the given 604 // out parameters. 605 bool ParseIntervalQuantifier(int* min_out, int* max_out); 606 607 // Parses and returns a single escaped character. The character 608 // must not be 'b' or 'B' since they are usually handle specially. 609 uc32 ParseClassCharacterEscape(); 610 611 // Checks whether the following is a length-digit hexadecimal number, 612 // and sets the value if it is. 613 bool ParseHexEscape(int length, uc32* value); 614 615 uc32 ParseControlLetterEscape(); 616 uc32 ParseOctalLiteral(); 617 618 // Tries to parse the input as a back reference. 
If successful it 619 // stores the result in the output parameter and returns true. If 620 // it fails it will push back the characters read so the same characters 621 // can be reparsed. 622 bool ParseBackReferenceIndex(int* index_out); 623 624 CharacterRange ParseClassAtom(uc16* char_class); 625 RegExpTree* ReportError(Vector<const char> message); 626 void Advance(); 627 void Advance(int dist); 628 void Reset(int pos); 629 630 // Reports whether the pattern might be used as a literal search string. 631 // Only use if the result of the parse is a single atom node. 632 bool simple(); 633 bool contains_anchor() { return contains_anchor_; } 634 void set_contains_anchor() { contains_anchor_ = true; } 635 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 636 int position() { return next_pos_ - 1; } 637 bool failed() { return failed_; } 638 639 static const int kMaxCaptures = 1 << 16; 640 static const uc32 kEndMarker = (1 << 21); 641 private: 642 enum SubexpressionType { 643 INITIAL, 644 CAPTURE, // All positive values represent captures. 645 POSITIVE_LOOKAHEAD, 646 NEGATIVE_LOOKAHEAD, 647 GROUPING 648 }; 649 650 class RegExpParserState : public ZoneObject { 651 public: 652 RegExpParserState(RegExpParserState* previous_state, 653 SubexpressionType group_type, 654 int disjunction_capture_index) 655 : previous_state_(previous_state), 656 builder_(new RegExpBuilder()), 657 group_type_(group_type), 658 disjunction_capture_index_(disjunction_capture_index) {} 659 // Parser state of containing expression, if any. 660 RegExpParserState* previous_state() { return previous_state_; } 661 bool IsSubexpression() { return previous_state_ != NULL; } 662 // RegExpBuilder building this regexp's AST. 663 RegExpBuilder* builder() { return builder_; } 664 // Type of regexp being parsed (parenthesized group or entire regexp). 
665 SubexpressionType group_type() { return group_type_; } 666 // Index in captures array of first capture in this sub-expression, if any. 667 // Also the capture index of this sub-expression itself, if group_type 668 // is CAPTURE. 669 int capture_index() { return disjunction_capture_index_; } 670 private: 671 // Linked list implementation of stack of states. 672 RegExpParserState* previous_state_; 673 // Builder for the stored disjunction. 674 RegExpBuilder* builder_; 675 // Stored disjunction type (capture, look-ahead or grouping), if any. 676 SubexpressionType group_type_; 677 // Stored disjunction's capture index (if any). 678 int disjunction_capture_index_; 679 }; 680 681 uc32 current() { return current_; } 682 bool has_more() { return has_more_; } 683 bool has_next() { return next_pos_ < in()->length(); } 684 uc32 Next(); 685 FlatStringReader* in() { return in_; } 686 void ScanForCaptures(); 687 uc32 current_; 688 bool has_more_; 689 bool multiline_; 690 int next_pos_; 691 FlatStringReader* in_; 692 Handle<String>* error_; 693 bool simple_; 694 bool contains_anchor_; 695 ZoneList<RegExpCapture*>* captures_; 696 bool is_scanned_for_captures_; 697 // The capture count is only valid after we have scanned for captures. 698 int capture_count_; 699 bool failed_; 700 }; 701 702 703 // A temporary scope stores information during parsing, just like 704 // a plain scope. However, temporary scopes are not kept around 705 // after parsing or referenced by syntax trees so they can be stack- 706 // allocated and hence used by the pre-parser. 
707 class TemporaryScope BASE_EMBEDDED { 708 public: 709 explicit TemporaryScope(Parser* parser); 710 ~TemporaryScope(); 711 712 int NextMaterializedLiteralIndex() { 713 int next_index = 714 materialized_literal_count_ + JSFunction::kLiteralsPrefixSize; 715 materialized_literal_count_++; 716 return next_index; 717 } 718 int materialized_literal_count() { return materialized_literal_count_; } 719 720 void SetThisPropertyAssignmentInfo( 721 bool only_simple_this_property_assignments, 722 Handle<FixedArray> this_property_assignments) { 723 only_simple_this_property_assignments_ = 724 only_simple_this_property_assignments; 725 this_property_assignments_ = this_property_assignments; 726 } 727 bool only_simple_this_property_assignments() { 728 return only_simple_this_property_assignments_; 729 } 730 Handle<FixedArray> this_property_assignments() { 731 return this_property_assignments_; 732 } 733 734 void AddProperty() { expected_property_count_++; } 735 int expected_property_count() { return expected_property_count_; } 736 private: 737 // Captures the number of literals that need materialization in the 738 // function. Includes regexp literals, and boilerplate for object 739 // and array literals. 740 int materialized_literal_count_; 741 742 // Properties count estimation. 
743 int expected_property_count_; 744 745 bool only_simple_this_property_assignments_; 746 Handle<FixedArray> this_property_assignments_; 747 748 // Bookkeeping 749 Parser* parser_; 750 TemporaryScope* parent_; 751 752 friend class Parser; 753 }; 754 755 756 TemporaryScope::TemporaryScope(Parser* parser) 757 : materialized_literal_count_(0), 758 expected_property_count_(0), 759 only_simple_this_property_assignments_(false), 760 this_property_assignments_(Factory::empty_fixed_array()), 761 parser_(parser), 762 parent_(parser->temp_scope_) { 763 parser->temp_scope_ = this; 764 } 765 766 767 TemporaryScope::~TemporaryScope() { 768 parser_->temp_scope_ = parent_; 769 } 770 771 772 // A zone list wrapper lets code either access a access a zone list 773 // or appear to do so while actually ignoring all operations. 774 template <typename T> 775 class ZoneListWrapper { 776 public: 777 ZoneListWrapper() : list_(NULL) { } 778 explicit ZoneListWrapper(int size) : list_(new ZoneList<T*>(size)) { } 779 void Add(T* that) { if (list_) list_->Add(that); } 780 int length() { return list_->length(); } 781 ZoneList<T*>* elements() { return list_; } 782 T* at(int index) { return list_->at(index); } 783 private: 784 ZoneList<T*>* list_; 785 }; 786 787 788 // Allocation macro that should be used to allocate objects that must 789 // only be allocated in real parsing mode. Note that in preparse mode 790 // not only is the syntax tree not created but the constructor 791 // arguments are not evaluated. 792 #define NEW(expr) (is_pre_parsing_ ? 
NULL : new expr) 793 794 795 class ParserFactory BASE_EMBEDDED { 796 public: 797 explicit ParserFactory(bool is_pre_parsing) : 798 is_pre_parsing_(is_pre_parsing) { } 799 800 virtual ~ParserFactory() { } 801 802 virtual Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with); 803 804 virtual Handle<String> LookupSymbol(const char* string, int length) { 805 return Handle<String>(); 806 } 807 808 virtual Handle<String> EmptySymbol() { 809 return Handle<String>(); 810 } 811 812 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { 813 if (obj == VariableProxySentinel::this_proxy()) { 814 return Property::this_property(); 815 } else { 816 return ValidLeftHandSideSentinel::instance(); 817 } 818 } 819 820 virtual Expression* NewCall(Expression* expression, 821 ZoneList<Expression*>* arguments, 822 int pos) { 823 return Call::sentinel(); 824 } 825 826 virtual Statement* EmptyStatement() { 827 return NULL; 828 } 829 830 template <typename T> ZoneListWrapper<T> NewList(int size) { 831 return is_pre_parsing_ ? ZoneListWrapper<T>() : ZoneListWrapper<T>(size); 832 } 833 834 private: 835 bool is_pre_parsing_; 836 }; 837 838 839 class ParserLog BASE_EMBEDDED { 840 public: 841 virtual ~ParserLog() { } 842 843 // Records the occurrence of a function. The returned object is 844 // only guaranteed to be valid until the next function has been 845 // logged. 
846 virtual FunctionEntry LogFunction(int start) { return FunctionEntry(); } 847 848 virtual void LogError() { } 849 }; 850 851 852 class AstBuildingParserFactory : public ParserFactory { 853 public: 854 AstBuildingParserFactory() : ParserFactory(false) { } 855 856 virtual Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with); 857 858 virtual Handle<String> LookupSymbol(const char* string, int length) { 859 return Factory::LookupSymbol(Vector<const char>(string, length)); 860 } 861 862 virtual Handle<String> EmptySymbol() { 863 return Factory::empty_symbol(); 864 } 865 866 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { 867 return new Property(obj, key, pos); 868 } 869 870 virtual Expression* NewCall(Expression* expression, 871 ZoneList<Expression*>* arguments, 872 int pos) { 873 return new Call(expression, arguments, pos); 874 } 875 876 virtual Statement* EmptyStatement(); 877 }; 878 879 880 class ParserRecorder: public ParserLog { 881 public: 882 ParserRecorder(); 883 virtual FunctionEntry LogFunction(int start); 884 virtual void LogError() { } 885 virtual void LogMessage(Scanner::Location loc, 886 const char* message, 887 Vector<const char*> args); 888 void WriteString(Vector<const char> str); 889 static const char* ReadString(unsigned* start, int* chars); 890 List<unsigned>* store() { return &store_; } 891 private: 892 bool has_error_; 893 List<unsigned> store_; 894 }; 895 896 897 FunctionEntry ScriptDataImpl::GetFunctionEnd(int start) { 898 if (nth(last_entry_).start_pos() > start) { 899 // If the last entry we looked up is higher than what we're 900 // looking for then it's useless and we reset it. 
901 last_entry_ = 0; 902 } 903 for (int i = last_entry_; i < EntryCount(); i++) { 904 FunctionEntry entry = nth(i); 905 if (entry.start_pos() == start) { 906 last_entry_ = i; 907 return entry; 908 } 909 } 910 return FunctionEntry(); 911 } 912 913 914 bool ScriptDataImpl::SanityCheck() { 915 if (store_.length() < static_cast<int>(ScriptDataImpl::kHeaderSize)) 916 return false; 917 if (magic() != ScriptDataImpl::kMagicNumber) 918 return false; 919 if (version() != ScriptDataImpl::kCurrentVersion) 920 return false; 921 return true; 922 } 923 924 925 int ScriptDataImpl::EntryCount() { 926 return (store_.length() - kHeaderSize) / FunctionEntry::kSize; 927 } 928 929 930 FunctionEntry ScriptDataImpl::nth(int n) { 931 int offset = kHeaderSize + n * FunctionEntry::kSize; 932 return FunctionEntry(Vector<unsigned>(store_.start() + offset, 933 FunctionEntry::kSize)); 934 } 935 936 937 ParserRecorder::ParserRecorder() 938 : has_error_(false), store_(4) { 939 Vector<unsigned> preamble = store()->AddBlock(0, ScriptDataImpl::kHeaderSize); 940 preamble[ScriptDataImpl::kMagicOffset] = ScriptDataImpl::kMagicNumber; 941 preamble[ScriptDataImpl::kVersionOffset] = ScriptDataImpl::kCurrentVersion; 942 preamble[ScriptDataImpl::kHasErrorOffset] = false; 943 } 944 945 946 void ParserRecorder::WriteString(Vector<const char> str) { 947 store()->Add(str.length()); 948 for (int i = 0; i < str.length(); i++) 949 store()->Add(str[i]); 950 } 951 952 953 const char* ParserRecorder::ReadString(unsigned* start, int* chars) { 954 int length = start[0]; 955 char* result = NewArray<char>(length + 1); 956 for (int i = 0; i < length; i++) 957 result[i] = start[i + 1]; 958 result[length] = '\0'; 959 if (chars != NULL) *chars = length; 960 return result; 961 } 962 963 964 void ParserRecorder::LogMessage(Scanner::Location loc, const char* message, 965 Vector<const char*> args) { 966 if (has_error_) return; 967 store()->Rewind(ScriptDataImpl::kHeaderSize); 968 store()->at(ScriptDataImpl::kHasErrorOffset) = 
true; 969 store()->Add(loc.beg_pos); 970 store()->Add(loc.end_pos); 971 store()->Add(args.length()); 972 WriteString(CStrVector(message)); 973 for (int i = 0; i < args.length(); i++) 974 WriteString(CStrVector(args[i])); 975 } 976 977 978 Scanner::Location ScriptDataImpl::MessageLocation() { 979 int beg_pos = Read(0); 980 int end_pos = Read(1); 981 return Scanner::Location(beg_pos, end_pos); 982 } 983 984 985 const char* ScriptDataImpl::BuildMessage() { 986 unsigned* start = ReadAddress(3); 987 return ParserRecorder::ReadString(start, NULL); 988 } 989 990 991 Vector<const char*> ScriptDataImpl::BuildArgs() { 992 int arg_count = Read(2); 993 const char** array = NewArray<const char*>(arg_count); 994 int pos = ScriptDataImpl::kHeaderSize + Read(3); 995 for (int i = 0; i < arg_count; i++) { 996 int count = 0; 997 array[i] = ParserRecorder::ReadString(ReadAddress(pos), &count); 998 pos += count + 1; 999 } 1000 return Vector<const char*>(array, arg_count); 1001 } 1002 1003 1004 unsigned ScriptDataImpl::Read(int position) { 1005 return store_[ScriptDataImpl::kHeaderSize + position]; 1006 } 1007 1008 1009 unsigned* ScriptDataImpl::ReadAddress(int position) { 1010 return &store_[ScriptDataImpl::kHeaderSize + position]; 1011 } 1012 1013 1014 FunctionEntry ParserRecorder::LogFunction(int start) { 1015 if (has_error_) return FunctionEntry(); 1016 FunctionEntry result(store()->AddBlock(0, FunctionEntry::kSize)); 1017 result.set_start_pos(start); 1018 return result; 1019 } 1020 1021 1022 class AstBuildingParser : public Parser { 1023 public: 1024 AstBuildingParser(Handle<Script> script, bool allow_natives_syntax, 1025 v8::Extension* extension, ScriptDataImpl* pre_data) 1026 : Parser(script, allow_natives_syntax, extension, PARSE, 1027 factory(), log(), pre_data) { } 1028 virtual void ReportMessageAt(Scanner::Location loc, const char* message, 1029 Vector<const char*> args); 1030 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode, 1031 FunctionLiteral* fun, 
bool resolve, bool* ok); 1032 AstBuildingParserFactory* factory() { return &factory_; } 1033 ParserLog* log() { return &log_; } 1034 1035 private: 1036 ParserLog log_; 1037 AstBuildingParserFactory factory_; 1038 }; 1039 1040 1041 class PreParser : public Parser { 1042 public: 1043 PreParser(Handle<Script> script, bool allow_natives_syntax, 1044 v8::Extension* extension) 1045 : Parser(script, allow_natives_syntax, extension, PREPARSE, 1046 factory(), recorder(), NULL), 1047 factory_(true) { } 1048 virtual void ReportMessageAt(Scanner::Location loc, const char* message, 1049 Vector<const char*> args); 1050 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode, 1051 FunctionLiteral* fun, bool resolve, bool* ok); 1052 ParserFactory* factory() { return &factory_; } 1053 ParserRecorder* recorder() { return &recorder_; } 1054 1055 private: 1056 ParserRecorder recorder_; 1057 ParserFactory factory_; 1058 }; 1059 1060 1061 Scope* AstBuildingParserFactory::NewScope(Scope* parent, Scope::Type type, 1062 bool inside_with) { 1063 Scope* result = new Scope(parent, type); 1064 result->Initialize(inside_with); 1065 return result; 1066 } 1067 1068 1069 Statement* AstBuildingParserFactory::EmptyStatement() { 1070 // Use a statically allocated empty statement singleton to avoid 1071 // allocating lots and lots of empty statements. 
1072 static v8::internal::EmptyStatement empty; 1073 return ∅ 1074 } 1075 1076 1077 Scope* ParserFactory::NewScope(Scope* parent, Scope::Type type, 1078 bool inside_with) { 1079 ASSERT(parent != NULL); 1080 parent->type_ = type; 1081 return parent; 1082 } 1083 1084 1085 VariableProxy* PreParser::Declare(Handle<String> name, Variable::Mode mode, 1086 FunctionLiteral* fun, bool resolve, 1087 bool* ok) { 1088 return NULL; 1089 } 1090 1091 1092 1093 // ---------------------------------------------------------------------------- 1094 // Target is a support class to facilitate manipulation of the 1095 // Parser's target_stack_ (the stack of potential 'break' and 1096 // 'continue' statement targets). Upon construction, a new target is 1097 // added; it is removed upon destruction. 1098 1099 class Target BASE_EMBEDDED { 1100 public: 1101 Target(Parser* parser, AstNode* node) 1102 : parser_(parser), node_(node), previous_(parser_->target_stack_) { 1103 parser_->target_stack_ = this; 1104 } 1105 1106 ~Target() { 1107 parser_->target_stack_ = previous_; 1108 } 1109 1110 Target* previous() { return previous_; } 1111 AstNode* node() { return node_; } 1112 1113 private: 1114 Parser* parser_; 1115 AstNode* node_; 1116 Target* previous_; 1117 }; 1118 1119 1120 class TargetScope BASE_EMBEDDED { 1121 public: 1122 explicit TargetScope(Parser* parser) 1123 : parser_(parser), previous_(parser->target_stack_) { 1124 parser->target_stack_ = NULL; 1125 } 1126 1127 ~TargetScope() { 1128 parser_->target_stack_ = previous_; 1129 } 1130 1131 private: 1132 Parser* parser_; 1133 Target* previous_; 1134 }; 1135 1136 1137 // ---------------------------------------------------------------------------- 1138 // LexicalScope is a support class to facilitate manipulation of the 1139 // Parser's scope stack. The constructor sets the parser's top scope 1140 // to the incoming scope, and the destructor resets it. 
1141 1142 class LexicalScope BASE_EMBEDDED { 1143 public: 1144 LexicalScope(Parser* parser, Scope* scope) 1145 : parser_(parser), 1146 prev_scope_(parser->top_scope_), 1147 prev_level_(parser->with_nesting_level_) { 1148 parser_->top_scope_ = scope; 1149 parser_->with_nesting_level_ = 0; 1150 } 1151 1152 ~LexicalScope() { 1153 parser_->top_scope_ = prev_scope_; 1154 parser_->with_nesting_level_ = prev_level_; 1155 } 1156 1157 private: 1158 Parser* parser_; 1159 Scope* prev_scope_; 1160 int prev_level_; 1161 }; 1162 1163 1164 // ---------------------------------------------------------------------------- 1165 // The CHECK_OK macro is a convenient macro to enforce error 1166 // handling for functions that may fail (by returning !*ok). 1167 // 1168 // CAUTION: This macro appends extra statements after a call, 1169 // thus it must never be used where only a single statement 1170 // is correct (e.g. an if statement branch w/o braces)! 1171 1172 #define CHECK_OK ok); \ 1173 if (!*ok) return NULL; \ 1174 ((void)0 1175 #define DUMMY ) // to make indentation work 1176 #undef DUMMY 1177 1178 #define CHECK_FAILED /**/); \ 1179 if (failed_) return NULL; \ 1180 ((void)0 1181 #define DUMMY ) // to make indentation work 1182 #undef DUMMY 1183 1184 // ---------------------------------------------------------------------------- 1185 // Implementation of Parser 1186 1187 Parser::Parser(Handle<Script> script, 1188 bool allow_natives_syntax, 1189 v8::Extension* extension, 1190 ParserMode is_pre_parsing, 1191 ParserFactory* factory, 1192 ParserLog* log, 1193 ScriptDataImpl* pre_data) 1194 : script_(script), 1195 scanner_(is_pre_parsing), 1196 top_scope_(NULL), 1197 with_nesting_level_(0), 1198 temp_scope_(NULL), 1199 target_stack_(NULL), 1200 allow_natives_syntax_(allow_natives_syntax), 1201 extension_(extension), 1202 factory_(factory), 1203 log_(log), 1204 is_pre_parsing_(is_pre_parsing == PREPARSE), 1205 pre_data_(pre_data) { 1206 } 1207 1208 1209 bool 
Parser::PreParseProgram(Handle<String> source, 1210 unibrow::CharacterStream* stream) { 1211 HistogramTimerScope timer(&Counters::pre_parse); 1212 AssertNoZoneAllocation assert_no_zone_allocation; 1213 AssertNoAllocation assert_no_allocation; 1214 NoHandleAllocation no_handle_allocation; 1215 scanner_.Init(source, stream, 0, JAVASCRIPT); 1216 ASSERT(target_stack_ == NULL); 1217 mode_ = PARSE_EAGERLY; 1218 DummyScope top_scope; 1219 LexicalScope scope(this, &top_scope); 1220 TemporaryScope temp_scope(this); 1221 ZoneListWrapper<Statement> processor; 1222 bool ok = true; 1223 ParseSourceElements(&processor, Token::EOS, &ok); 1224 return !scanner().stack_overflow(); 1225 } 1226 1227 1228 FunctionLiteral* Parser::ParseProgram(Handle<String> source, 1229 unibrow::CharacterStream* stream, 1230 bool in_global_context) { 1231 CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); 1232 1233 HistogramTimerScope timer(&Counters::parse); 1234 Counters::total_parse_size.Increment(source->length()); 1235 1236 // Initialize parser state. 1237 source->TryFlattenIfNotFlat(); 1238 scanner_.Init(source, stream, 0, JAVASCRIPT); 1239 ASSERT(target_stack_ == NULL); 1240 1241 // Compute the parsing mode. 1242 mode_ = FLAG_lazy ? PARSE_LAZILY : PARSE_EAGERLY; 1243 if (allow_natives_syntax_ || extension_ != NULL) mode_ = PARSE_EAGERLY; 1244 1245 Scope::Type type = 1246 in_global_context 1247 ? 
Scope::GLOBAL_SCOPE 1248 : Scope::EVAL_SCOPE; 1249 Handle<String> no_name = factory()->EmptySymbol(); 1250 1251 FunctionLiteral* result = NULL; 1252 { Scope* scope = factory()->NewScope(top_scope_, type, inside_with()); 1253 LexicalScope lexical_scope(this, scope); 1254 TemporaryScope temp_scope(this); 1255 ZoneListWrapper<Statement> body(16); 1256 bool ok = true; 1257 ParseSourceElements(&body, Token::EOS, &ok); 1258 if (ok) { 1259 result = NEW(FunctionLiteral( 1260 no_name, 1261 top_scope_, 1262 body.elements(), 1263 temp_scope.materialized_literal_count(), 1264 temp_scope.expected_property_count(), 1265 temp_scope.only_simple_this_property_assignments(), 1266 temp_scope.this_property_assignments(), 1267 0, 1268 0, 1269 source->length(), 1270 false)); 1271 } else if (scanner().stack_overflow()) { 1272 Top::StackOverflow(); 1273 } 1274 } 1275 1276 // Make sure the target stack is empty. 1277 ASSERT(target_stack_ == NULL); 1278 1279 // If there was a syntax error we have to get rid of the AST 1280 // and it is not safe to do so before the scope has been deleted. 1281 if (result == NULL) zone_scope.DeleteOnExit(); 1282 return result; 1283 } 1284 1285 1286 FunctionLiteral* Parser::ParseLazy(Handle<String> source, 1287 Handle<String> name, 1288 int start_position, 1289 bool is_expression) { 1290 CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); 1291 HistogramTimerScope timer(&Counters::parse_lazy); 1292 source->TryFlattenIfNotFlat(); 1293 Counters::total_parse_size.Increment(source->length()); 1294 SafeStringInputBuffer buffer(source.location()); 1295 1296 // Initialize parser state. 1297 scanner_.Init(source, &buffer, start_position, JAVASCRIPT); 1298 ASSERT(target_stack_ == NULL); 1299 mode_ = PARSE_EAGERLY; 1300 1301 // Place holder for the result. 1302 FunctionLiteral* result = NULL; 1303 1304 { 1305 // Parse the function literal. 
1306 Handle<String> no_name = factory()->EmptySymbol(); 1307 Scope* scope = 1308 factory()->NewScope(top_scope_, Scope::GLOBAL_SCOPE, inside_with()); 1309 LexicalScope lexical_scope(this, scope); 1310 TemporaryScope temp_scope(this); 1311 1312 FunctionLiteralType type = is_expression ? EXPRESSION : DECLARATION; 1313 bool ok = true; 1314 result = ParseFunctionLiteral(name, RelocInfo::kNoPosition, type, &ok); 1315 // Make sure the results agree. 1316 ASSERT(ok == (result != NULL)); 1317 // The only errors should be stack overflows. 1318 ASSERT(ok || scanner_.stack_overflow()); 1319 } 1320 1321 // Make sure the target stack is empty. 1322 ASSERT(target_stack_ == NULL); 1323 1324 // If there was a stack overflow we have to get rid of AST and it is 1325 // not safe to do before scope has been deleted. 1326 if (result == NULL) { 1327 Top::StackOverflow(); 1328 zone_scope.DeleteOnExit(); 1329 } 1330 return result; 1331 } 1332 1333 FunctionLiteral* Parser::ParseJson(Handle<String> source, 1334 unibrow::CharacterStream* stream) { 1335 CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT); 1336 1337 HistogramTimerScope timer(&Counters::parse); 1338 Counters::total_parse_size.Increment(source->length()); 1339 1340 // Initialize parser state. 
1341 source->TryFlattenIfNotFlat(); 1342 scanner_.Init(source, stream, 0, JSON); 1343 ASSERT(target_stack_ == NULL); 1344 1345 FunctionLiteral* result = NULL; 1346 Handle<String> no_name = factory()->EmptySymbol(); 1347 1348 { 1349 Scope* scope = factory()->NewScope(top_scope_, Scope::GLOBAL_SCOPE, false); 1350 LexicalScope lexical_scope(this, scope); 1351 TemporaryScope temp_scope(this); 1352 bool ok = true; 1353 Expression* expression = ParseJson(&ok); 1354 if (ok) { 1355 ZoneListWrapper<Statement> statement = factory()->NewList<Statement>(1); 1356 statement.Add(new ExpressionStatement(expression)); 1357 result = NEW(FunctionLiteral( 1358 no_name, 1359 top_scope_, 1360 statement.elements(), 1361 temp_scope.materialized_literal_count(), 1362 temp_scope.expected_property_count(), 1363 temp_scope.only_simple_this_property_assignments(), 1364 temp_scope.this_property_assignments(), 1365 0, 1366 0, 1367 source->length(), 1368 false)); 1369 } else if (scanner().stack_overflow()) { 1370 Top::StackOverflow(); 1371 } 1372 } 1373 1374 // Make sure the target stack is empty. 1375 ASSERT(target_stack_ == NULL); 1376 1377 // If there was a syntax error we have to get rid of the AST 1378 // and it is not safe to do so before the scope has been deleted. 
1379 if (result == NULL) zone_scope.DeleteOnExit(); 1380 return result; 1381 } 1382 1383 void Parser::ReportMessage(const char* type, Vector<const char*> args) { 1384 Scanner::Location source_location = scanner_.location(); 1385 ReportMessageAt(source_location, type, args); 1386 } 1387 1388 1389 void AstBuildingParser::ReportMessageAt(Scanner::Location source_location, 1390 const char* type, 1391 Vector<const char*> args) { 1392 MessageLocation location(script_, 1393 source_location.beg_pos, source_location.end_pos); 1394 Handle<JSArray> array = Factory::NewJSArray(args.length()); 1395 for (int i = 0; i < args.length(); i++) { 1396 SetElement(array, i, Factory::NewStringFromUtf8(CStrVector(args[i]))); 1397 } 1398 Handle<Object> result = Factory::NewSyntaxError(type, array); 1399 Top::Throw(*result, &location); 1400 } 1401 1402 1403 void PreParser::ReportMessageAt(Scanner::Location source_location, 1404 const char* type, 1405 Vector<const char*> args) { 1406 recorder()->LogMessage(source_location, type, args); 1407 } 1408 1409 1410 // Base class containing common code for the different finder classes used by 1411 // the parser. 1412 class ParserFinder { 1413 protected: 1414 ParserFinder() {} 1415 static Assignment* AsAssignment(Statement* stat) { 1416 if (stat == NULL) return NULL; 1417 ExpressionStatement* exp_stat = stat->AsExpressionStatement(); 1418 if (exp_stat == NULL) return NULL; 1419 return exp_stat->expression()->AsAssignment(); 1420 } 1421 }; 1422 1423 1424 // An InitializationBlockFinder finds and marks sequences of statements of the 1425 // form expr.a = ...; expr.b = ...; etc. 
1426 class InitializationBlockFinder : public ParserFinder { 1427 public: 1428 InitializationBlockFinder() 1429 : first_in_block_(NULL), last_in_block_(NULL), block_size_(0) {} 1430 1431 ~InitializationBlockFinder() { 1432 if (InBlock()) EndBlock(); 1433 } 1434 1435 void Update(Statement* stat) { 1436 Assignment* assignment = AsAssignment(stat); 1437 if (InBlock()) { 1438 if (BlockContinues(assignment)) { 1439 UpdateBlock(assignment); 1440 } else { 1441 EndBlock(); 1442 } 1443 } 1444 if (!InBlock() && (assignment != NULL) && 1445 (assignment->op() == Token::ASSIGN)) { 1446 StartBlock(assignment); 1447 } 1448 } 1449 1450 private: 1451 // Returns true if the expressions appear to denote the same object. 1452 // In the context of initialization blocks, we only consider expressions 1453 // of the form 'expr.x' or expr["x"]. 1454 static bool SameObject(Expression* e1, Expression* e2) { 1455 VariableProxy* v1 = e1->AsVariableProxy(); 1456 VariableProxy* v2 = e2->AsVariableProxy(); 1457 if (v1 != NULL && v2 != NULL) { 1458 return v1->name()->Equals(*v2->name()); 1459 } 1460 Property* p1 = e1->AsProperty(); 1461 Property* p2 = e2->AsProperty(); 1462 if ((p1 == NULL) || (p2 == NULL)) return false; 1463 Literal* key1 = p1->key()->AsLiteral(); 1464 Literal* key2 = p2->key()->AsLiteral(); 1465 if ((key1 == NULL) || (key2 == NULL)) return false; 1466 if (!key1->handle()->IsString() || !key2->handle()->IsString()) { 1467 return false; 1468 } 1469 String* name1 = String::cast(*key1->handle()); 1470 String* name2 = String::cast(*key2->handle()); 1471 if (!name1->Equals(name2)) return false; 1472 return SameObject(p1->obj(), p2->obj()); 1473 } 1474 1475 // Returns true if the expressions appear to denote different properties 1476 // of the same object. 
1477 static bool PropertyOfSameObject(Expression* e1, Expression* e2) { 1478 Property* p1 = e1->AsProperty(); 1479 Property* p2 = e2->AsProperty(); 1480 if ((p1 == NULL) || (p2 == NULL)) return false; 1481 return SameObject(p1->obj(), p2->obj()); 1482 } 1483 1484 bool BlockContinues(Assignment* assignment) { 1485 if ((assignment == NULL) || (first_in_block_ == NULL)) return false; 1486 if (assignment->op() != Token::ASSIGN) return false; 1487 return PropertyOfSameObject(first_in_block_->target(), 1488 assignment->target()); 1489 } 1490 1491 void StartBlock(Assignment* assignment) { 1492 first_in_block_ = assignment; 1493 last_in_block_ = assignment; 1494 block_size_ = 1; 1495 } 1496 1497 void UpdateBlock(Assignment* assignment) { 1498 last_in_block_ = assignment; 1499 ++block_size_; 1500 } 1501 1502 void EndBlock() { 1503 if (block_size_ >= Parser::kMinInitializationBlock) { 1504 first_in_block_->mark_block_start(); 1505 last_in_block_->mark_block_end(); 1506 } 1507 last_in_block_ = first_in_block_ = NULL; 1508 block_size_ = 0; 1509 } 1510 1511 bool InBlock() { return first_in_block_ != NULL; } 1512 1513 Assignment* first_in_block_; 1514 Assignment* last_in_block_; 1515 int block_size_; 1516 1517 DISALLOW_COPY_AND_ASSIGN(InitializationBlockFinder); 1518 }; 1519 1520 1521 // A ThisNamedPropertyAssigmentFinder finds and marks statements of the form 1522 // this.x = ...;, where x is a named property. It also determines whether a 1523 // function contains only assignments of this type. 1524 class ThisNamedPropertyAssigmentFinder : public ParserFinder { 1525 public: 1526 ThisNamedPropertyAssigmentFinder() 1527 : only_simple_this_property_assignments_(true), 1528 names_(NULL), 1529 assigned_arguments_(NULL), 1530 assigned_constants_(NULL) {} 1531 1532 void Update(Scope* scope, Statement* stat) { 1533 // Bail out if function already has property assignment that are 1534 // not simple this property assignments. 
1535 if (!only_simple_this_property_assignments_) { 1536 return; 1537 } 1538 1539 // Check whether this statement is of the form this.x = ...; 1540 Assignment* assignment = AsAssignment(stat); 1541 if (IsThisPropertyAssignment(assignment)) { 1542 HandleThisPropertyAssignment(scope, assignment); 1543 } else { 1544 only_simple_this_property_assignments_ = false; 1545 } 1546 } 1547 1548 // Returns whether only statements of the form this.x = y; where y is either a 1549 // constant or a function argument was encountered. 1550 bool only_simple_this_property_assignments() { 1551 return only_simple_this_property_assignments_; 1552 } 1553 1554 // Returns a fixed array containing three elements for each assignment of the 1555 // form this.x = y; 1556 Handle<FixedArray> GetThisPropertyAssignments() { 1557 if (names_ == NULL) { 1558 return Factory::empty_fixed_array(); 1559 } 1560 ASSERT(names_ != NULL); 1561 ASSERT(assigned_arguments_ != NULL); 1562 ASSERT_EQ(names_->length(), assigned_arguments_->length()); 1563 ASSERT_EQ(names_->length(), assigned_constants_->length()); 1564 Handle<FixedArray> assignments = 1565 Factory::NewFixedArray(names_->length() * 3); 1566 for (int i = 0; i < names_->length(); i++) { 1567 assignments->set(i * 3, *names_->at(i)); 1568 assignments->set(i * 3 + 1, Smi::FromInt(assigned_arguments_->at(i))); 1569 assignments->set(i * 3 + 2, *assigned_constants_->at(i)); 1570 } 1571 return assignments; 1572 } 1573 1574 private: 1575 bool IsThisPropertyAssignment(Assignment* assignment) { 1576 if (assignment != NULL) { 1577 Property* property = assignment->target()->AsProperty(); 1578 return assignment->op() == Token::ASSIGN 1579 && property != NULL 1580 && property->obj()->AsVariableProxy() != NULL 1581 && property->obj()->AsVariableProxy()->is_this(); 1582 } 1583 return false; 1584 } 1585 1586 void HandleThisPropertyAssignment(Scope* scope, Assignment* assignment) { 1587 // Check that the property assigned to is a named property. 
1588 Property* property = assignment->target()->AsProperty(); 1589 ASSERT(property != NULL); 1590 Literal* literal = property->key()->AsLiteral(); 1591 uint32_t dummy; 1592 if (literal != NULL && 1593 literal->handle()->IsString() && 1594 !String::cast(*(literal->handle()))->AsArrayIndex(&dummy)) { 1595 Handle<String> key = Handle<String>::cast(literal->handle()); 1596 1597 // Check whether the value assigned is either a constant or matches the 1598 // name of one of the arguments to the function. 1599 if (assignment->value()->AsLiteral() != NULL) { 1600 // Constant assigned. 1601 Literal* literal = assignment->value()->AsLiteral(); 1602 AssignmentFromConstant(key, literal->handle()); 1603 return; 1604 } else if (assignment->value()->AsVariableProxy() != NULL) { 1605 // Variable assigned. 1606 Handle<String> name = 1607 assignment->value()->AsVariableProxy()->name(); 1608 // Check whether the variable assigned matches an argument name. 1609 for (int i = 0; i < scope->num_parameters(); i++) { 1610 if (*scope->parameter(i)->name() == *name) { 1611 // Assigned from function argument. 1612 AssignmentFromParameter(key, i); 1613 return; 1614 } 1615 } 1616 } 1617 } 1618 // It is not a simple "this.x = value;" assignment with a constant 1619 // or parameter value. 1620 AssignmentFromSomethingElse(); 1621 } 1622 1623 void AssignmentFromParameter(Handle<String> name, int index) { 1624 EnsureAllocation(); 1625 names_->Add(name); 1626 assigned_arguments_->Add(index); 1627 assigned_constants_->Add(Factory::undefined_value()); 1628 } 1629 1630 void AssignmentFromConstant(Handle<String> name, Handle<Object> value) { 1631 EnsureAllocation(); 1632 names_->Add(name); 1633 assigned_arguments_->Add(-1); 1634 assigned_constants_->Add(value); 1635 } 1636 1637 void AssignmentFromSomethingElse() { 1638 // The this assignment is not a simple one. 
1639 only_simple_this_property_assignments_ = false; 1640 } 1641 1642 void EnsureAllocation() { 1643 if (names_ == NULL) { 1644 ASSERT(assigned_arguments_ == NULL); 1645 ASSERT(assigned_constants_ == NULL); 1646 names_ = new ZoneStringList(4); 1647 assigned_arguments_ = new ZoneList<int>(4); 1648 assigned_constants_ = new ZoneObjectList(4); 1649 } 1650 } 1651 1652 bool only_simple_this_property_assignments_; 1653 ZoneStringList* names_; 1654 ZoneList<int>* assigned_arguments_; 1655 ZoneObjectList* assigned_constants_; 1656 }; 1657 1658 1659 void* Parser::ParseSourceElements(ZoneListWrapper<Statement>* processor, 1660 int end_token, 1661 bool* ok) { 1662 // SourceElements :: 1663 // (Statement)* <end_token> 1664 1665 // Allocate a target stack to use for this set of source 1666 // elements. This way, all scripts and functions get their own 1667 // target stack thus avoiding illegal breaks and continues across 1668 // functions. 1669 TargetScope scope(this); 1670 1671 ASSERT(processor != NULL); 1672 InitializationBlockFinder block_finder; 1673 ThisNamedPropertyAssigmentFinder this_property_assignment_finder; 1674 while (peek() != end_token) { 1675 Statement* stat = ParseStatement(NULL, CHECK_OK); 1676 if (stat == NULL || stat->IsEmpty()) continue; 1677 // We find and mark the initialization blocks on top level code only. 1678 // This is because the optimization prevents reuse of the map transitions, 1679 // so it should be used only for code that will only be run once. 1680 if (top_scope_->is_global_scope()) { 1681 block_finder.Update(stat); 1682 } 1683 // Find and mark all assignments to named properties in this (this.x =) 1684 if (top_scope_->is_function_scope()) { 1685 this_property_assignment_finder.Update(top_scope_, stat); 1686 } 1687 processor->Add(stat); 1688 } 1689 1690 // Propagate the collected information on this property assignments. 
1691 if (top_scope_->is_function_scope()) { 1692 bool only_simple_this_property_assignments = 1693 this_property_assignment_finder.only_simple_this_property_assignments() 1694 && top_scope_->declarations()->length() == 0; 1695 if (only_simple_this_property_assignments) { 1696 temp_scope_->SetThisPropertyAssignmentInfo( 1697 only_simple_this_property_assignments, 1698 this_property_assignment_finder.GetThisPropertyAssignments()); 1699 } 1700 } 1701 return 0; 1702 } 1703 1704 1705 Statement* Parser::ParseStatement(ZoneStringList* labels, bool* ok) { 1706 // Statement :: 1707 // Block 1708 // VariableStatement 1709 // EmptyStatement 1710 // ExpressionStatement 1711 // IfStatement 1712 // IterationStatement 1713 // ContinueStatement 1714 // BreakStatement 1715 // ReturnStatement 1716 // WithStatement 1717 // LabelledStatement 1718 // SwitchStatement 1719 // ThrowStatement 1720 // TryStatement 1721 // DebuggerStatement 1722 1723 // Note: Since labels can only be used by 'break' and 'continue' 1724 // statements, which themselves are only valid within blocks, 1725 // iterations or 'switch' statements (i.e., BreakableStatements), 1726 // labels can be simply ignored in all other cases; except for 1727 // trivial labeled break statements 'label: break label' which is 1728 // parsed into an empty statement. 
1729 1730 // Keep the source position of the statement 1731 int statement_pos = scanner().peek_location().beg_pos; 1732 Statement* stmt = NULL; 1733 switch (peek()) { 1734 case Token::LBRACE: 1735 return ParseBlock(labels, ok); 1736 1737 case Token::CONST: // fall through 1738 case Token::VAR: 1739 stmt = ParseVariableStatement(ok); 1740 break; 1741 1742 case Token::SEMICOLON: 1743 Next(); 1744 return factory()->EmptyStatement(); 1745 1746 case Token::IF: 1747 stmt = ParseIfStatement(labels, ok); 1748 break; 1749 1750 case Token::DO: 1751 stmt = ParseDoWhileStatement(labels, ok); 1752 break; 1753 1754 case Token::WHILE: 1755 stmt = ParseWhileStatement(labels, ok); 1756 break; 1757 1758 case Token::FOR: 1759 stmt = ParseForStatement(labels, ok); 1760 break; 1761 1762 case Token::CONTINUE: 1763 stmt = ParseContinueStatement(ok); 1764 break; 1765 1766 case Token::BREAK: 1767 stmt = ParseBreakStatement(labels, ok); 1768 break; 1769 1770 case Token::RETURN: 1771 stmt = ParseReturnStatement(ok); 1772 break; 1773 1774 case Token::WITH: 1775 stmt = ParseWithStatement(labels, ok); 1776 break; 1777 1778 case Token::SWITCH: 1779 stmt = ParseSwitchStatement(labels, ok); 1780 break; 1781 1782 case Token::THROW: 1783 stmt = ParseThrowStatement(ok); 1784 break; 1785 1786 case Token::TRY: { 1787 // NOTE: It is somewhat complicated to have labels on 1788 // try-statements. When breaking out of a try-finally statement, 1789 // one must take great care not to treat it as a 1790 // fall-through. 
It is much easier just to wrap the entire 1791 // try-statement in a statement block and put the labels there 1792 Block* result = NEW(Block(labels, 1, false)); 1793 Target target(this, result); 1794 TryStatement* statement = ParseTryStatement(CHECK_OK); 1795 if (statement) { 1796 statement->set_statement_pos(statement_pos); 1797 } 1798 if (result) result->AddStatement(statement); 1799 return result; 1800 } 1801 1802 case Token::FUNCTION: 1803 return ParseFunctionDeclaration(ok); 1804 1805 case Token::NATIVE: 1806 return ParseNativeDeclaration(ok); 1807 1808 case Token::DEBUGGER: 1809 stmt = ParseDebuggerStatement(ok); 1810 break; 1811 1812 default: 1813 stmt = ParseExpressionOrLabelledStatement(labels, ok); 1814 } 1815 1816 // Store the source position of the statement 1817 if (stmt != NULL) stmt->set_statement_pos(statement_pos); 1818 return stmt; 1819 } 1820 1821 1822 VariableProxy* AstBuildingParser::Declare(Handle<String> name, 1823 Variable::Mode mode, 1824 FunctionLiteral* fun, 1825 bool resolve, 1826 bool* ok) { 1827 Variable* var = NULL; 1828 // If we are inside a function, a declaration of a variable 1829 // is a truly local variable, and the scope of the variable 1830 // is always the function scope. 1831 1832 // If a function scope exists, then we can statically declare this 1833 // variable and also set its mode. In any case, a Declaration node 1834 // will be added to the scope so that the declaration can be added 1835 // to the corresponding activation frame at runtime if necessary. 1836 // For instance declarations inside an eval scope need to be added 1837 // to the calling function context. 1838 if (top_scope_->is_function_scope()) { 1839 // Declare the variable in the function scope. 1840 var = top_scope_->LocalLookup(name); 1841 if (var == NULL) { 1842 // Declare the name. 1843 var = top_scope_->DeclareLocal(name, mode); 1844 } else { 1845 // The name was declared before; check for conflicting 1846 // re-declarations. 
If the previous declaration was a const or the 1847 // current declaration is a const then we have a conflict. There is 1848 // similar code in runtime.cc in the Declare functions. 1849 if ((mode == Variable::CONST) || (var->mode() == Variable::CONST)) { 1850 // We only have vars and consts in declarations. 1851 ASSERT(var->mode() == Variable::VAR || 1852 var->mode() == Variable::CONST); 1853 const char* type = (var->mode() == Variable::VAR) ? "var" : "const"; 1854 Handle<String> type_string = 1855 Factory::NewStringFromUtf8(CStrVector(type), TENURED); 1856 Expression* expression = 1857 NewThrowTypeError(Factory::redeclaration_symbol(), 1858 type_string, name); 1859 top_scope_->SetIllegalRedeclaration(expression); 1860 } 1861 } 1862 } 1863 1864 // We add a declaration node for every declaration. The compiler 1865 // will only generate code if necessary. In particular, declarations 1866 // for inner local variables that do not represent functions won't 1867 // result in any generated code. 1868 // 1869 // Note that we always add an unresolved proxy even if it's not 1870 // used, simply because we don't know in this method (w/o extra 1871 // parameters) if the proxy is needed or not. The proxy will be 1872 // bound during variable resolution time unless it was pre-bound 1873 // below. 1874 // 1875 // WARNING: This will lead to multiple declaration nodes for the 1876 // same variable if it is declared several times. This is not a 1877 // semantic issue as long as we keep the source order, but it may be 1878 // a performance issue since it may lead to repeated 1879 // Runtime::DeclareContextSlot() calls. 1880 VariableProxy* proxy = top_scope_->NewUnresolved(name, inside_with()); 1881 top_scope_->AddDeclaration(NEW(Declaration(proxy, mode, fun))); 1882 1883 // For global const variables we bind the proxy to a variable. 
1884 if (mode == Variable::CONST && top_scope_->is_global_scope()) { 1885 ASSERT(resolve); // should be set by all callers 1886 Variable::Kind kind = Variable::NORMAL; 1887 var = NEW(Variable(top_scope_, name, Variable::CONST, true, kind)); 1888 } 1889 1890 // If requested and we have a local variable, bind the proxy to the variable 1891 // at parse-time. This is used for functions (and consts) declared inside 1892 // statements: the corresponding function (or const) variable must be in the 1893 // function scope and not a statement-local scope, e.g. as provided with a 1894 // 'with' statement: 1895 // 1896 // with (obj) { 1897 // function f() {} 1898 // } 1899 // 1900 // which is translated into: 1901 // 1902 // with (obj) { 1903 // // in this case this is not: 'var f; f = function () {};' 1904 // var f = function () {}; 1905 // } 1906 // 1907 // Note that if 'f' is accessed from inside the 'with' statement, it 1908 // will be allocated in the context (because we must be able to look 1909 // it up dynamically) but it will also be accessed statically, i.e., 1910 // with a context slot index and a context chain length for this 1911 // initialization code. Thus, inside the 'with' statement, we need 1912 // both access to the static and the dynamic context chain; the 1913 // runtime needs to provide both. 1914 if (resolve && var != NULL) proxy->BindTo(var); 1915 1916 return proxy; 1917 } 1918 1919 1920 // Language extension which is only enabled for source files loaded 1921 // through the API's extension mechanism. A native function 1922 // declaration is resolved by looking up the function through a 1923 // callback provided by the extension. 
1924 Statement* Parser::ParseNativeDeclaration(bool* ok) { 1925 if (extension_ == NULL) { 1926 ReportUnexpectedToken(Token::NATIVE); 1927 *ok = false; 1928 return NULL; 1929 } 1930 1931 Expect(Token::NATIVE, CHECK_OK); 1932 Expect(Token::FUNCTION, CHECK_OK); 1933 Handle<String> name = ParseIdentifier(CHECK_OK); 1934 Expect(Token::LPAREN, CHECK_OK); 1935 bool done = (peek() == Token::RPAREN); 1936 while (!done) { 1937 ParseIdentifier(CHECK_OK); 1938 done = (peek() == Token::RPAREN); 1939 if (!done) Expect(Token::COMMA, CHECK_OK); 1940 } 1941 Expect(Token::RPAREN, CHECK_OK); 1942 Expect(Token::SEMICOLON, CHECK_OK); 1943 1944 if (is_pre_parsing_) return NULL; 1945 1946 // Make sure that the function containing the native declaration 1947 // isn't lazily compiled. The extension structures are only 1948 // accessible while parsing the first time not when reparsing 1949 // because of lazy compilation. 1950 top_scope_->ForceEagerCompilation(); 1951 1952 // Compute the function template for the native function. 1953 v8::Handle<v8::FunctionTemplate> fun_template = 1954 extension_->GetNativeFunction(v8::Utils::ToLocal(name)); 1955 ASSERT(!fun_template.IsEmpty()); 1956 1957 // Instantiate the function and create a boilerplate function from it. 1958 Handle<JSFunction> fun = Utils::OpenHandle(*fun_template->GetFunction()); 1959 const int literals = fun->NumberOfLiterals(); 1960 Handle<Code> code = Handle<Code>(fun->shared()->code()); 1961 Handle<Code> construct_stub = Handle<Code>(fun->shared()->construct_stub()); 1962 Handle<JSFunction> boilerplate = 1963 Factory::NewFunctionBoilerplate(name, literals, code); 1964 boilerplate->shared()->set_construct_stub(*construct_stub); 1965 1966 // Copy the function data to the boilerplate. Used by 1967 // builtins.cc:HandleApiCall to perform argument type checks and to 1968 // find the right native code to call. 
1969 boilerplate->shared()->set_function_data(fun->shared()->function_data()); 1970 int parameters = fun->shared()->formal_parameter_count(); 1971 boilerplate->shared()->set_formal_parameter_count(parameters); 1972 1973 // TODO(1240846): It's weird that native function declarations are 1974 // introduced dynamically when we meet their declarations, whereas 1975 // other functions are setup when entering the surrounding scope. 1976 FunctionBoilerplateLiteral* lit = 1977 NEW(FunctionBoilerplateLiteral(boilerplate)); 1978 VariableProxy* var = Declare(name, Variable::VAR, NULL, true, CHECK_OK); 1979 return NEW(ExpressionStatement( 1980 new Assignment(Token::INIT_VAR, var, lit, RelocInfo::kNoPosition))); 1981 } 1982 1983 1984 Statement* Parser::ParseFunctionDeclaration(bool* ok) { 1985 // FunctionDeclaration :: 1986 // 'function' Identifier '(' FormalParameterListopt ')' '{' FunctionBody '}' 1987 Expect(Token::FUNCTION, CHECK_OK); 1988 int function_token_position = scanner().location().beg_pos; 1989 Handle<String> name = ParseIdentifier(CHECK_OK); 1990 FunctionLiteral* fun = ParseFunctionLiteral(name, 1991 function_token_position, 1992 DECLARATION, 1993 CHECK_OK); 1994 // Even if we're not at the top-level of the global or a function 1995 // scope, we treat is as such and introduce the function with it's 1996 // initial value upon entering the corresponding scope. 1997 Declare(name, Variable::VAR, fun, true, CHECK_OK); 1998 return factory()->EmptyStatement(); 1999 } 2000 2001 2002 Block* Parser::ParseBlock(ZoneStringList* labels, bool* ok) { 2003 // Block :: 2004 // '{' Statement* '}' 2005 2006 // Note that a Block does not introduce a new execution scope! 2007 // (ECMA-262, 3rd, 12.2) 2008 // 2009 // Construct block expecting 16 statements. 
2010 Block* result = NEW(Block(labels, 16, false)); 2011 Target target(this, result); 2012 Expect(Token::LBRACE, CHECK_OK); 2013 while (peek() != Token::RBRACE) { 2014 Statement* stat = ParseStatement(NULL, CHECK_OK); 2015 if (stat && !stat->IsEmpty()) result->AddStatement(stat); 2016 } 2017 Expect(Token::RBRACE, CHECK_OK); 2018 return result; 2019 } 2020 2021 2022 Block* Parser::ParseVariableStatement(bool* ok) { 2023 // VariableStatement :: 2024 // VariableDeclarations ';' 2025 2026 Expression* dummy; // to satisfy the ParseVariableDeclarations() signature 2027 Block* result = ParseVariableDeclarations(true, &dummy, CHECK_OK); 2028 ExpectSemicolon(CHECK_OK); 2029 return result; 2030 } 2031 2032 2033 // If the variable declaration declares exactly one non-const 2034 // variable, then *var is set to that variable. In all other cases, 2035 // *var is untouched; in particular, it is the caller's responsibility 2036 // to initialize it properly. This mechanism is used for the parsing 2037 // of 'for-in' loops. 2038 Block* Parser::ParseVariableDeclarations(bool accept_IN, 2039 Expression** var, 2040 bool* ok) { 2041 // VariableDeclarations :: 2042 // ('var' | 'const') (Identifier ('=' AssignmentExpression)?)+[','] 2043 2044 Variable::Mode mode = Variable::VAR; 2045 bool is_const = false; 2046 if (peek() == Token::VAR) { 2047 Consume(Token::VAR); 2048 } else if (peek() == Token::CONST) { 2049 Consume(Token::CONST); 2050 mode = Variable::CONST; 2051 is_const = true; 2052 } else { 2053 UNREACHABLE(); // by current callers 2054 } 2055 2056 // The scope of a variable/const declared anywhere inside a function 2057 // is the entire function (ECMA-262, 3rd, 10.1.3, and 12.2). Thus we can 2058 // transform a source-level variable/const declaration into a (Function) 2059 // Scope declaration, and rewrite the source-level initialization into an 2060 // assignment statement. We use a block to collect multiple assignments. 
2061 // 2062 // We mark the block as initializer block because we don't want the 2063 // rewriter to add a '.result' assignment to such a block (to get compliant 2064 // behavior for code such as print(eval('var x = 7')), and for cosmetic 2065 // reasons when pretty-printing. Also, unless an assignment (initialization) 2066 // is inside an initializer block, it is ignored. 2067 // 2068 // Create new block with one expected declaration. 2069 Block* block = NEW(Block(NULL, 1, true)); 2070 VariableProxy* last_var = NULL; // the last variable declared 2071 int nvars = 0; // the number of variables declared 2072 do { 2073 // Parse variable name. 2074 if (nvars > 0) Consume(Token::COMMA); 2075 Handle<String> name = ParseIdentifier(CHECK_OK); 2076 2077 // Declare variable. 2078 // Note that we *always* must treat the initial value via a separate init 2079 // assignment for variables and constants because the value must be assigned 2080 // when the variable is encountered in the source. But the variable/constant 2081 // is declared (and set to 'undefined') upon entering the function within 2082 // which the variable or constant is declared. Only function variables have 2083 // an initial value in the declaration (because they are initialized upon 2084 // entering the function). 2085 // 2086 // If we have a const declaration, in an inner scope, the proxy is always 2087 // bound to the declared variable (independent of possibly surrounding with 2088 // statements). 2089 last_var = Declare(name, mode, NULL, 2090 is_const /* always bound for CONST! */, 2091 CHECK_OK); 2092 nvars++; 2093 2094 // Parse initialization expression if present and/or needed. 
A 2095 // declaration of the form: 2096 // 2097 // var v = x; 2098 // 2099 // is syntactic sugar for: 2100 // 2101 // var v; v = x; 2102 // 2103 // In particular, we need to re-lookup 'v' as it may be a 2104 // different 'v' than the 'v' in the declaration (if we are inside 2105 // a 'with' statement that makes a object property with name 'v' 2106 // visible). 2107 // 2108 // However, note that const declarations are different! A const 2109 // declaration of the form: 2110 // 2111 // const c = x; 2112 // 2113 // is *not* syntactic sugar for: 2114 // 2115 // const c; c = x; 2116 // 2117 // The "variable" c initialized to x is the same as the declared 2118 // one - there is no re-lookup (see the last parameter of the 2119 // Declare() call above). 2120 2121 Expression* value = NULL; 2122 int position = -1; 2123 if (peek() == Token::ASSIGN) { 2124 Expect(Token::ASSIGN, CHECK_OK); 2125 position = scanner().location().beg_pos; 2126 value = ParseAssignmentExpression(accept_IN, CHECK_OK); 2127 } 2128 2129 // Make sure that 'const c' actually initializes 'c' to undefined 2130 // even though it seems like a stupid thing to do. 2131 if (value == NULL && is_const) { 2132 value = GetLiteralUndefined(); 2133 } 2134 2135 // Global variable declarations must be compiled in a specific 2136 // way. When the script containing the global variable declaration 2137 // is entered, the global variable must be declared, so that if it 2138 // doesn't exist (not even in a prototype of the global object) it 2139 // gets created with an initial undefined value. This is handled 2140 // by the declarations part of the function representing the 2141 // top-level global code; see Runtime::DeclareGlobalVariable. If 2142 // it already exists (in the object or in a prototype), it is 2143 // *not* touched until the variable declaration statement is 2144 // executed. 
2145 // 2146 // Executing the variable declaration statement will always 2147 // guarantee to give the global object a "local" variable; a 2148 // variable defined in the global object and not in any 2149 // prototype. This way, global variable declarations can shadow 2150 // properties in the prototype chain, but only after the variable 2151 // declaration statement has been executed. This is important in 2152 // browsers where the global object (window) has lots of 2153 // properties defined in prototype objects. 2154 2155 if (!is_pre_parsing_ && top_scope_->is_global_scope()) { 2156 // Compute the arguments for the runtime call. 2157 ZoneList<Expression*>* arguments = new ZoneList<Expression*>(2); 2158 // Be careful not to assign a value to the global variable if 2159 // we're in a with. The initialization value should not 2160 // necessarily be stored in the global object in that case, 2161 // which is why we need to generate a separate assignment node. 2162 arguments->Add(NEW(Literal(name))); // we have at least 1 parameter 2163 if (is_const || (value != NULL && !inside_with())) { 2164 arguments->Add(value); 2165 value = NULL; // zap the value to avoid the unnecessary assignment 2166 } 2167 // Construct the call to Runtime::DeclareGlobal{Variable,Const}Locally 2168 // and add it to the initialization statement block. Note that 2169 // this function does different things depending on if we have 2170 // 1 or 2 parameters. 
2171 CallRuntime* initialize; 2172 if (is_const) { 2173 initialize = 2174 NEW(CallRuntime( 2175 Factory::InitializeConstGlobal_symbol(), 2176 Runtime::FunctionForId(Runtime::kInitializeConstGlobal), 2177 arguments)); 2178 } else { 2179 initialize = 2180 NEW(CallRuntime( 2181 Factory::InitializeVarGlobal_symbol(), 2182 Runtime::FunctionForId(Runtime::kInitializeVarGlobal), 2183 arguments)); 2184 } 2185 block->AddStatement(NEW(ExpressionStatement(initialize))); 2186 } 2187 2188 // Add an assignment node to the initialization statement block if 2189 // we still have a pending initialization value. We must distinguish 2190 // between variables and constants: Variable initializations are simply 2191 // assignments (with all the consequences if they are inside a 'with' 2192 // statement - they may change a 'with' object property). Constant 2193 // initializations always assign to the declared constant which is 2194 // always at the function scope level. This is only relevant for 2195 // dynamically looked-up variables and constants (the start context 2196 // for constant lookups is always the function context, while it is 2197 // the top context for variables). Sigh... 2198 if (value != NULL) { 2199 Token::Value op = (is_const ? Token::INIT_CONST : Token::INIT_VAR); 2200 Assignment* assignment = NEW(Assignment(op, last_var, value, position)); 2201 if (block) block->AddStatement(NEW(ExpressionStatement(assignment))); 2202 } 2203 } while (peek() == Token::COMMA); 2204 2205 if (!is_const && nvars == 1) { 2206 // We have a single, non-const variable. 2207 if (is_pre_parsing_) { 2208 // If we're preparsing then we need to set the var to something 2209 // in order for for-in loops to parse correctly. 
2210 *var = ValidLeftHandSideSentinel::instance(); 2211 } else { 2212 ASSERT(last_var != NULL); 2213 *var = last_var; 2214 } 2215 } 2216 2217 return block; 2218 } 2219 2220 2221 static bool ContainsLabel(ZoneStringList* labels, Handle<String> label) { 2222 ASSERT(!label.is_null()); 2223 if (labels != NULL) 2224 for (int i = labels->length(); i-- > 0; ) 2225 if (labels->at(i).is_identical_to(label)) 2226 return true; 2227 2228 return false; 2229 } 2230 2231 2232 Statement* Parser::ParseExpressionOrLabelledStatement(ZoneStringList* labels, 2233 bool* ok) { 2234 // ExpressionStatement | LabelledStatement :: 2235 // Expression ';' 2236 // Identifier ':' Statement 2237 2238 Expression* expr = ParseExpression(true, CHECK_OK); 2239 if (peek() == Token::COLON && expr && 2240 expr->AsVariableProxy() != NULL && 2241 !expr->AsVariableProxy()->is_this()) { 2242 VariableProxy* var = expr->AsVariableProxy(); 2243 Handle<String> label = var->name(); 2244 // TODO(1240780): We don't check for redeclaration of labels 2245 // during preparsing since keeping track of the set of active 2246 // labels requires nontrivial changes to the way scopes are 2247 // structured. However, these are probably changes we want to 2248 // make later anyway so we should go back and fix this then. 2249 if (!is_pre_parsing_) { 2250 if (ContainsLabel(labels, label) || TargetStackContainsLabel(label)) { 2251 SmartPointer<char> c_string = label->ToCString(DISALLOW_NULLS); 2252 const char* elms[2] = { "Label", *c_string }; 2253 Vector<const char*> args(elms, 2); 2254 ReportMessage("redeclaration", args); 2255 *ok = false; 2256 return NULL; 2257 } 2258 if (labels == NULL) labels = new ZoneStringList(4); 2259 labels->Add(label); 2260 // Remove the "ghost" variable that turned out to be a label 2261 // from the top scope. This way, we don't try to resolve it 2262 // during the scope processing. 
2263 top_scope_->RemoveUnresolved(var); 2264 } 2265 Expect(Token::COLON, CHECK_OK); 2266 return ParseStatement(labels, ok); 2267 } 2268 2269 // Parsed expression statement. 2270 ExpectSemicolon(CHECK_OK); 2271 return NEW(ExpressionStatement(expr)); 2272 } 2273 2274 2275 IfStatement* Parser::ParseIfStatement(ZoneStringList* labels, bool* ok) { 2276 // IfStatement :: 2277 // 'if' '(' Expression ')' Statement ('else' Statement)? 2278 2279 Expect(Token::IF, CHECK_OK); 2280 Expect(Token::LPAREN, CHECK_OK); 2281 Expression* condition = ParseExpression(true, CHECK_OK); 2282 Expect(Token::RPAREN, CHECK_OK); 2283 Statement* then_statement = ParseStatement(labels, CHECK_OK); 2284 Statement* else_statement = NULL; 2285 if (peek() == Token::ELSE) { 2286 Next(); 2287 else_statement = ParseStatement(labels, CHECK_OK); 2288 } else if (!is_pre_parsing_) { 2289 else_statement = factory()->EmptyStatement(); 2290 } 2291 return NEW(IfStatement(condition, then_statement, else_statement)); 2292 } 2293 2294 2295 Statement* Parser::ParseContinueStatement(bool* ok) { 2296 // ContinueStatement :: 2297 // 'continue' Identifier? ';' 2298 2299 Expect(Token::CONTINUE, CHECK_OK); 2300 Handle<String> label = Handle<String>::null(); 2301 Token::Value tok = peek(); 2302 if (!scanner_.has_line_terminator_before_next() && 2303 tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) { 2304 label = ParseIdentifier(CHECK_OK); 2305 } 2306 IterationStatement* target = NULL; 2307 if (!is_pre_parsing_) { 2308 target = LookupContinueTarget(label, CHECK_OK); 2309 if (target == NULL) { 2310 // Illegal continue statement. To be consistent with KJS we delay 2311 // reporting of the syntax error until runtime. 
2312 Handle<String> error_type = Factory::illegal_continue_symbol(); 2313 if (!label.is_null()) error_type = Factory::unknown_label_symbol(); 2314 Expression* throw_error = NewThrowSyntaxError(error_type, label); 2315 return NEW(ExpressionStatement(throw_error)); 2316 } 2317 } 2318 ExpectSemicolon(CHECK_OK); 2319 return NEW(ContinueStatement(target)); 2320 } 2321 2322 2323 Statement* Parser::ParseBreakStatement(ZoneStringList* labels, bool* ok) { 2324 // BreakStatement :: 2325 // 'break' Identifier? ';' 2326 2327 Expect(Token::BREAK, CHECK_OK); 2328 Handle<String> label; 2329 Token::Value tok = peek(); 2330 if (!scanner_.has_line_terminator_before_next() && 2331 tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) { 2332 label = ParseIdentifier(CHECK_OK); 2333 } 2334 // Parse labeled break statements that target themselves into 2335 // empty statements, e.g. 'l1: l2: l3: break l2;' 2336 if (!label.is_null() && ContainsLabel(labels, label)) { 2337 return factory()->EmptyStatement(); 2338 } 2339 BreakableStatement* target = NULL; 2340 if (!is_pre_parsing_) { 2341 target = LookupBreakTarget(label, CHECK_OK); 2342 if (target == NULL) { 2343 // Illegal break statement. To be consistent with KJS we delay 2344 // reporting of the syntax error until runtime. 2345 Handle<String> error_type = Factory::illegal_break_symbol(); 2346 if (!label.is_null()) error_type = Factory::unknown_label_symbol(); 2347 Expression* throw_error = NewThrowSyntaxError(error_type, label); 2348 return NEW(ExpressionStatement(throw_error)); 2349 } 2350 } 2351 ExpectSemicolon(CHECK_OK); 2352 return NEW(BreakStatement(target)); 2353 } 2354 2355 2356 Statement* Parser::ParseReturnStatement(bool* ok) { 2357 // ReturnStatement :: 2358 // 'return' Expression? ';' 2359 2360 // Consume the return token. It is necessary to do the before 2361 // reporting any errors on it, because of the way errors are 2362 // reported (underlining). 
2363 Expect(Token::RETURN, CHECK_OK); 2364 2365 // An ECMAScript program is considered syntactically incorrect if it 2366 // contains a return statement that is not within the body of a 2367 // function. See ECMA-262, section 12.9, page 67. 2368 // 2369 // To be consistent with KJS we report the syntax error at runtime. 2370 if (!is_pre_parsing_ && !top_scope_->is_function_scope()) { 2371 Handle<String> type = Factory::illegal_return_symbol(); 2372 Expression* throw_error = NewThrowSyntaxError(type, Handle<Object>::null()); 2373 return NEW(ExpressionStatement(throw_error)); 2374 } 2375 2376 Token::Value tok = peek(); 2377 if (scanner_.has_line_terminator_before_next() || 2378 tok == Token::SEMICOLON || 2379 tok == Token::RBRACE || 2380 tok == Token::EOS) { 2381 ExpectSemicolon(CHECK_OK); 2382 return NEW(ReturnStatement(GetLiteralUndefined())); 2383 } 2384 2385 Expression* expr = ParseExpression(true, CHECK_OK); 2386 ExpectSemicolon(CHECK_OK); 2387 return NEW(ReturnStatement(expr)); 2388 } 2389 2390 2391 Block* Parser::WithHelper(Expression* obj, 2392 ZoneStringList* labels, 2393 bool is_catch_block, 2394 bool* ok) { 2395 // Parse the statement and collect escaping labels. 2396 ZoneList<BreakTarget*>* target_list = NEW(ZoneList<BreakTarget*>(0)); 2397 TargetCollector collector(target_list); 2398 Statement* stat; 2399 { Target target(this, &collector); 2400 with_nesting_level_++; 2401 top_scope_->RecordWithStatement(); 2402 stat = ParseStatement(labels, CHECK_OK); 2403 with_nesting_level_--; 2404 } 2405 // Create resulting block with two statements. 2406 // 1: Evaluate the with expression. 2407 // 2: The try-finally block evaluating the body. 2408 Block* result = NEW(Block(NULL, 2, false)); 2409 2410 if (result != NULL) { 2411 result->AddStatement(NEW(WithEnterStatement(obj, is_catch_block))); 2412 2413 // Create body block. 2414 Block* body = NEW(Block(NULL, 1, false)); 2415 body->AddStatement(stat); 2416 2417 // Create exit block. 
2418 Block* exit = NEW(Block(NULL, 1, false)); 2419 exit->AddStatement(NEW(WithExitStatement())); 2420 2421 // Return a try-finally statement. 2422 TryFinallyStatement* wrapper = NEW(TryFinallyStatement(body, exit)); 2423 wrapper->set_escaping_targets(collector.targets()); 2424 result->AddStatement(wrapper); 2425 } 2426 return result; 2427 } 2428 2429 2430 Statement* Parser::ParseWithStatement(ZoneStringList* labels, bool* ok) { 2431 // WithStatement :: 2432 // 'with' '(' Expression ')' Statement 2433 2434 Expect(Token::WITH, CHECK_OK); 2435 Expect(Token::LPAREN, CHECK_OK); 2436 Expression* expr = ParseExpression(true, CHECK_OK); 2437 Expect(Token::RPAREN, CHECK_OK); 2438 2439 return WithHelper(expr, labels, false, CHECK_OK); 2440 } 2441 2442 2443 CaseClause* Parser::ParseCaseClause(bool* default_seen_ptr, bool* ok) { 2444 // CaseClause :: 2445 // 'case' Expression ':' Statement* 2446 // 'default' ':' Statement* 2447 2448 Expression* label = NULL; // NULL expression indicates default case 2449 if (peek() == Token::CASE) { 2450 Expect(Token::CASE, CHECK_OK); 2451 label = ParseExpression(true, CHECK_OK); 2452 } else { 2453 Expect(Token::DEFAULT, CHECK_OK); 2454 if (*default_seen_ptr) { 2455 ReportMessage("multiple_defaults_in_switch", 2456 Vector<const char*>::empty()); 2457 *ok = false; 2458 return NULL; 2459 } 2460 *default_seen_ptr = true; 2461 } 2462 Expect(Token::COLON, CHECK_OK); 2463 2464 ZoneListWrapper<Statement> statements = factory()->NewList<Statement>(5); 2465 while (peek() != Token::CASE && 2466 peek() != Token::DEFAULT && 2467 peek() != Token::RBRACE) { 2468 Statement* stat = ParseStatement(NULL, CHECK_OK); 2469 statements.Add(stat); 2470 } 2471 2472 return NEW(CaseClause(label, statements.elements())); 2473 } 2474 2475 2476 SwitchStatement* Parser::ParseSwitchStatement(ZoneStringList* labels, 2477 bool* ok) { 2478 // SwitchStatement :: 2479 // 'switch' '(' Expression ')' '{' CaseClause* '}' 2480 2481 SwitchStatement* statement = 
NEW(SwitchStatement(labels)); 2482 Target target(this, statement); 2483 2484 Expect(Token::SWITCH, CHECK_OK); 2485 Expect(Token::LPAREN, CHECK_OK); 2486 Expression* tag = ParseExpression(true, CHECK_OK); 2487 Expect(Token::RPAREN, CHECK_OK); 2488 2489 bool default_seen = false; 2490 ZoneListWrapper<CaseClause> cases = factory()->NewList<CaseClause>(4); 2491 Expect(Token::LBRACE, CHECK_OK); 2492 while (peek() != Token::RBRACE) { 2493 CaseClause* clause = ParseCaseClause(&default_seen, CHECK_OK); 2494 cases.Add(clause); 2495 } 2496 Expect(Token::RBRACE, CHECK_OK); 2497 2498 if (statement) statement->Initialize(tag, cases.elements()); 2499 return statement; 2500 } 2501 2502 2503 Statement* Parser::ParseThrowStatement(bool* ok) { 2504 // ThrowStatement :: 2505 // 'throw' Expression ';' 2506 2507 Expect(Token::THROW, CHECK_OK); 2508 int pos = scanner().location().beg_pos; 2509 if (scanner_.has_line_terminator_before_next()) { 2510 ReportMessage("newline_after_throw", Vector<const char*>::empty()); 2511 *ok = false; 2512 return NULL; 2513 } 2514 Expression* exception = ParseExpression(true, CHECK_OK); 2515 ExpectSemicolon(CHECK_OK); 2516 2517 return NEW(ExpressionStatement(new Throw(exception, pos))); 2518 } 2519 2520 2521 TryStatement* Parser::ParseTryStatement(bool* ok) { 2522 // TryStatement :: 2523 // 'try' Block Catch 2524 // 'try' Block Finally 2525 // 'try' Block Catch Finally 2526 // 2527 // Catch :: 2528 // 'catch' '(' Identifier ')' Block 2529 // 2530 // Finally :: 2531 // 'finally' Block 2532 2533 Expect(Token::TRY, CHECK_OK); 2534 2535 ZoneList<BreakTarget*>* target_list = NEW(ZoneList<BreakTarget*>(0)); 2536 TargetCollector collector(target_list); 2537 Block* try_block; 2538 2539 { Target target(this, &collector); 2540 try_block = ParseBlock(NULL, CHECK_OK); 2541 } 2542 2543 Block* catch_block = NULL; 2544 VariableProxy* catch_var = NULL; 2545 Block* finally_block = NULL; 2546 2547 Token::Value tok = peek(); 2548 if (tok != Token::CATCH && tok != 
Token::FINALLY) { 2549 ReportMessage("no_catch_or_finally", Vector<const char*>::empty()); 2550 *ok = false; 2551 return NULL; 2552 } 2553 2554 // If we can break out from the catch block and there is a finally block, 2555 // then we will need to collect jump targets from the catch block. Since 2556 // we don't know yet if there will be a finally block, we always collect 2557 // the jump targets. 2558 ZoneList<BreakTarget*>* catch_target_list = NEW(ZoneList<BreakTarget*>(0)); 2559 TargetCollector catch_collector(catch_target_list); 2560 bool has_catch = false; 2561 if (tok == Token::CATCH) { 2562 has_catch = true; 2563 Consume(Token::CATCH); 2564 2565 Expect(Token::LPAREN, CHECK_OK); 2566 Handle<String> name = ParseIdentifier(CHECK_OK); 2567 Expect(Token::RPAREN, CHECK_OK); 2568 2569 if (peek() == Token::LBRACE) { 2570 // Allocate a temporary for holding the finally state while 2571 // executing the finally block. 2572 catch_var = top_scope_->NewTemporary(Factory::catch_var_symbol()); 2573 Literal* name_literal = NEW(Literal(name)); 2574 Expression* obj = NEW(CatchExtensionObject(name_literal, catch_var)); 2575 { Target target(this, &catch_collector); 2576 catch_block = WithHelper(obj, NULL, true, CHECK_OK); 2577 } 2578 } else { 2579 Expect(Token::LBRACE, CHECK_OK); 2580 } 2581 2582 tok = peek(); 2583 } 2584 2585 if (tok == Token::FINALLY || !has_catch) { 2586 Consume(Token::FINALLY); 2587 // Declare a variable for holding the finally state while 2588 // executing the finally block. 
2589 finally_block = ParseBlock(NULL, CHECK_OK); 2590 } 2591 2592 // Simplify the AST nodes by converting: 2593 // 'try { } catch { } finally { }' 2594 // to: 2595 // 'try { try { } catch { } } finally { }' 2596 2597 if (!is_pre_parsing_ && catch_block != NULL && finally_block != NULL) { 2598 TryCatchStatement* statement = 2599 NEW(TryCatchStatement(try_block, catch_var, catch_block)); 2600 statement->set_escaping_targets(collector.targets()); 2601 try_block = NEW(Block(NULL, 1, false)); 2602 try_block->AddStatement(statement); 2603 catch_block = NULL; 2604 } 2605 2606 TryStatement* result = NULL; 2607 if (!is_pre_parsing_) { 2608 if (catch_block != NULL) { 2609 ASSERT(finally_block == NULL); 2610 result = NEW(TryCatchStatement(try_block, catch_var, catch_block)); 2611 result->set_escaping_targets(collector.targets()); 2612 } else { 2613 ASSERT(finally_block != NULL); 2614 result = NEW(TryFinallyStatement(try_block, finally_block)); 2615 // Add the jump targets of the try block and the catch block. 
2616 for (int i = 0; i < collector.targets()->length(); i++) { 2617 catch_collector.AddTarget(collector.targets()->at(i)); 2618 } 2619 result->set_escaping_targets(catch_collector.targets()); 2620 } 2621 } 2622 2623 return result; 2624 } 2625 2626 2627 DoWhileStatement* Parser::ParseDoWhileStatement(ZoneStringList* labels, 2628 bool* ok) { 2629 // DoStatement :: 2630 // 'do' Statement 'while' '(' Expression ')' ';' 2631 2632 DoWhileStatement* loop = NEW(DoWhileStatement(labels)); 2633 Target target(this, loop); 2634 2635 Expect(Token::DO, CHECK_OK); 2636 Statement* body = ParseStatement(NULL, CHECK_OK); 2637 Expect(Token::WHILE, CHECK_OK); 2638 Expect(Token::LPAREN, CHECK_OK); 2639 2640 if (loop != NULL) { 2641 int position = scanner().location().beg_pos; 2642 loop->set_condition_position(position); 2643 } 2644 2645 Expression* cond = ParseExpression(true, CHECK_OK); 2646 Expect(Token::RPAREN, CHECK_OK); 2647 2648 // Allow do-statements to be terminated with and without 2649 // semi-colons. This allows code such as 'do;while(0)return' to 2650 // parse, which would not be the case if we had used the 2651 // ExpectSemicolon() functionality here. 
2652 if (peek() == Token::SEMICOLON) Consume(Token::SEMICOLON); 2653 2654 if (loop != NULL) loop->Initialize(cond, body); 2655 return loop; 2656 } 2657 2658 2659 WhileStatement* Parser::ParseWhileStatement(ZoneStringList* labels, bool* ok) { 2660 // WhileStatement :: 2661 // 'while' '(' Expression ')' Statement 2662 2663 WhileStatement* loop = NEW(WhileStatement(labels)); 2664 Target target(this, loop); 2665 2666 Expect(Token::WHILE, CHECK_OK); 2667 Expect(Token::LPAREN, CHECK_OK); 2668 Expression* cond = ParseExpression(true, CHECK_OK); 2669 Expect(Token::RPAREN, CHECK_OK); 2670 Statement* body = ParseStatement(NULL, CHECK_OK); 2671 2672 if (loop != NULL) loop->Initialize(cond, body); 2673 return loop; 2674 } 2675 2676 2677 Statement* Parser::ParseForStatement(ZoneStringList* labels, bool* ok) { 2678 // ForStatement :: 2679 // 'for' '(' Expression? ';' Expression? ';' Expression? ')' Statement 2680 2681 Statement* init = NULL; 2682 2683 Expect(Token::FOR, CHECK_OK); 2684 Expect(Token::LPAREN, CHECK_OK); 2685 if (peek() != Token::SEMICOLON) { 2686 if (peek() == Token::VAR || peek() == Token::CONST) { 2687 Expression* each = NULL; 2688 Block* variable_statement = 2689 ParseVariableDeclarations(false, &each, CHECK_OK); 2690 if (peek() == Token::IN && each != NULL) { 2691 ForInStatement* loop = NEW(ForInStatement(labels)); 2692 Target target(this, loop); 2693 2694 Expect(Token::IN, CHECK_OK); 2695 Expression* enumerable = ParseExpression(true, CHECK_OK); 2696 Expect(Token::RPAREN, CHECK_OK); 2697 2698 Statement* body = ParseStatement(NULL, CHECK_OK); 2699 if (is_pre_parsing_) { 2700 return NULL; 2701 } else { 2702 loop->Initialize(each, enumerable, body); 2703 Block* result = NEW(Block(NULL, 2, false)); 2704 result->AddStatement(variable_statement); 2705 result->AddStatement(loop); 2706 // Parsed for-in loop w/ variable/const declaration. 
2707 return result; 2708 } 2709 2710 } else { 2711 init = variable_statement; 2712 } 2713 2714 } else { 2715 Expression* expression = ParseExpression(false, CHECK_OK); 2716 if (peek() == Token::IN) { 2717 // Signal a reference error if the expression is an invalid 2718 // left-hand side expression. We could report this as a syntax 2719 // error here but for compatibility with JSC we choose to report 2720 // the error at runtime. 2721 if (expression == NULL || !expression->IsValidLeftHandSide()) { 2722 Handle<String> type = Factory::invalid_lhs_in_for_in_symbol(); 2723 expression = NewThrowReferenceError(type); 2724 } 2725 ForInStatement* loop = NEW(ForInStatement(labels)); 2726 Target target(this, loop); 2727 2728 Expect(Token::IN, CHECK_OK); 2729 Expression* enumerable = ParseExpression(true, CHECK_OK); 2730 Expect(Token::RPAREN, CHECK_OK); 2731 2732 Statement* body = ParseStatement(NULL, CHECK_OK); 2733 if (loop) loop->Initialize(expression, enumerable, body); 2734 2735 // Parsed for-in loop. 2736 return loop; 2737 2738 } else { 2739 init = NEW(ExpressionStatement(expression)); 2740 } 2741 } 2742 } 2743 2744 // Standard 'for' loop 2745 ForStatement* loop = NEW(ForStatement(labels)); 2746 Target target(this, loop); 2747 2748 // Parsed initializer at this point. 
2749 Expect(Token::SEMICOLON, CHECK_OK); 2750 2751 Expression* cond = NULL; 2752 if (peek() != Token::SEMICOLON) { 2753 cond = ParseExpression(true, CHECK_OK); 2754 if (cond && cond->AsCompareOperation()) { 2755 cond->AsCompareOperation()->set_is_for_loop_condition(); 2756 } 2757 } 2758 Expect(Token::SEMICOLON, CHECK_OK); 2759 2760 Statement* next = NULL; 2761 if (peek() != Token::RPAREN) { 2762 Expression* exp = ParseExpression(true, CHECK_OK); 2763 next = NEW(ExpressionStatement(exp)); 2764 } 2765 Expect(Token::RPAREN, CHECK_OK); 2766 2767 Statement* body = ParseStatement(NULL, CHECK_OK); 2768 2769 if (loop) loop->Initialize(init, cond, next, body); 2770 return loop; 2771 } 2772 2773 2774 // Precedence = 1 2775 Expression* Parser::ParseExpression(bool accept_IN, bool* ok) { 2776 // Expression :: 2777 // AssignmentExpression 2778 // Expression ',' AssignmentExpression 2779 2780 Expression* result = ParseAssignmentExpression(accept_IN, CHECK_OK); 2781 while (peek() == Token::COMMA) { 2782 Expect(Token::COMMA, CHECK_OK); 2783 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK); 2784 result = NEW(BinaryOperation(Token::COMMA, result, right)); 2785 } 2786 return result; 2787 } 2788 2789 2790 // Precedence = 2 2791 Expression* Parser::ParseAssignmentExpression(bool accept_IN, bool* ok) { 2792 // AssignmentExpression :: 2793 // ConditionalExpression 2794 // LeftHandSideExpression AssignmentOperator AssignmentExpression 2795 2796 Expression* expression = ParseConditionalExpression(accept_IN, CHECK_OK); 2797 2798 if (!Token::IsAssignmentOp(peek())) { 2799 // Parsed conditional expression only (no assignment). 2800 return expression; 2801 } 2802 2803 // Signal a reference error if the expression is an invalid left-hand 2804 // side expression. We could report this as a syntax error here but 2805 // for compatibility with JSC we choose to report the error at 2806 // runtime. 
2807 if (expression == NULL || !expression->IsValidLeftHandSide()) { 2808 Handle<String> type = Factory::invalid_lhs_in_assignment_symbol(); 2809 expression = NewThrowReferenceError(type); 2810 } 2811 2812 Token::Value op = Next(); // Get assignment operator. 2813 int pos = scanner().location().beg_pos; 2814 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK); 2815 2816 // TODO(1231235): We try to estimate the set of properties set by 2817 // constructors. We define a new property whenever there is an 2818 // assignment to a property of 'this'. We should probably only add 2819 // properties if we haven't seen them before. Otherwise we'll 2820 // probably overestimate the number of properties. 2821 Property* property = expression ? expression->AsProperty() : NULL; 2822 if (op == Token::ASSIGN && 2823 property != NULL && 2824 property->obj()->AsVariableProxy() != NULL && 2825 property->obj()->AsVariableProxy()->is_this()) { 2826 temp_scope_->AddProperty(); 2827 } 2828 2829 return NEW(Assignment(op, expression, right, pos)); 2830 } 2831 2832 2833 // Precedence = 3 2834 Expression* Parser::ParseConditionalExpression(bool accept_IN, bool* ok) { 2835 // ConditionalExpression :: 2836 // LogicalOrExpression 2837 // LogicalOrExpression '?' AssignmentExpression ':' AssignmentExpression 2838 2839 // We start using the binary expression parser for prec >= 4 only! 2840 Expression* expression = ParseBinaryExpression(4, accept_IN, CHECK_OK); 2841 if (peek() != Token::CONDITIONAL) return expression; 2842 Consume(Token::CONDITIONAL); 2843 // In parsing the first assignment expression in conditional 2844 // expressions we always accept the 'in' keyword; see ECMA-262, 2845 // section 11.12, page 58. 
2846 Expression* left = ParseAssignmentExpression(true, CHECK_OK); 2847 Expect(Token::COLON, CHECK_OK); 2848 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK); 2849 return NEW(Conditional(expression, left, right)); 2850 } 2851 2852 2853 static int Precedence(Token::Value tok, bool accept_IN) { 2854 if (tok == Token::IN && !accept_IN) 2855 return 0; // 0 precedence will terminate binary expression parsing 2856 2857 return Token::Precedence(tok); 2858 } 2859 2860 2861 // Precedence >= 4 2862 Expression* Parser::ParseBinaryExpression(int prec, bool accept_IN, bool* ok) { 2863 ASSERT(prec >= 4); 2864 Expression* x = ParseUnaryExpression(CHECK_OK); 2865 for (int prec1 = Precedence(peek(), accept_IN); prec1 >= prec; prec1--) { 2866 // prec1 >= 4 2867 while (Precedence(peek(), accept_IN) == prec1) { 2868 Token::Value op = Next(); 2869 Expression* y = ParseBinaryExpression(prec1 + 1, accept_IN, CHECK_OK); 2870 2871 // Compute some expressions involving only number literals. 2872 if (x && x->AsLiteral() && x->AsLiteral()->handle()->IsNumber() && 2873 y && y->AsLiteral() && y->AsLiteral()->handle()->IsNumber()) { 2874 double x_val = x->AsLiteral()->handle()->Number(); 2875 double y_val = y->AsLiteral()->handle()->Number(); 2876 2877 switch (op) { 2878 case Token::ADD: 2879 x = NewNumberLiteral(x_val + y_val); 2880 continue; 2881 case Token::SUB: 2882 x = NewNumberLiteral(x_val - y_val); 2883 continue; 2884 case Token::MUL: 2885 x = NewNumberLiteral(x_val * y_val); 2886 continue; 2887 case Token::DIV: 2888 x = NewNumberLiteral(x_val / y_val); 2889 continue; 2890 case Token::BIT_OR: 2891 x = NewNumberLiteral(DoubleToInt32(x_val) | DoubleToInt32(y_val)); 2892 continue; 2893 case Token::BIT_AND: 2894 x = NewNumberLiteral(DoubleToInt32(x_val) & DoubleToInt32(y_val)); 2895 continue; 2896 case Token::BIT_XOR: 2897 x = NewNumberLiteral(DoubleToInt32(x_val) ^ DoubleToInt32(y_val)); 2898 continue; 2899 case Token::SHL: { 2900 int value = DoubleToInt32(x_val) << 
(DoubleToInt32(y_val) & 0x1f); 2901 x = NewNumberLiteral(value); 2902 continue; 2903 } 2904 case Token::SHR: { 2905 uint32_t shift = DoubleToInt32(y_val) & 0x1f; 2906 uint32_t value = DoubleToUint32(x_val) >> shift; 2907 x = NewNumberLiteral(value); 2908 continue; 2909 } 2910 case Token::SAR: { 2911 uint32_t shift = DoubleToInt32(y_val) & 0x1f; 2912 int value = ArithmeticShiftRight(DoubleToInt32(x_val), shift); 2913 x = NewNumberLiteral(value); 2914 continue; 2915 } 2916 default: 2917 break; 2918 } 2919 } 2920 2921 // Convert constant divisions to multiplications for speed. 2922 if (op == Token::DIV && 2923 y && y->AsLiteral() && y->AsLiteral()->handle()->IsNumber()) { 2924 double y_val = y->AsLiteral()->handle()->Number(); 2925 int64_t y_int = static_cast<int64_t>(y_val); 2926 // There are rounding issues with this optimization, but they don't 2927 // apply if the number to be divided with has a reciprocal that can be 2928 // precisely represented as a floating point number. This is the case 2929 // if the number is an integer power of 2. Negative integer powers of 2930 // 2 work too, but for -2, -1, 1 and 2 we don't do the strength 2931 // reduction because the inlined optimistic idiv has a reasonable 2932 // chance of succeeding by producing a Smi answer with no remainder. 2933 if (static_cast<double>(y_int) == y_val && 2934 (IsPowerOf2(y_int) || IsPowerOf2(-y_int)) && 2935 (y_int > 2 || y_int < -2)) { 2936 y = NewNumberLiteral(1 / y_val); 2937 op = Token::MUL; 2938 } 2939 } 2940 2941 // For now we distinguish between comparisons and other binary 2942 // operations. (We could combine the two and get rid of this 2943 // code an AST node eventually.) 2944 if (Token::IsCompareOp(op)) { 2945 // We have a comparison. 
2946 Token::Value cmp = op; 2947 switch (op) { 2948 case Token::NE: cmp = Token::EQ; break; 2949 case Token::NE_STRICT: cmp = Token::EQ_STRICT; break; 2950 default: break; 2951 } 2952 x = NEW(CompareOperation(cmp, x, y)); 2953 if (cmp != op) { 2954 // The comparison was negated - add a NOT. 2955 x = NEW(UnaryOperation(Token::NOT, x)); 2956 } 2957 2958 } else { 2959 // We have a "normal" binary operation. 2960 x = NEW(BinaryOperation(op, x, y)); 2961 } 2962 } 2963 } 2964 return x; 2965 } 2966 2967 2968 Expression* Parser::ParseUnaryExpression(bool* ok) { 2969 // UnaryExpression :: 2970 // PostfixExpression 2971 // 'delete' UnaryExpression 2972 // 'void' UnaryExpression 2973 // 'typeof' UnaryExpression 2974 // '++' UnaryExpression 2975 // '--' UnaryExpression 2976 // '+' UnaryExpression 2977 // '-' UnaryExpression 2978 // '~' UnaryExpression 2979 // '!' UnaryExpression 2980 2981 Token::Value op = peek(); 2982 if (Token::IsUnaryOp(op)) { 2983 op = Next(); 2984 Expression* expression = ParseUnaryExpression(CHECK_OK); 2985 2986 // Compute some expressions involving only number literals. 2987 if (expression != NULL && expression->AsLiteral() && 2988 expression->AsLiteral()->handle()->IsNumber()) { 2989 double value = expression->AsLiteral()->handle()->Number(); 2990 switch (op) { 2991 case Token::ADD: 2992 return expression; 2993 case Token::SUB: 2994 return NewNumberLiteral(-value); 2995 case Token::BIT_NOT: 2996 return NewNumberLiteral(~DoubleToInt32(value)); 2997 default: break; 2998 } 2999 } 3000 3001 return NEW(UnaryOperation(op, expression)); 3002 3003 } else if (Token::IsCountOp(op)) { 3004 op = Next(); 3005 Expression* expression = ParseUnaryExpression(CHECK_OK); 3006 // Signal a reference error if the expression is an invalid 3007 // left-hand side expression. We could report this as a syntax 3008 // error here but for compatibility with JSC we choose to report the 3009 // error at runtime. 
3010 if (expression == NULL || !expression->IsValidLeftHandSide()) { 3011 Handle<String> type = Factory::invalid_lhs_in_prefix_op_symbol(); 3012 expression = NewThrowReferenceError(type); 3013 } 3014 return NEW(CountOperation(true /* prefix */, op, expression)); 3015 3016 } else { 3017 return ParsePostfixExpression(ok); 3018 } 3019 } 3020 3021 3022 Expression* Parser::ParsePostfixExpression(bool* ok) { 3023 // PostfixExpression :: 3024 // LeftHandSideExpression ('++' | '--')? 3025 3026 Expression* expression = ParseLeftHandSideExpression(CHECK_OK); 3027 if (!scanner_.has_line_terminator_before_next() && Token::IsCountOp(peek())) { 3028 // Signal a reference error if the expression is an invalid 3029 // left-hand side expression. We could report this as a syntax 3030 // error here but for compatibility with JSC we choose to report the 3031 // error at runtime. 3032 if (expression == NULL || !expression->IsValidLeftHandSide()) { 3033 Handle<String> type = Factory::invalid_lhs_in_postfix_op_symbol(); 3034 expression = NewThrowReferenceError(type); 3035 } 3036 Token::Value next = Next(); 3037 expression = NEW(CountOperation(false /* postfix */, next, expression)); 3038 } 3039 return expression; 3040 } 3041 3042 3043 Expression* Parser::ParseLeftHandSideExpression(bool* ok) { 3044 // LeftHandSideExpression :: 3045 // (NewExpression | MemberExpression) ... 
3046 3047 Expression* result; 3048 if (peek() == Token::NEW) { 3049 result = ParseNewExpression(CHECK_OK); 3050 } else { 3051 result = ParseMemberExpression(CHECK_OK); 3052 } 3053 3054 while (true) { 3055 switch (peek()) { 3056 case Token::LBRACK: { 3057 Consume(Token::LBRACK); 3058 int pos = scanner().location().beg_pos; 3059 Expression* index = ParseExpression(true, CHECK_OK); 3060 result = factory()->NewProperty(result, index, pos); 3061 Expect(Token::RBRACK, CHECK_OK); 3062 break; 3063 } 3064 3065 case Token::LPAREN: { 3066 int pos = scanner().location().beg_pos; 3067 ZoneList<Expression*>* args = ParseArguments(CHECK_OK); 3068 3069 // Keep track of eval() calls since they disable all local variable 3070 // optimizations. 3071 // The calls that need special treatment are the 3072 // direct (i.e. not aliased) eval calls. These calls are all of the 3073 // form eval(...) with no explicit receiver object where eval is not 3074 // declared in the current scope chain. These calls are marked as 3075 // potentially direct eval calls. Whether they are actually direct calls 3076 // to eval is determined at run time. 
3077 if (!is_pre_parsing_) { 3078 VariableProxy* callee = result->AsVariableProxy(); 3079 if (callee != NULL && callee->IsVariable(Factory::eval_symbol())) { 3080 Handle<String> name = callee->name(); 3081 Variable* var = top_scope_->Lookup(name); 3082 if (var == NULL) { 3083 top_scope_->RecordEvalCall(); 3084 } 3085 } 3086 } 3087 result = factory()->NewCall(result, args, pos); 3088 break; 3089 } 3090 3091 case Token::PERIOD: { 3092 Consume(Token::PERIOD); 3093 int pos = scanner().location().beg_pos; 3094 Handle<String> name = ParseIdentifier(CHECK_OK); 3095 result = factory()->NewProperty(result, NEW(Literal(name)), pos); 3096 break; 3097 } 3098 3099 default: 3100 return result; 3101 } 3102 } 3103 } 3104 3105 3106 3107 Expression* Parser::ParseNewPrefix(PositionStack* stack, bool* ok) { 3108 // NewExpression :: 3109 // ('new')+ MemberExpression 3110 3111 // The grammar for new expressions is pretty warped. The keyword 3112 // 'new' can either be a part of the new expression (where it isn't 3113 // followed by an argument list) or a part of the member expression, 3114 // where it must be followed by an argument list. To accommodate 3115 // this, we parse the 'new' keywords greedily and keep track of how 3116 // many we have parsed. 
This information is then passed on to the 3117 // member expression parser, which is only allowed to match argument 3118 // lists as long as it has 'new' prefixes left 3119 Expect(Token::NEW, CHECK_OK); 3120 PositionStack::Element pos(stack, scanner().location().beg_pos); 3121 3122 Expression* result; 3123 if (peek() == Token::NEW) { 3124 result = ParseNewPrefix(stack, CHECK_OK); 3125 } else { 3126 result = ParseMemberWithNewPrefixesExpression(stack, CHECK_OK); 3127 } 3128 3129 if (!stack->is_empty()) { 3130 int last = stack->pop(); 3131 result = NEW(CallNew(result, new ZoneList<Expression*>(0), last)); 3132 } 3133 return result; 3134 } 3135 3136 3137 Expression* Parser::ParseNewExpression(bool* ok) { 3138 PositionStack stack(ok); 3139 return ParseNewPrefix(&stack, ok); 3140 } 3141 3142 3143 Expression* Parser::ParseMemberExpression(bool* ok) { 3144 return ParseMemberWithNewPrefixesExpression(NULL, ok); 3145 } 3146 3147 3148 Expression* Parser::ParseMemberWithNewPrefixesExpression(PositionStack* stack, 3149 bool* ok) { 3150 // MemberExpression :: 3151 // (PrimaryExpression | FunctionLiteral) 3152 // ('[' Expression ']' | '.' Identifier | Arguments)* 3153 3154 // Parse the initial primary or function expression. 
3155 Expression* result = NULL; 3156 if (peek() == Token::FUNCTION) { 3157 Expect(Token::FUNCTION, CHECK_OK); 3158 int function_token_position = scanner().location().beg_pos; 3159 Handle<String> name; 3160 if (peek() == Token::IDENTIFIER) name = ParseIdentifier(CHECK_OK); 3161 result = ParseFunctionLiteral(name, function_token_position, 3162 NESTED, CHECK_OK); 3163 } else { 3164 result = ParsePrimaryExpression(CHECK_OK); 3165 } 3166 3167 while (true) { 3168 switch (peek()) { 3169 case Token::LBRACK: { 3170 Consume(Token::LBRACK); 3171 int pos = scanner().location().beg_pos; 3172 Expression* index = ParseExpression(true, CHECK_OK); 3173 result = factory()->NewProperty(result, index, pos); 3174 Expect(Token::RBRACK, CHECK_OK); 3175 break; 3176 } 3177 case Token::PERIOD: { 3178 Consume(Token::PERIOD); 3179 int pos = scanner().location().beg_pos; 3180 Handle<String> name = ParseIdentifier(CHECK_OK); 3181 result = factory()->NewProperty(result, NEW(Literal(name)), pos); 3182 break; 3183 } 3184 case Token::LPAREN: { 3185 if ((stack == NULL) || stack->is_empty()) return result; 3186 // Consume one of the new prefixes (already parsed). 3187 ZoneList<Expression*>* args = ParseArguments(CHECK_OK); 3188 int last = stack->pop(); 3189 result = NEW(CallNew(result, args, last)); 3190 break; 3191 } 3192 default: 3193 return result; 3194 } 3195 } 3196 } 3197 3198 3199 DebuggerStatement* Parser::ParseDebuggerStatement(bool* ok) { 3200 // In ECMA-262 'debugger' is defined as a reserved keyword. In some browser 3201 // contexts this is used as a statement which invokes the debugger as i a 3202 // break point is present. 3203 // DebuggerStatement :: 3204 // 'debugger' ';' 3205 3206 Expect(Token::DEBUGGER, CHECK_OK); 3207 ExpectSemicolon(CHECK_OK); 3208 return NEW(DebuggerStatement()); 3209 } 3210 3211 3212 void Parser::ReportUnexpectedToken(Token::Value token) { 3213 // We don't report stack overflows here, to avoid increasing the 3214 // stack depth even further. 
Instead we report it after parsing is 3215 // over, in ParseProgram/ParseJson. 3216 if (token == Token::ILLEGAL && scanner().stack_overflow()) 3217 return; 3218 // Four of the tokens are treated specially 3219 switch (token) { 3220 case Token::EOS: 3221 return ReportMessage("unexpected_eos", Vector<const char*>::empty()); 3222 case Token::NUMBER: 3223 return ReportMessage("unexpected_token_number", 3224 Vector<const char*>::empty()); 3225 case Token::STRING: 3226 return ReportMessage("unexpected_token_string", 3227 Vector<const char*>::empty()); 3228 case Token::IDENTIFIER: 3229 return ReportMessage("unexpected_token_identifier", 3230 Vector<const char*>::empty()); 3231 default: 3232 const char* name = Token::String(token); 3233 ASSERT(name != NULL); 3234 ReportMessage("unexpected_token", Vector<const char*>(&name, 1)); 3235 } 3236 } 3237 3238 3239 Expression* Parser::ParsePrimaryExpression(bool* ok) { 3240 // PrimaryExpression :: 3241 // 'this' 3242 // 'null' 3243 // 'true' 3244 // 'false' 3245 // Identifier 3246 // Number 3247 // String 3248 // ArrayLiteral 3249 // ObjectLiteral 3250 // RegExpLiteral 3251 // '(' Expression ')' 3252 3253 Expression* result = NULL; 3254 switch (peek()) { 3255 case Token::THIS: { 3256 Consume(Token::THIS); 3257 if (is_pre_parsing_) { 3258 result = VariableProxySentinel::this_proxy(); 3259 } else { 3260 VariableProxy* recv = top_scope_->receiver(); 3261 recv->var_uses()->RecordRead(1); 3262 result = recv; 3263 } 3264 break; 3265 } 3266 3267 case Token::NULL_LITERAL: 3268 Consume(Token::NULL_LITERAL); 3269 result = NEW(Literal(Factory::null_value())); 3270 break; 3271 3272 case Token::TRUE_LITERAL: 3273 Consume(Token::TRUE_LITERAL); 3274 result = NEW(Literal(Factory::true_value())); 3275 break; 3276 3277 case Token::FALSE_LITERAL: 3278 Consume(Token::FALSE_LITERAL); 3279 result = NEW(Literal(Factory::false_value())); 3280 break; 3281 3282 case Token::IDENTIFIER: { 3283 Handle<String> name = ParseIdentifier(CHECK_OK); 3284 if 
(is_pre_parsing_) { 3285 result = VariableProxySentinel::identifier_proxy(); 3286 } else { 3287 result = top_scope_->NewUnresolved(name, inside_with()); 3288 } 3289 break; 3290 } 3291 3292 case Token::NUMBER: { 3293 Consume(Token::NUMBER); 3294 double value = 3295 StringToDouble(scanner_.literal_string(), ALLOW_HEX | ALLOW_OCTALS); 3296 result = NewNumberLiteral(value); 3297 break; 3298 } 3299 3300 case Token::STRING: { 3301 Consume(Token::STRING); 3302 Handle<String> symbol = 3303 factory()->LookupSymbol(scanner_.literal_string(), 3304 scanner_.literal_length()); 3305 result = NEW(Literal(symbol)); 3306 break; 3307 } 3308 3309 case Token::ASSIGN_DIV: 3310 result = ParseRegExpLiteral(true, CHECK_OK); 3311 break; 3312 3313 case Token::DIV: 3314 result = ParseRegExpLiteral(false, CHECK_OK); 3315 break; 3316 3317 case Token::LBRACK: 3318 result = ParseArrayLiteral(CHECK_OK); 3319 break; 3320 3321 case Token::LBRACE: 3322 result = ParseObjectLiteral(CHECK_OK); 3323 break; 3324 3325 case Token::LPAREN: 3326 Consume(Token::LPAREN); 3327 result = ParseExpression(true, CHECK_OK); 3328 Expect(Token::RPAREN, CHECK_OK); 3329 break; 3330 3331 case Token::MOD: 3332 if (allow_natives_syntax_ || extension_ != NULL) { 3333 result = ParseV8Intrinsic(CHECK_OK); 3334 break; 3335 } 3336 // If we're not allowing special syntax we fall-through to the 3337 // default case. 3338 3339 default: { 3340 Token::Value tok = peek(); 3341 // Token::Peek returns the value of the next token but 3342 // location() gives info about the current token. 3343 // Therefore, we need to read ahead to the next token 3344 Next(); 3345 ReportUnexpectedToken(tok); 3346 *ok = false; 3347 return NULL; 3348 } 3349 } 3350 3351 return result; 3352 } 3353 3354 3355 void Parser::BuildArrayLiteralBoilerplateLiterals(ZoneList<Expression*>* values, 3356 Handle<FixedArray> literals, 3357 bool* is_simple, 3358 int* depth) { 3359 // Fill in the literals. 3360 // Accumulate output values in local variables. 
3361 bool is_simple_acc = true; 3362 int depth_acc = 1; 3363 for (int i = 0; i < values->length(); i++) { 3364 MaterializedLiteral* m_literal = values->at(i)->AsMaterializedLiteral(); 3365 if (m_literal != NULL && m_literal->depth() >= depth_acc) { 3366 depth_acc = m_literal->depth() + 1; 3367 } 3368 Handle<Object> boilerplate_value = GetBoilerplateValue(values->at(i)); 3369 if (boilerplate_value->IsUndefined()) { 3370 literals->set_the_hole(i); 3371 is_simple_acc = false; 3372 } else { 3373 literals->set(i, *boilerplate_value); 3374 } 3375 } 3376 3377 *is_simple = is_simple_acc; 3378 *depth = depth_acc; 3379 } 3380 3381 3382 Expression* Parser::ParseArrayLiteral(bool* ok) { 3383 // ArrayLiteral :: 3384 // '[' Expression? (',' Expression?)* ']' 3385 3386 ZoneListWrapper<Expression> values = factory()->NewList<Expression>(4); 3387 Expect(Token::LBRACK, CHECK_OK); 3388 while (peek() != Token::RBRACK) { 3389 Expression* elem; 3390 if (peek() == Token::COMMA) { 3391 elem = GetLiteralTheHole(); 3392 } else { 3393 elem = ParseAssignmentExpression(true, CHECK_OK); 3394 } 3395 values.Add(elem); 3396 if (peek() != Token::RBRACK) { 3397 Expect(Token::COMMA, CHECK_OK); 3398 } 3399 } 3400 Expect(Token::RBRACK, CHECK_OK); 3401 3402 // Update the scope information before the pre-parsing bailout. 3403 int literal_index = temp_scope_->NextMaterializedLiteralIndex(); 3404 3405 if (is_pre_parsing_) return NULL; 3406 3407 // Allocate a fixed array with all the literals. 3408 Handle<FixedArray> literals = 3409 Factory::NewFixedArray(values.length(), TENURED); 3410 3411 // Fill in the literals. 
3412 bool is_simple = true; 3413 int depth = 1; 3414 for (int i = 0; i < values.length(); i++) { 3415 MaterializedLiteral* m_literal = values.at(i)->AsMaterializedLiteral(); 3416 if (m_literal != NULL && m_literal->depth() + 1 > depth) { 3417 depth = m_literal->depth() + 1; 3418 } 3419 Handle<Object> boilerplate_value = GetBoilerplateValue(values.at(i)); 3420 if (boilerplate_value->IsUndefined()) { 3421 literals->set_the_hole(i); 3422 is_simple = false; 3423 } else { 3424 literals->set(i, *boilerplate_value); 3425 } 3426 } 3427 3428 return NEW(ArrayLiteral(literals, values.elements(), 3429 literal_index, is_simple, depth)); 3430 } 3431 3432 3433 bool Parser::IsBoilerplateProperty(ObjectLiteral::Property* property) { 3434 return property != NULL && 3435 property->kind() != ObjectLiteral::Property::PROTOTYPE; 3436 } 3437 3438 3439 bool CompileTimeValue::IsCompileTimeValue(Expression* expression) { 3440 MaterializedLiteral* lit = expression->AsMaterializedLiteral(); 3441 return lit != NULL && lit->is_simple(); 3442 } 3443 3444 Handle<FixedArray> CompileTimeValue::GetValue(Expression* expression) { 3445 ASSERT(IsCompileTimeValue(expression)); 3446 Handle<FixedArray> result = Factory::NewFixedArray(2, TENURED); 3447 ObjectLiteral* object_literal = expression->AsObjectLiteral(); 3448 if (object_literal != NULL) { 3449 ASSERT(object_literal->is_simple()); 3450 result->set(kTypeSlot, Smi::FromInt(OBJECT_LITERAL)); 3451 result->set(kElementsSlot, *object_literal->constant_properties()); 3452 } else { 3453 ArrayLiteral* array_literal = expression->AsArrayLiteral(); 3454 ASSERT(array_literal != NULL && array_literal->is_simple()); 3455 result->set(kTypeSlot, Smi::FromInt(ARRAY_LITERAL)); 3456 result->set(kElementsSlot, *array_literal->constant_elements()); 3457 } 3458 return result; 3459 } 3460 3461 3462 CompileTimeValue::Type CompileTimeValue::GetType(Handle<FixedArray> value) { 3463 Smi* type_value = Smi::cast(value->get(kTypeSlot)); 3464 return 
static_cast<Type>(type_value->value()); 3465 } 3466 3467 3468 Handle<FixedArray> CompileTimeValue::GetElements(Handle<FixedArray> value) { 3469 return Handle<FixedArray>(FixedArray::cast(value->get(kElementsSlot))); 3470 } 3471 3472 3473 Handle<Object> Parser::GetBoilerplateValue(Expression* expression) { 3474 if (expression->AsLiteral() != NULL) { 3475 return expression->AsLiteral()->handle(); 3476 } 3477 if (CompileTimeValue::IsCompileTimeValue(expression)) { 3478 return CompileTimeValue::GetValue(expression); 3479 } 3480 return Factory::undefined_value(); 3481 } 3482 3483 3484 void Parser::BuildObjectLiteralConstantProperties( 3485 ZoneList<ObjectLiteral::Property*>* properties, 3486 Handle<FixedArray> constant_properties, 3487 bool* is_simple, 3488 int* depth) { 3489 int position = 0; 3490 // Accumulate the value in local variables and store it at the end. 3491 bool is_simple_acc = true; 3492 int depth_acc = 1; 3493 for (int i = 0; i < properties->length(); i++) { 3494 ObjectLiteral::Property* property = properties->at(i); 3495 if (!IsBoilerplateProperty(property)) { 3496 is_simple_acc = false; 3497 continue; 3498 } 3499 MaterializedLiteral* m_literal = property->value()->AsMaterializedLiteral(); 3500 if (m_literal != NULL && m_literal->depth() >= depth_acc) { 3501 depth_acc = m_literal->depth() + 1; 3502 } 3503 3504 // Add CONSTANT and COMPUTED properties to boilerplate. Use undefined 3505 // value for COMPUTED properties, the real value is filled in at 3506 // runtime. The enumeration order is maintained. 3507 Handle<Object> key = property->key()->handle(); 3508 Handle<Object> value = GetBoilerplateValue(property->value()); 3509 is_simple_acc = is_simple_acc && !value->IsUndefined(); 3510 3511 // Add name, value pair to the fixed array. 
3512 constant_properties->set(position++, *key); 3513 constant_properties->set(position++, *value); 3514 } 3515 3516 *is_simple = is_simple_acc; 3517 *depth = depth_acc; 3518 } 3519 3520 3521 Expression* Parser::ParseObjectLiteral(bool* ok) { 3522 // ObjectLiteral :: 3523 // '{' ( 3524 // ((Identifier | String | Number) ':' AssignmentExpression) 3525 // | (('get' | 'set') FunctionLiteral) 3526 // )*[','] '}' 3527 3528 ZoneListWrapper<ObjectLiteral::Property> properties = 3529 factory()->NewList<ObjectLiteral::Property>(4); 3530 int number_of_boilerplate_properties = 0; 3531 3532 Expect(Token::LBRACE, CHECK_OK); 3533 while (peek() != Token::RBRACE) { 3534 Literal* key = NULL; 3535 switch (peek()) { 3536 case Token::IDENTIFIER: { 3537 // Store identifier keys as literal symbols to avoid 3538 // resolving them when compiling code for the object 3539 // literal. 3540 bool is_getter = false; 3541 bool is_setter = false; 3542 Handle<String> id = 3543 ParseIdentifierOrGetOrSet(&is_getter, &is_setter, CHECK_OK); 3544 if (is_getter || is_setter) { 3545 // Special handling of getter and setter syntax. 
3546 if (peek() == Token::IDENTIFIER) { 3547 Handle<String> name = ParseIdentifier(CHECK_OK); 3548 FunctionLiteral* value = 3549 ParseFunctionLiteral(name, RelocInfo::kNoPosition, 3550 DECLARATION, CHECK_OK); 3551 ObjectLiteral::Property* property = 3552 NEW(ObjectLiteral::Property(is_getter, value)); 3553 if (IsBoilerplateProperty(property)) 3554 number_of_boilerplate_properties++; 3555 properties.Add(property); 3556 if (peek() != Token::RBRACE) Expect(Token::COMMA, CHECK_OK); 3557 continue; // restart the while 3558 } 3559 } 3560 key = NEW(Literal(id)); 3561 break; 3562 } 3563 3564 case Token::STRING: { 3565 Consume(Token::STRING); 3566 Handle<String> string = 3567 factory()->LookupSymbol(scanner_.literal_string(), 3568 scanner_.literal_length()); 3569 uint32_t index; 3570 if (!string.is_null() && string->AsArrayIndex(&index)) { 3571 key = NewNumberLiteral(index); 3572 } else { 3573 key = NEW(Literal(string)); 3574 } 3575 break; 3576 } 3577 3578 case Token::NUMBER: { 3579 Consume(Token::NUMBER); 3580 double value = 3581 StringToDouble(scanner_.literal_string(), ALLOW_HEX | ALLOW_OCTALS); 3582 key = NewNumberLiteral(value); 3583 break; 3584 } 3585 3586 default: 3587 Expect(Token::RBRACE, CHECK_OK); 3588 break; 3589 } 3590 3591 Expect(Token::COLON, CHECK_OK); 3592 Expression* value = ParseAssignmentExpression(true, CHECK_OK); 3593 3594 ObjectLiteral::Property* property = 3595 NEW(ObjectLiteral::Property(key, value)); 3596 3597 // Count CONSTANT or COMPUTED properties to maintain the enumeration order. 3598 if (IsBoilerplateProperty(property)) number_of_boilerplate_properties++; 3599 properties.Add(property); 3600 3601 // TODO(1240767): Consider allowing trailing comma. 3602 if (peek() != Token::RBRACE) Expect(Token::COMMA, CHECK_OK); 3603 } 3604 Expect(Token::RBRACE, CHECK_OK); 3605 // Computation of literal_index must happen before pre parse bailout. 
3606 int literal_index = temp_scope_->NextMaterializedLiteralIndex(); 3607 if (is_pre_parsing_) return NULL; 3608 3609 Handle<FixedArray> constant_properties = 3610 Factory::NewFixedArray(number_of_boilerplate_properties * 2, TENURED); 3611 3612 bool is_simple = true; 3613 int depth = 1; 3614 BuildObjectLiteralConstantProperties(properties.elements(), 3615 constant_properties, 3616 &is_simple, 3617 &depth); 3618 return new ObjectLiteral(constant_properties, 3619 properties.elements(), 3620 literal_index, 3621 is_simple, 3622 depth); 3623 } 3624 3625 3626 Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) { 3627 if (!scanner_.ScanRegExpPattern(seen_equal)) { 3628 Next(); 3629 ReportMessage("unterminated_regexp", Vector<const char*>::empty()); 3630 *ok = false; 3631 return NULL; 3632 } 3633 3634 int literal_index = temp_scope_->NextMaterializedLiteralIndex(); 3635 3636 if (is_pre_parsing_) { 3637 // If we're preparsing we just do all the parsing stuff without 3638 // building anything. 
3639 if (!scanner_.ScanRegExpFlags()) { 3640 Next(); 3641 ReportMessage("invalid_regexp_flags", Vector<const char*>::empty()); 3642 *ok = false; 3643 return NULL; 3644 } 3645 Next(); 3646 return NULL; 3647 } 3648 3649 Handle<String> js_pattern = 3650 Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED); 3651 scanner_.ScanRegExpFlags(); 3652 Handle<String> js_flags = 3653 Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED); 3654 Next(); 3655 3656 return new RegExpLiteral(js_pattern, js_flags, literal_index); 3657 } 3658 3659 3660 ZoneList<Expression*>* Parser::ParseArguments(bool* ok) { 3661 // Arguments :: 3662 // '(' (AssignmentExpression)*[','] ')' 3663 3664 ZoneListWrapper<Expression> result = factory()->NewList<Expression>(4); 3665 Expect(Token::LPAREN, CHECK_OK); 3666 bool done = (peek() == Token::RPAREN); 3667 while (!done) { 3668 Expression* argument = ParseAssignmentExpression(true, CHECK_OK); 3669 result.Add(argument); 3670 done = (peek() == Token::RPAREN); 3671 if (!done) Expect(Token::COMMA, CHECK_OK); 3672 } 3673 Expect(Token::RPAREN, CHECK_OK); 3674 return result.elements(); 3675 } 3676 3677 3678 FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name, 3679 int function_token_position, 3680 FunctionLiteralType type, 3681 bool* ok) { 3682 // Function :: 3683 // '(' FormalParameterList? ')' '{' FunctionBody '}' 3684 3685 bool is_named = !var_name.is_null(); 3686 3687 // The name associated with this function. If it's a function expression, 3688 // this is the actual function name, otherwise this is the name of the 3689 // variable declared and initialized with the function (expression). In 3690 // that case, we don't have a function name (it's empty). 3691 Handle<String> name = is_named ? var_name : factory()->EmptySymbol(); 3692 // The function name, if any. 
3693 Handle<String> function_name = factory()->EmptySymbol(); 3694 if (is_named && (type == EXPRESSION || type == NESTED)) { 3695 function_name = name; 3696 } 3697 3698 int num_parameters = 0; 3699 // Parse function body. 3700 { Scope::Type type = Scope::FUNCTION_SCOPE; 3701 Scope* scope = factory()->NewScope(top_scope_, type, inside_with()); 3702 LexicalScope lexical_scope(this, scope); 3703 TemporaryScope temp_scope(this); 3704 top_scope_->SetScopeName(name); 3705 3706 // FormalParameterList :: 3707 // '(' (Identifier)*[','] ')' 3708 Expect(Token::LPAREN, CHECK_OK); 3709 int start_pos = scanner_.location().beg_pos; 3710 bool done = (peek() == Token::RPAREN); 3711 while (!done) { 3712 Handle<String> param_name = ParseIdentifier(CHECK_OK); 3713 if (!is_pre_parsing_) { 3714 top_scope_->AddParameter(top_scope_->DeclareLocal(param_name, 3715 Variable::VAR)); 3716 num_parameters++; 3717 } 3718 done = (peek() == Token::RPAREN); 3719 if (!done) Expect(Token::COMMA, CHECK_OK); 3720 } 3721 Expect(Token::RPAREN, CHECK_OK); 3722 3723 Expect(Token::LBRACE, CHECK_OK); 3724 ZoneListWrapper<Statement> body = factory()->NewList<Statement>(8); 3725 3726 // If we have a named function expression, we add a local variable 3727 // declaration to the body of the function with the name of the 3728 // function and let it refer to the function itself (closure). 3729 // NOTE: We create a proxy and resolve it here so that in the 3730 // future we can change the AST to only refer to VariableProxies 3731 // instead of Variables and Proxis as is the case now. 
// NOTE(review): the statements down to the first pair of closing braces are
// the tail of a function whose head lies above this chunk; they are kept
// unchanged.
    if (!function_name.is_null() && function_name->length() > 0) {
      Variable* fvar = top_scope_->DeclareFunctionVar(function_name);
      VariableProxy* fproxy =
          top_scope_->NewUnresolved(function_name, inside_with());
      fproxy->BindTo(fvar);
      body.Add(new ExpressionStatement(
                   new Assignment(Token::INIT_CONST, fproxy,
                                  NEW(ThisFunction()),
                                  RelocInfo::kNoPosition)));
    }

    // Determine if the function will be lazily compiled. The mode can
    // only be PARSE_LAZILY if the --lazy flag is true.
    bool is_lazily_compiled =
        mode() == PARSE_LAZILY && top_scope_->HasTrivialOuterContext();

    int materialized_literal_count;
    int expected_property_count;
    bool only_simple_this_property_assignments;
    Handle<FixedArray> this_property_assignments;
    if (is_lazily_compiled && pre_data() != NULL) {
      // Skip the body: take its end position and counts from the
      // pre-parse data instead of parsing it again.
      FunctionEntry entry = pre_data()->GetFunctionEnd(start_pos);
      int end_pos = entry.end_pos();
      Counters::total_preparse_skipped.Increment(end_pos - start_pos);
      scanner_.SeekForward(end_pos);
      materialized_literal_count = entry.literal_count();
      expected_property_count = entry.property_count();
      only_simple_this_property_assignments = false;
      this_property_assignments = Factory::empty_fixed_array();
    } else {
      ParseSourceElements(&body, Token::RBRACE, CHECK_OK);
      materialized_literal_count = temp_scope.materialized_literal_count();
      expected_property_count = temp_scope.expected_property_count();
      only_simple_this_property_assignments =
          temp_scope.only_simple_this_property_assignments();
      this_property_assignments = temp_scope.this_property_assignments();
    }

    Expect(Token::RBRACE, CHECK_OK);
    int end_pos = scanner_.location().end_pos;

    // Record end position and counts in the log entry, if one was produced.
    FunctionEntry entry = log()->LogFunction(start_pos);
    if (entry.is_valid()) {
      entry.set_end_pos(end_pos);
      entry.set_literal_count(materialized_literal_count);
      entry.set_property_count(expected_property_count);
    }

    FunctionLiteral* function_literal =
        NEW(FunctionLiteral(name,
                            top_scope_,
                            body.elements(),
                            materialized_literal_count,
                            expected_property_count,
                            only_simple_this_property_assignments,
                            this_property_assignments,
                            num_parameters,
                            start_pos,
                            end_pos,
                            function_name->length() > 0));
    if (!is_pre_parsing_) {
      function_literal->set_function_token_position(function_token_position);
    }
    return function_literal;
  }
}


// Parses a runtime call or built-in macro written with the natives syntax.
// Returns the single argument for a successful %IS_VAR(x), NULL after
// reporting "unable_to_parse" when a macro fails, and otherwise the
// CallRuntime node produced through NEW.
// NOTE(review): CHECK_OK and NEW are macros defined outside this chunk;
// CHECK_OK presumably returns early when *ok is cleared -- confirm.
Expression* Parser::ParseV8Intrinsic(bool* ok) {
  // CallRuntime ::
  //   '%' Identifier Arguments

  Expect(Token::MOD, CHECK_OK);
  Handle<String> name = ParseIdentifier(CHECK_OK);
  // The scanner literal still holds the identifier that was just parsed.
  Runtime::Function* function =
      Runtime::FunctionForName(scanner_.literal_string());
  ZoneList<Expression*>* args = ParseArguments(CHECK_OK);
  if (function == NULL && extension_ != NULL) {
    // The extension structures are only accessible while parsing the
    // very first time not when reparsing because of lazy compilation.
    top_scope_->ForceEagerCompilation();
  }

  // Check for built-in macros.
  if (!is_pre_parsing_) {
    if (function == Runtime::FunctionForId(Runtime::kIS_VAR)) {
      // %IS_VAR(x)
      //   evaluates to x if x is a variable,
      //   leads to a parse error otherwise
      if (args->length() == 1 && args->at(0)->AsVariableProxy() != NULL) {
        return args->at(0);
      }
      *ok = false;
    // Check here for other macros.
    // } else if (function == Runtime::FunctionForId(Runtime::kIS_VAR)) {
    //   ...
    }

    if (!*ok) {
      // We found a macro but it failed.
      ReportMessage("unable_to_parse", Vector<const char*>::empty());
      return NULL;
    }
  }

  // Otherwise we have a runtime call.
  return NEW(CallRuntime(name, function, args));
}


// Advances to the next token, which the caller knows to be |token|; the
// expectation is only verified by the ASSERT. USE() keeps the otherwise
// unused variables referenced.
void Parser::Consume(Token::Value token) {
  Token::Value next = Next();
  USE(next);
  USE(token);
  ASSERT(next == token);
}


// Advances to the next token. If it is not |token|, reports it as
// unexpected and clears *ok.
void Parser::Expect(Token::Value token, bool* ok) {
  Token::Value next = Next();
  if (next == token) return;
  ReportUnexpectedToken(next);
  *ok = false;
}


// Consumes the upcoming token and returns true if it is |token|; otherwise
// leaves it in place and returns false.
bool Parser::Check(Token::Value token) {
  Token::Value next = peek();
  if (next == token) {
    Consume(next);
    return true;
  }
  return false;
}


// Accepts an explicit ';', or an implicit one when the next token is
// preceded by a line terminator, is '}', or is the end of the source.
// Anything else is reported through Expect(), which clears *ok.
void Parser::ExpectSemicolon(bool* ok) {
  // Check for automatic semicolon insertion according to
  // the rules given in ECMA-262, section 7.9, page 21.
  Token::Value tok = peek();
  if (tok == Token::SEMICOLON) {
    Next();
    return;
  }
  if (scanner_.has_line_terminator_before_next() ||
      tok == Token::RBRACE ||
      tok == Token::EOS) {
    return;
  }
  Expect(Token::SEMICOLON, ok);
}


// Returns a literal node for the undefined value, created through NEW.
Literal* Parser::GetLiteralUndefined() {
  return NEW(Literal(Factory::undefined_value()));
}


// Returns a literal node for the hole value, created through NEW.
Literal* Parser::GetLiteralTheHole() {
  return NEW(Literal(Factory::the_hole_value()));
}


// Returns a number literal for |value|; forwards to NewNumberLiteral().
Literal* Parser::GetLiteralNumber(double value) {
  return NewNumberLiteral(value);
}


// Expects an identifier token and returns the symbol for its text.
// Returns an empty handle (with *ok cleared by Expect) on any other token.
Handle<String> Parser::ParseIdentifier(bool* ok) {
  Expect(Token::IDENTIFIER, ok);
  if (!*ok) return Handle<String>();
  return factory()->LookupSymbol(scanner_.literal_string(),
                                 scanner_.literal_length());
}

// This function reads an identifier and determines whether or not it
// is 'get' or 'set'.  The reason for not using ParseIdentifier and
// checking on the output is that this involves heap allocation which
// we can't do during preparsing.
3911 Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get, 3912 bool* is_set, 3913 bool* ok) { 3914 Expect(Token::IDENTIFIER, ok); 3915 if (!*ok) return Handle<String>(); 3916 if (scanner_.literal_length() == 3) { 3917 const char* token = scanner_.literal_string(); 3918 *is_get = strcmp(token, "get") == 0; 3919 *is_set = !*is_get && strcmp(token, "set") == 0; 3920 } 3921 return factory()->LookupSymbol(scanner_.literal_string(), 3922 scanner_.literal_length()); 3923 } 3924 3925 3926 // ---------------------------------------------------------------------------- 3927 // Parser support 3928 3929 3930 bool Parser::TargetStackContainsLabel(Handle<String> label) { 3931 for (Target* t = target_stack_; t != NULL; t = t->previous()) { 3932 BreakableStatement* stat = t->node()->AsBreakableStatement(); 3933 if (stat != NULL && ContainsLabel(stat->labels(), label)) 3934 return true; 3935 } 3936 return false; 3937 } 3938 3939 3940 BreakableStatement* Parser::LookupBreakTarget(Handle<String> label, bool* ok) { 3941 bool anonymous = label.is_null(); 3942 for (Target* t = target_stack_; t != NULL; t = t->previous()) { 3943 BreakableStatement* stat = t->node()->AsBreakableStatement(); 3944 if (stat == NULL) continue; 3945 if ((anonymous && stat->is_target_for_anonymous()) || 3946 (!anonymous && ContainsLabel(stat->labels(), label))) { 3947 RegisterTargetUse(stat->break_target(), t->previous()); 3948 return stat; 3949 } 3950 } 3951 return NULL; 3952 } 3953 3954 3955 IterationStatement* Parser::LookupContinueTarget(Handle<String> label, 3956 bool* ok) { 3957 bool anonymous = label.is_null(); 3958 for (Target* t = target_stack_; t != NULL; t = t->previous()) { 3959 IterationStatement* stat = t->node()->AsIterationStatement(); 3960 if (stat == NULL) continue; 3961 3962 ASSERT(stat->is_target_for_anonymous()); 3963 if (anonymous || ContainsLabel(stat->labels(), label)) { 3964 RegisterTargetUse(stat->continue_target(), t->previous()); 3965 return stat; 3966 } 3967 } 3968 return 
NULL; 3969 } 3970 3971 3972 void Parser::RegisterTargetUse(BreakTarget* target, Target* stop) { 3973 // Register that a break target found at the given stop in the 3974 // target stack has been used from the top of the target stack. Add 3975 // the break target to any TargetCollectors passed on the stack. 3976 for (Target* t = target_stack_; t != stop; t = t->previous()) { 3977 TargetCollector* collector = t->node()->AsTargetCollector(); 3978 if (collector != NULL) collector->AddTarget(target); 3979 } 3980 } 3981 3982 3983 Literal* Parser::NewNumberLiteral(double number) { 3984 return NEW(Literal(Factory::NewNumber(number, TENURED))); 3985 } 3986 3987 3988 Expression* Parser::NewThrowReferenceError(Handle<String> type) { 3989 return NewThrowError(Factory::MakeReferenceError_symbol(), 3990 type, HandleVector<Object>(NULL, 0)); 3991 } 3992 3993 3994 Expression* Parser::NewThrowSyntaxError(Handle<String> type, 3995 Handle<Object> first) { 3996 int argc = first.is_null() ? 0 : 1; 3997 Vector< Handle<Object> > arguments = HandleVector<Object>(&first, argc); 3998 return NewThrowError(Factory::MakeSyntaxError_symbol(), type, arguments); 3999 } 4000 4001 4002 Expression* Parser::NewThrowTypeError(Handle<String> type, 4003 Handle<Object> first, 4004 Handle<Object> second) { 4005 ASSERT(!first.is_null() && !second.is_null()); 4006 Handle<Object> elements[] = { first, second }; 4007 Vector< Handle<Object> > arguments = 4008 HandleVector<Object>(elements, ARRAY_SIZE(elements)); 4009 return NewThrowError(Factory::MakeTypeError_symbol(), type, arguments); 4010 } 4011 4012 4013 Expression* Parser::NewThrowError(Handle<String> constructor, 4014 Handle<String> type, 4015 Vector< Handle<Object> > arguments) { 4016 if (is_pre_parsing_) return NULL; 4017 4018 int argc = arguments.length(); 4019 Handle<JSArray> array = Factory::NewJSArray(argc, TENURED); 4020 ASSERT(array->IsJSArray() && array->HasFastElements()); 4021 for (int i = 0; i < argc; i++) { 4022 Handle<Object> element = 
arguments[i]; 4023 if (!element.is_null()) { 4024 array->SetFastElement(i, *element); 4025 } 4026 } 4027 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); 4028 args->Add(new Literal(type)); 4029 args->Add(new Literal(array)); 4030 return new Throw(new CallRuntime(constructor, NULL, args), 4031 scanner().location().beg_pos); 4032 } 4033 4034 // ---------------------------------------------------------------------------- 4035 // JSON 4036 4037 Expression* Parser::ParseJson(bool* ok) { 4038 Expression* result = ParseJsonValue(CHECK_OK); 4039 Expect(Token::EOS, CHECK_OK); 4040 return result; 4041 } 4042 4043 4044 // Parse any JSON value. 4045 Expression* Parser::ParseJsonValue(bool* ok) { 4046 Token::Value token = peek(); 4047 switch (token) { 4048 case Token::STRING: { 4049 Consume(Token::STRING); 4050 int literal_length = scanner_.literal_length(); 4051 const char* literal_string = scanner_.literal_string(); 4052 if (literal_length == 0) { 4053 return NEW(Literal(Factory::empty_string())); 4054 } 4055 Vector<const char> literal(literal_string, literal_length); 4056 return NEW(Literal(Factory::NewStringFromUtf8(literal, TENURED))); 4057 } 4058 case Token::NUMBER: { 4059 Consume(Token::NUMBER); 4060 ASSERT(scanner_.literal_length() > 0); 4061 double value = StringToDouble(scanner_.literal_string(), 4062 NO_FLAGS, // Hex, octal or trailing junk. 
4063 OS::nan_value()); 4064 return NewNumberLiteral(value); 4065 } 4066 case Token::FALSE_LITERAL: 4067 Consume(Token::FALSE_LITERAL); 4068 return NEW(Literal(Factory::false_value())); 4069 case Token::TRUE_LITERAL: 4070 Consume(Token::TRUE_LITERAL); 4071 return NEW(Literal(Factory::true_value())); 4072 case Token::NULL_LITERAL: 4073 Consume(Token::NULL_LITERAL); 4074 return NEW(Literal(Factory::null_value())); 4075 case Token::LBRACE: { 4076 Expression* result = ParseJsonObject(CHECK_OK); 4077 return result; 4078 } 4079 case Token::LBRACK: { 4080 Expression* result = ParseJsonArray(CHECK_OK); 4081 return result; 4082 } 4083 default: 4084 *ok = false; 4085 ReportUnexpectedToken(token); 4086 return NULL; 4087 } 4088 } 4089 4090 4091 // Parse a JSON object. Scanner must be right after '{' token. 4092 Expression* Parser::ParseJsonObject(bool* ok) { 4093 Consume(Token::LBRACE); 4094 ZoneListWrapper<ObjectLiteral::Property> properties = 4095 factory()->NewList<ObjectLiteral::Property>(4); 4096 int boilerplate_properties = 0; 4097 if (peek() != Token::RBRACE) { 4098 do { 4099 Expect(Token::STRING, CHECK_OK); 4100 Handle<String> key = factory()->LookupSymbol(scanner_.literal_string(), 4101 scanner_.literal_length()); 4102 Expect(Token::COLON, CHECK_OK); 4103 Expression* value = ParseJsonValue(CHECK_OK); 4104 Literal* key_literal; 4105 uint32_t index; 4106 if (key->AsArrayIndex(&index)) { 4107 key_literal = NewNumberLiteral(index); 4108 } else { 4109 key_literal = NEW(Literal(key)); 4110 } 4111 ObjectLiteral::Property* property = 4112 NEW(ObjectLiteral::Property(key_literal, value)); 4113 properties.Add(property); 4114 4115 if (IsBoilerplateProperty(property)) { 4116 boilerplate_properties++; 4117 } 4118 } while (Check(Token::COMMA)); 4119 } 4120 Expect(Token::RBRACE, CHECK_OK); 4121 4122 int literal_index = temp_scope_->NextMaterializedLiteralIndex(); 4123 if (is_pre_parsing_) return NULL; 4124 4125 Handle<FixedArray> constant_properties = 4126 
Factory::NewFixedArray(boilerplate_properties * 2, TENURED); 4127 bool is_simple = true; 4128 int depth = 1; 4129 BuildObjectLiteralConstantProperties(properties.elements(), 4130 constant_properties, 4131 &is_simple, 4132 &depth); 4133 return new ObjectLiteral(constant_properties, 4134 properties.elements(), 4135 literal_index, 4136 is_simple, 4137 depth); 4138 } 4139 4140 4141 // Parse a JSON array. Scanner must be right after '[' token. 4142 Expression* Parser::ParseJsonArray(bool* ok) { 4143 Consume(Token::LBRACK); 4144 4145 ZoneListWrapper<Expression> values = factory()->NewList<Expression>(4); 4146 if (peek() != Token::RBRACK) { 4147 do { 4148 Expression* exp = ParseJsonValue(CHECK_OK); 4149 values.Add(exp); 4150 } while (Check(Token::COMMA)); 4151 } 4152 Expect(Token::RBRACK, CHECK_OK); 4153 4154 // Update the scope information before the pre-parsing bailout. 4155 int literal_index = temp_scope_->NextMaterializedLiteralIndex(); 4156 4157 if (is_pre_parsing_) return NULL; 4158 4159 // Allocate a fixed array with all the literals. 
4160 Handle<FixedArray> literals = 4161 Factory::NewFixedArray(values.length(), TENURED); 4162 4163 bool is_simple; 4164 int depth; 4165 BuildArrayLiteralBoilerplateLiterals(values.elements(), 4166 literals, 4167 &is_simple, 4168 &depth); 4169 return NEW(ArrayLiteral(literals, values.elements(), 4170 literal_index, is_simple, depth)); 4171 } 4172 4173 4174 // ---------------------------------------------------------------------------- 4175 // Regular expressions 4176 4177 4178 RegExpParser::RegExpParser(FlatStringReader* in, 4179 Handle<String>* error, 4180 bool multiline) 4181 : current_(kEndMarker), 4182 has_more_(true), 4183 multiline_(multiline), 4184 next_pos_(0), 4185 in_(in), 4186 error_(error), 4187 simple_(false), 4188 contains_anchor_(false), 4189 captures_(NULL), 4190 is_scanned_for_captures_(false), 4191 capture_count_(0), 4192 failed_(false) { 4193 Advance(1); 4194 } 4195 4196 4197 uc32 RegExpParser::Next() { 4198 if (has_next()) { 4199 return in()->Get(next_pos_); 4200 } else { 4201 return kEndMarker; 4202 } 4203 } 4204 4205 4206 void RegExpParser::Advance() { 4207 if (next_pos_ < in()->length()) { 4208 StackLimitCheck check; 4209 if (check.HasOverflowed()) { 4210 ReportError(CStrVector(Top::kStackOverflowMessage)); 4211 } else if (Zone::excess_allocation()) { 4212 ReportError(CStrVector("Regular expression too large")); 4213 } else { 4214 current_ = in()->Get(next_pos_); 4215 next_pos_++; 4216 } 4217 } else { 4218 current_ = kEndMarker; 4219 has_more_ = false; 4220 } 4221 } 4222 4223 4224 void RegExpParser::Reset(int pos) { 4225 next_pos_ = pos; 4226 Advance(); 4227 } 4228 4229 4230 void RegExpParser::Advance(int dist) { 4231 for (int i = 0; i < dist; i++) 4232 Advance(); 4233 } 4234 4235 4236 bool RegExpParser::simple() { 4237 return simple_; 4238 } 4239 4240 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { 4241 failed_ = true; 4242 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); 4243 // Zip to the end to make sure 
the no more input is read. 4244 current_ = kEndMarker; 4245 next_pos_ = in()->length(); 4246 return NULL; 4247 } 4248 4249 4250 // Pattern :: 4251 // Disjunction 4252 RegExpTree* RegExpParser::ParsePattern() { 4253 RegExpTree* result = ParseDisjunction(CHECK_FAILED); 4254 ASSERT(!has_more()); 4255 // If the result of parsing is a literal string atom, and it has the 4256 // same length as the input, then the atom is identical to the input. 4257 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { 4258 simple_ = true; 4259 } 4260 return result; 4261 } 4262 4263 4264 // Disjunction :: 4265 // Alternative 4266 // Alternative | Disjunction 4267 // Alternative :: 4268 // [empty] 4269 // Term Alternative 4270 // Term :: 4271 // Assertion 4272 // Atom 4273 // Atom Quantifier 4274 RegExpTree* RegExpParser::ParseDisjunction() { 4275 // Used to store current state while parsing subexpressions. 4276 RegExpParserState initial_state(NULL, INITIAL, 0); 4277 RegExpParserState* stored_state = &initial_state; 4278 // Cache the builder in a local variable for quick access. 4279 RegExpBuilder* builder = initial_state.builder(); 4280 while (true) { 4281 switch (current()) { 4282 case kEndMarker: 4283 if (stored_state->IsSubexpression()) { 4284 // Inside a parenthesized group when hitting end of input. 4285 ReportError(CStrVector("Unterminated group") CHECK_FAILED); 4286 } 4287 ASSERT_EQ(INITIAL, stored_state->group_type()); 4288 // Parsing completed successfully. 4289 return builder->ToRegExp(); 4290 case ')': { 4291 if (!stored_state->IsSubexpression()) { 4292 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); 4293 } 4294 ASSERT_NE(INITIAL, stored_state->group_type()); 4295 4296 Advance(); 4297 // End disjunction parsing and convert builder content to new single 4298 // regexp atom. 
4299 RegExpTree* body = builder->ToRegExp(); 4300 4301 int end_capture_index = captures_started(); 4302 4303 int capture_index = stored_state->capture_index(); 4304 SubexpressionType type = stored_state->group_type(); 4305 4306 // Restore previous state. 4307 stored_state = stored_state->previous_state(); 4308 builder = stored_state->builder(); 4309 4310 // Build result of subexpression. 4311 if (type == CAPTURE) { 4312 RegExpCapture* capture = new RegExpCapture(body, capture_index); 4313 captures_->at(capture_index - 1) = capture; 4314 body = capture; 4315 } else if (type != GROUPING) { 4316 ASSERT(type == POSITIVE_LOOKAHEAD || type == NEGATIVE_LOOKAHEAD); 4317 bool is_positive = (type == POSITIVE_LOOKAHEAD); 4318 body = new RegExpLookahead(body, 4319 is_positive, 4320 end_capture_index - capture_index, 4321 capture_index); 4322 } 4323 builder->AddAtom(body); 4324 break; 4325 } 4326 case '|': { 4327 Advance(); 4328 builder->NewAlternative(); 4329 continue; 4330 } 4331 case '*': 4332 case '+': 4333 case '?': 4334 return ReportError(CStrVector("Nothing to repeat")); 4335 case '^': { 4336 Advance(); 4337 if (multiline_) { 4338 builder->AddAssertion( 4339 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); 4340 } else { 4341 builder->AddAssertion( 4342 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); 4343 set_contains_anchor(); 4344 } 4345 continue; 4346 } 4347 case '$': { 4348 Advance(); 4349 RegExpAssertion::Type type = 4350 multiline_ ? 
RegExpAssertion::END_OF_LINE : 4351 RegExpAssertion::END_OF_INPUT; 4352 builder->AddAssertion(new RegExpAssertion(type)); 4353 continue; 4354 } 4355 case '.': { 4356 Advance(); 4357 // everything except \x0a, \x0d, \u2028 and \u2029 4358 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4359 CharacterRange::AddClassEscape('.', ranges); 4360 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 4361 builder->AddAtom(atom); 4362 break; 4363 } 4364 case '(': { 4365 SubexpressionType type = CAPTURE; 4366 Advance(); 4367 if (current() == '?') { 4368 switch (Next()) { 4369 case ':': 4370 type = GROUPING; 4371 break; 4372 case '=': 4373 type = POSITIVE_LOOKAHEAD; 4374 break; 4375 case '!': 4376 type = NEGATIVE_LOOKAHEAD; 4377 break; 4378 default: 4379 ReportError(CStrVector("Invalid group") CHECK_FAILED); 4380 break; 4381 } 4382 Advance(2); 4383 } else { 4384 if (captures_ == NULL) { 4385 captures_ = new ZoneList<RegExpCapture*>(2); 4386 } 4387 if (captures_started() >= kMaxCaptures) { 4388 ReportError(CStrVector("Too many captures") CHECK_FAILED); 4389 } 4390 captures_->Add(NULL); 4391 } 4392 // Store current state and begin new disjunction parsing. 
4393 stored_state = new RegExpParserState(stored_state, 4394 type, 4395 captures_started()); 4396 builder = stored_state->builder(); 4397 break; 4398 } 4399 case '[': { 4400 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); 4401 builder->AddAtom(atom); 4402 break; 4403 } 4404 // Atom :: 4405 // \ AtomEscape 4406 case '\\': 4407 switch (Next()) { 4408 case kEndMarker: 4409 return ReportError(CStrVector("\\ at end of pattern")); 4410 case 'b': 4411 Advance(2); 4412 builder->AddAssertion( 4413 new RegExpAssertion(RegExpAssertion::BOUNDARY)); 4414 continue; 4415 case 'B': 4416 Advance(2); 4417 builder->AddAssertion( 4418 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); 4419 continue; 4420 // AtomEscape :: 4421 // CharacterClassEscape 4422 // 4423 // CharacterClassEscape :: one of 4424 // d D s S w W 4425 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { 4426 uc32 c = Next(); 4427 Advance(2); 4428 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4429 CharacterRange::AddClassEscape(c, ranges); 4430 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 4431 builder->AddAtom(atom); 4432 break; 4433 } 4434 case '1': case '2': case '3': case '4': case '5': case '6': 4435 case '7': case '8': case '9': { 4436 int index = 0; 4437 if (ParseBackReferenceIndex(&index)) { 4438 RegExpCapture* capture = NULL; 4439 if (captures_ != NULL && index <= captures_->length()) { 4440 capture = captures_->at(index - 1); 4441 } 4442 if (capture == NULL) { 4443 builder->AddEmpty(); 4444 break; 4445 } 4446 RegExpTree* atom = new RegExpBackReference(capture); 4447 builder->AddAtom(atom); 4448 break; 4449 } 4450 uc32 first_digit = Next(); 4451 if (first_digit == '8' || first_digit == '9') { 4452 // Treat as identity escape 4453 builder->AddCharacter(first_digit); 4454 Advance(2); 4455 break; 4456 } 4457 } 4458 // FALLTHROUGH 4459 case '0': { 4460 Advance(); 4461 uc32 octal = ParseOctalLiteral(); 4462 builder->AddCharacter(octal); 4463 break; 4464 } 4465 
// ControlEscape :: one of 4466 // f n r t v 4467 case 'f': 4468 Advance(2); 4469 builder->AddCharacter('\f'); 4470 break; 4471 case 'n': 4472 Advance(2); 4473 builder->AddCharacter('\n'); 4474 break; 4475 case 'r': 4476 Advance(2); 4477 builder->AddCharacter('\r'); 4478 break; 4479 case 't': 4480 Advance(2); 4481 builder->AddCharacter('\t'); 4482 break; 4483 case 'v': 4484 Advance(2); 4485 builder->AddCharacter('\v'); 4486 break; 4487 case 'c': { 4488 Advance(2); 4489 uc32 control = ParseControlLetterEscape(); 4490 builder->AddCharacter(control); 4491 break; 4492 } 4493 case 'x': { 4494 Advance(2); 4495 uc32 value; 4496 if (ParseHexEscape(2, &value)) { 4497 builder->AddCharacter(value); 4498 } else { 4499 builder->AddCharacter('x'); 4500 } 4501 break; 4502 } 4503 case 'u': { 4504 Advance(2); 4505 uc32 value; 4506 if (ParseHexEscape(4, &value)) { 4507 builder->AddCharacter(value); 4508 } else { 4509 builder->AddCharacter('u'); 4510 } 4511 break; 4512 } 4513 default: 4514 // Identity escape. 4515 builder->AddCharacter(Next()); 4516 Advance(2); 4517 break; 4518 } 4519 break; 4520 case '{': { 4521 int dummy; 4522 if (ParseIntervalQuantifier(&dummy, &dummy)) { 4523 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 4524 } 4525 // fallthrough 4526 } 4527 default: 4528 builder->AddCharacter(current()); 4529 Advance(); 4530 break; 4531 } // end switch(current()) 4532 4533 int min; 4534 int max; 4535 switch (current()) { 4536 // QuantifierPrefix :: 4537 // * 4538 // + 4539 // ? 
4540 // { 4541 case '*': 4542 min = 0; 4543 max = RegExpTree::kInfinity; 4544 Advance(); 4545 break; 4546 case '+': 4547 min = 1; 4548 max = RegExpTree::kInfinity; 4549 Advance(); 4550 break; 4551 case '?': 4552 min = 0; 4553 max = 1; 4554 Advance(); 4555 break; 4556 case '{': 4557 if (ParseIntervalQuantifier(&min, &max)) { 4558 if (max < min) { 4559 ReportError(CStrVector("numbers out of order in {} quantifier.") 4560 CHECK_FAILED); 4561 } 4562 break; 4563 } else { 4564 continue; 4565 } 4566 default: 4567 continue; 4568 } 4569 RegExpQuantifier::Type type = RegExpQuantifier::GREEDY; 4570 if (current() == '?') { 4571 type = RegExpQuantifier::NON_GREEDY; 4572 Advance(); 4573 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { 4574 // FLAG_regexp_possessive_quantifier is a debug-only flag. 4575 type = RegExpQuantifier::POSSESSIVE; 4576 Advance(); 4577 } 4578 builder->AddQuantifierToAtom(min, max, type); 4579 } 4580 } 4581 4582 class SourceCharacter { 4583 public: 4584 static bool Is(uc32 c) { 4585 switch (c) { 4586 // case ']': case '}': 4587 // In spidermonkey and jsc these are treated as source characters 4588 // so we do too. 4589 case '^': case '$': case '\\': case '.': case '*': case '+': 4590 case '?': case '(': case ')': case '[': case '{': case '|': 4591 case RegExpParser::kEndMarker: 4592 return false; 4593 default: 4594 return true; 4595 } 4596 } 4597 }; 4598 4599 4600 static unibrow::Predicate<SourceCharacter> source_character; 4601 4602 4603 static inline bool IsSourceCharacter(uc32 c) { 4604 return source_character.get(c); 4605 } 4606 4607 #ifdef DEBUG 4608 // Currently only used in an ASSERT. 
4609 static bool IsSpecialClassEscape(uc32 c) { 4610 switch (c) { 4611 case 'd': case 'D': 4612 case 's': case 'S': 4613 case 'w': case 'W': 4614 return true; 4615 default: 4616 return false; 4617 } 4618 } 4619 #endif 4620 4621 4622 // In order to know whether an escape is a backreference or not we have to scan 4623 // the entire regexp and find the number of capturing parentheses. However we 4624 // don't want to scan the regexp twice unless it is necessary. This mini-parser 4625 // is called when needed. It can see the difference between capturing and 4626 // noncapturing parentheses and can skip character classes and backslash-escaped 4627 // characters. 4628 void RegExpParser::ScanForCaptures() { 4629 // Start with captures started previous to current position 4630 int capture_count = captures_started(); 4631 // Add count of captures after this position. 4632 int n; 4633 while ((n = current()) != kEndMarker) { 4634 Advance(); 4635 switch (n) { 4636 case '\\': 4637 Advance(); 4638 break; 4639 case '[': { 4640 int c; 4641 while ((c = current()) != kEndMarker) { 4642 Advance(); 4643 if (c == '\\') { 4644 Advance(); 4645 } else { 4646 if (c == ']') break; 4647 } 4648 } 4649 break; 4650 } 4651 case '(': 4652 if (current() != '?') capture_count++; 4653 break; 4654 } 4655 } 4656 capture_count_ = capture_count; 4657 is_scanned_for_captures_ = true; 4658 } 4659 4660 4661 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { 4662 ASSERT_EQ('\\', current()); 4663 ASSERT('1' <= Next() && Next() <= '9'); 4664 // Try to parse a decimal literal that is no greater than the total number 4665 // of left capturing parentheses in the input. 
4666 int start = position(); 4667 int value = Next() - '0'; 4668 Advance(2); 4669 while (true) { 4670 uc32 c = current(); 4671 if (IsDecimalDigit(c)) { 4672 value = 10 * value + (c - '0'); 4673 if (value > kMaxCaptures) { 4674 Reset(start); 4675 return false; 4676 } 4677 Advance(); 4678 } else { 4679 break; 4680 } 4681 } 4682 if (value > captures_started()) { 4683 if (!is_scanned_for_captures_) { 4684 int saved_position = position(); 4685 ScanForCaptures(); 4686 Reset(saved_position); 4687 } 4688 if (value > capture_count_) { 4689 Reset(start); 4690 return false; 4691 } 4692 } 4693 *index_out = value; 4694 return true; 4695 } 4696 4697 4698 // QuantifierPrefix :: 4699 // { DecimalDigits } 4700 // { DecimalDigits , } 4701 // { DecimalDigits , DecimalDigits } 4702 // 4703 // Returns true if parsing succeeds, and set the min_out and max_out 4704 // values. Values are truncated to RegExpTree::kInfinity if they overflow. 4705 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 4706 ASSERT_EQ(current(), '{'); 4707 int start = position(); 4708 Advance(); 4709 int min = 0; 4710 if (!IsDecimalDigit(current())) { 4711 Reset(start); 4712 return false; 4713 } 4714 while (IsDecimalDigit(current())) { 4715 int next = current() - '0'; 4716 if (min > (RegExpTree::kInfinity - next) / 10) { 4717 // Overflow. Skip past remaining decimal digits and return -1. 
4718 do { 4719 Advance(); 4720 } while (IsDecimalDigit(current())); 4721 min = RegExpTree::kInfinity; 4722 break; 4723 } 4724 min = 10 * min + next; 4725 Advance(); 4726 } 4727 int max = 0; 4728 if (current() == '}') { 4729 max = min; 4730 Advance(); 4731 } else if (current() == ',') { 4732 Advance(); 4733 if (current() == '}') { 4734 max = RegExpTree::kInfinity; 4735 Advance(); 4736 } else { 4737 while (IsDecimalDigit(current())) { 4738 int next = current() - '0'; 4739 if (max > (RegExpTree::kInfinity - next) / 10) { 4740 do { 4741 Advance(); 4742 } while (IsDecimalDigit(current())); 4743 max = RegExpTree::kInfinity; 4744 break; 4745 } 4746 max = 10 * max + next; 4747 Advance(); 4748 } 4749 if (current() != '}') { 4750 Reset(start); 4751 return false; 4752 } 4753 Advance(); 4754 } 4755 } else { 4756 Reset(start); 4757 return false; 4758 } 4759 *min_out = min; 4760 *max_out = max; 4761 return true; 4762 } 4763 4764 4765 // Upper and lower case letters differ by one bit. 4766 STATIC_CHECK(('a' ^ 'A') == 0x20); 4767 4768 uc32 RegExpParser::ParseControlLetterEscape() { 4769 if (!has_more()) 4770 return 'c'; 4771 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. 4772 if (letter < 'A' || 'Z' < letter) { 4773 // Non-spec error-correction: "\c" followed by non-control letter is 4774 // interpreted as an IdentityEscape of 'c'. 4775 return 'c'; 4776 } 4777 Advance(); 4778 return letter & 0x1f; // Remainder modulo 32, per specification. 4779 } 4780 4781 4782 uc32 RegExpParser::ParseOctalLiteral() { 4783 ASSERT('0' <= current() && current() <= '7'); 4784 // For compatibility with some other browsers (not all), we parse 4785 // up to three octal digits with a value below 256. 
4786 uc32 value = current() - '0'; 4787 Advance(); 4788 if ('0' <= current() && current() <= '7') { 4789 value = value * 8 + current() - '0'; 4790 Advance(); 4791 if (value < 32 && '0' <= current() && current() <= '7') { 4792 value = value * 8 + current() - '0'; 4793 Advance(); 4794 } 4795 } 4796 return value; 4797 } 4798 4799 4800 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { 4801 int start = position(); 4802 uc32 val = 0; 4803 bool done = false; 4804 for (int i = 0; !done; i++) { 4805 uc32 c = current(); 4806 int d = HexValue(c); 4807 if (d < 0) { 4808 Reset(start); 4809 return false; 4810 } 4811 val = val * 16 + d; 4812 Advance(); 4813 if (i == length - 1) { 4814 done = true; 4815 } 4816 } 4817 *value = val; 4818 return true; 4819 } 4820 4821 4822 uc32 RegExpParser::ParseClassCharacterEscape() { 4823 ASSERT(current() == '\\'); 4824 ASSERT(has_next() && !IsSpecialClassEscape(Next())); 4825 Advance(); 4826 switch (current()) { 4827 case 'b': 4828 Advance(); 4829 return '\b'; 4830 // ControlEscape :: one of 4831 // f n r t v 4832 case 'f': 4833 Advance(); 4834 return '\f'; 4835 case 'n': 4836 Advance(); 4837 return '\n'; 4838 case 'r': 4839 Advance(); 4840 return '\r'; 4841 case 't': 4842 Advance(); 4843 return '\t'; 4844 case 'v': 4845 Advance(); 4846 return '\v'; 4847 case 'c': 4848 Advance(); 4849 return ParseControlLetterEscape(); 4850 case '0': case '1': case '2': case '3': case '4': case '5': 4851 case '6': case '7': 4852 // For compatibility, we interpret a decimal escape that isn't 4853 // a back reference (and therefore either \0 or not valid according 4854 // to the specification) as a 1..3 digit octal character code. 4855 return ParseOctalLiteral(); 4856 case 'x': { 4857 Advance(); 4858 uc32 value; 4859 if (ParseHexEscape(2, &value)) { 4860 return value; 4861 } 4862 // If \x is not followed by a two-digit hexadecimal, treat it 4863 // as an identity escape. 
4864 return 'x'; 4865 } 4866 case 'u': { 4867 Advance(); 4868 uc32 value; 4869 if (ParseHexEscape(4, &value)) { 4870 return value; 4871 } 4872 // If \u is not followed by a four-digit hexadecimal, treat it 4873 // as an identity escape. 4874 return 'u'; 4875 } 4876 default: { 4877 // Extended identity escape. We accept any character that hasn't 4878 // been matched by a more specific case, not just the subset required 4879 // by the ECMAScript specification. 4880 uc32 result = current(); 4881 Advance(); 4882 return result; 4883 } 4884 } 4885 return 0; 4886 } 4887 4888 4889 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { 4890 ASSERT_EQ(0, *char_class); 4891 uc32 first = current(); 4892 if (first == '\\') { 4893 switch (Next()) { 4894 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { 4895 *char_class = Next(); 4896 Advance(2); 4897 return CharacterRange::Singleton(0); // Return dummy value. 4898 } 4899 case kEndMarker: 4900 return ReportError(CStrVector("\\ at end of pattern")); 4901 default: 4902 uc32 c = ParseClassCharacterEscape(CHECK_FAILED); 4903 return CharacterRange::Singleton(c); 4904 } 4905 } else { 4906 Advance(); 4907 return CharacterRange::Singleton(first); 4908 } 4909 } 4910 4911 4912 RegExpTree* RegExpParser::ParseCharacterClass() { 4913 static const char* kUnterminated = "Unterminated character class"; 4914 static const char* kRangeOutOfOrder = "Range out of order in character class"; 4915 4916 ASSERT_EQ(current(), '['); 4917 Advance(); 4918 bool is_negated = false; 4919 if (current() == '^') { 4920 is_negated = true; 4921 Advance(); 4922 } 4923 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4924 while (has_more() && current() != ']') { 4925 uc16 char_class = 0; 4926 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); 4927 if (char_class) { 4928 CharacterRange::AddClassEscape(char_class, ranges); 4929 continue; 4930 } 4931 if (current() == '-') { 4932 Advance(); 4933 if (current() == 
kEndMarker) { 4934 // If we reach the end we break out of the loop and let the 4935 // following code report an error. 4936 break; 4937 } else if (current() == ']') { 4938 ranges->Add(first); 4939 ranges->Add(CharacterRange::Singleton('-')); 4940 break; 4941 } 4942 CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED); 4943 if (char_class) { 4944 ranges->Add(first); 4945 ranges->Add(CharacterRange::Singleton('-')); 4946 CharacterRange::AddClassEscape(char_class, ranges); 4947 continue; 4948 } 4949 if (first.from() > next.to()) { 4950 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); 4951 } 4952 ranges->Add(CharacterRange::Range(first.from(), next.to())); 4953 } else { 4954 ranges->Add(first); 4955 } 4956 } 4957 if (!has_more()) { 4958 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); 4959 } 4960 Advance(); 4961 if (ranges->length() == 0) { 4962 ranges->Add(CharacterRange::Everything()); 4963 is_negated = !is_negated; 4964 } 4965 return new RegExpCharacterClass(ranges, is_negated); 4966 } 4967 4968 4969 // ---------------------------------------------------------------------------- 4970 // The Parser interface. 4971 4972 // MakeAST() is just a wrapper for the corresponding Parser calls 4973 // so we don't have to expose the entire Parser class in the .h file. 
4974 4975 static bool always_allow_natives_syntax = false; 4976 4977 4978 ParserMessage::~ParserMessage() { 4979 for (int i = 0; i < args().length(); i++) 4980 DeleteArray(args()[i]); 4981 DeleteArray(args().start()); 4982 } 4983 4984 4985 ScriptDataImpl::~ScriptDataImpl() { 4986 store_.Dispose(); 4987 } 4988 4989 4990 int ScriptDataImpl::Length() { 4991 return store_.length(); 4992 } 4993 4994 4995 unsigned* ScriptDataImpl::Data() { 4996 return store_.start(); 4997 } 4998 4999 5000 bool ScriptDataImpl::HasError() { 5001 return has_error(); 5002 } 5003 5004 5005 ScriptDataImpl* PreParse(Handle<String> source, 5006 unibrow::CharacterStream* stream, 5007 v8::Extension* extension) { 5008 Handle<Script> no_script; 5009 bool allow_natives_syntax = 5010 always_allow_natives_syntax || 5011 FLAG_allow_natives_syntax || 5012 Bootstrapper::IsActive(); 5013 PreParser parser(no_script, allow_natives_syntax, extension); 5014 if (!parser.PreParseProgram(source, stream)) return NULL; 5015 // The list owns the backing store so we need to clone the vector. 5016 // That way, the result will be exactly the right size rather than 5017 // the expected 50% too large. 
5018 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); 5019 return new ScriptDataImpl(store); 5020 } 5021 5022 5023 bool ParseRegExp(FlatStringReader* input, 5024 bool multiline, 5025 RegExpCompileData* result) { 5026 ASSERT(result != NULL); 5027 RegExpParser parser(input, &result->error, multiline); 5028 RegExpTree* tree = parser.ParsePattern(); 5029 if (parser.failed()) { 5030 ASSERT(tree == NULL); 5031 ASSERT(!result->error.is_null()); 5032 } else { 5033 ASSERT(tree != NULL); 5034 ASSERT(result->error.is_null()); 5035 result->tree = tree; 5036 int capture_count = parser.captures_started(); 5037 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; 5038 result->contains_anchor = parser.contains_anchor(); 5039 result->capture_count = capture_count; 5040 } 5041 return !parser.failed(); 5042 } 5043 5044 5045 FunctionLiteral* MakeAST(bool compile_in_global_context, 5046 Handle<Script> script, 5047 v8::Extension* extension, 5048 ScriptDataImpl* pre_data, 5049 bool is_json) { 5050 bool allow_natives_syntax = 5051 always_allow_natives_syntax || 5052 FLAG_allow_natives_syntax || 5053 Bootstrapper::IsActive(); 5054 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); 5055 if (pre_data != NULL && pre_data->has_error()) { 5056 Scanner::Location loc = pre_data->MessageLocation(); 5057 const char* message = pre_data->BuildMessage(); 5058 Vector<const char*> args = pre_data->BuildArgs(); 5059 parser.ReportMessageAt(loc, message, args); 5060 DeleteArray(message); 5061 for (int i = 0; i < args.length(); i++) { 5062 DeleteArray(args[i]); 5063 } 5064 DeleteArray(args.start()); 5065 return NULL; 5066 } 5067 Handle<String> source = Handle<String>(String::cast(script->source())); 5068 SafeStringInputBuffer input(source.location()); 5069 FunctionLiteral* result; 5070 if (is_json) { 5071 ASSERT(compile_in_global_context); 5072 result = parser.ParseJson(source, &input); 5073 } else { 5074 result = 
parser.ParseProgram(source, &input, compile_in_global_context); 5075 } 5076 return result; 5077 } 5078 5079 5080 FunctionLiteral* MakeLazyAST(Handle<Script> script, 5081 Handle<String> name, 5082 int start_position, 5083 int end_position, 5084 bool is_expression) { 5085 bool allow_natives_syntax_before = always_allow_natives_syntax; 5086 always_allow_natives_syntax = true; 5087 AstBuildingParser parser(script, true, NULL, NULL); // always allow 5088 always_allow_natives_syntax = allow_natives_syntax_before; 5089 // Parse the function by pulling the function source from the script source. 5090 Handle<String> script_source(String::cast(script->source())); 5091 FunctionLiteral* result = 5092 parser.ParseLazy(SubString(script_source, start_position, end_position), 5093 name, 5094 start_position, 5095 is_expression); 5096 return result; 5097 } 5098 5099 5100 #undef NEW 5101 5102 5103 } } // namespace v8::internal 5104