// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include <stdlib.h> 29 #include <stdio.h> 30 #include <string.h> 31 32 #include "v8.h" 33 34 #include "isolate.h" 35 #include "token.h" 36 #include "scanner.h" 37 #include "parser.h" 38 #include "utils.h" 39 #include "execution.h" 40 #include "preparser.h" 41 #include "cctest.h" 42 43 namespace i = ::v8::internal; 44 45 TEST(KeywordMatcher) { 46 struct KeywordToken { 47 const char* keyword; 48 i::Token::Value token; 49 }; 50 51 static const KeywordToken keywords[] = { 52 #define KEYWORD(t, s, d) { s, i::Token::t }, 53 #define IGNORE(t, s, d) /* */ 54 TOKEN_LIST(IGNORE, KEYWORD, IGNORE) 55 #undef KEYWORD 56 { NULL, i::Token::IDENTIFIER } 57 }; 58 59 static const char* future_keywords[] = { 60 #define FUTURE(t, s, d) s, 61 TOKEN_LIST(IGNORE, IGNORE, FUTURE) 62 #undef FUTURE 63 #undef IGNORE 64 NULL 65 }; 66 67 KeywordToken key_token; 68 for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) { 69 i::KeywordMatcher matcher; 70 const char* keyword = key_token.keyword; 71 int length = i::StrLength(keyword); 72 for (int j = 0; j < length; j++) { 73 if (key_token.token == i::Token::INSTANCEOF && j == 2) { 74 // "in" is a prefix of "instanceof". It's the only keyword 75 // that is a prefix of another. 76 CHECK_EQ(i::Token::IN, matcher.token()); 77 } else { 78 CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); 79 } 80 matcher.AddChar(keyword[j]); 81 } 82 CHECK_EQ(key_token.token, matcher.token()); 83 // Adding more characters will make keyword matching fail. 84 matcher.AddChar('z'); 85 CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); 86 // Adding a keyword later will not make it match again. 87 matcher.AddChar('i'); 88 matcher.AddChar('f'); 89 CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); 90 } 91 92 // Future keywords are not recognized. 
93 const char* future_keyword; 94 for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) { 95 i::KeywordMatcher matcher; 96 int length = i::StrLength(future_keyword); 97 for (int j = 0; j < length; j++) { 98 matcher.AddChar(future_keyword[j]); 99 } 100 CHECK_EQ(i::Token::IDENTIFIER, matcher.token()); 101 } 102 103 // Zero isn't ignored at first. 104 i::KeywordMatcher bad_start; 105 bad_start.AddChar(0); 106 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token()); 107 bad_start.AddChar('i'); 108 bad_start.AddChar('f'); 109 CHECK_EQ(i::Token::IDENTIFIER, bad_start.token()); 110 111 // Zero isn't ignored at end. 112 i::KeywordMatcher bad_end; 113 bad_end.AddChar('i'); 114 bad_end.AddChar('f'); 115 CHECK_EQ(i::Token::IF, bad_end.token()); 116 bad_end.AddChar(0); 117 CHECK_EQ(i::Token::IDENTIFIER, bad_end.token()); 118 119 // Case isn't ignored. 120 i::KeywordMatcher bad_case; 121 bad_case.AddChar('i'); 122 bad_case.AddChar('F'); 123 CHECK_EQ(i::Token::IDENTIFIER, bad_case.token()); 124 125 // If we mark it as failure, continuing won't help. 126 i::KeywordMatcher full_stop; 127 full_stop.AddChar('i'); 128 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); 129 full_stop.Fail(); 130 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); 131 full_stop.AddChar('f'); 132 CHECK_EQ(i::Token::IDENTIFIER, full_stop.token()); 133 } 134 135 136 TEST(ScanHTMLEndComments) { 137 v8::V8::Initialize(); 138 139 // Regression test. See: 140 // http://code.google.com/p/chromium/issues/detail?id=53548 141 // Tests that --> is correctly interpreted as comment-to-end-of-line if there 142 // is only whitespace before it on the line, even after a multiline-comment 143 // comment. This was not the case if it occurred before the first real token 144 // in the input. 145 const char* tests[] = { 146 // Before first real token. 
147 "--> is eol-comment\nvar y = 37;\n", 148 "\n --> is eol-comment\nvar y = 37;\n", 149 "/* precomment */ --> is eol-comment\nvar y = 37;\n", 150 "\n/* precomment */ --> is eol-comment\nvar y = 37;\n", 151 // After first real token. 152 "var x = 42;\n--> is eol-comment\nvar y = 37;\n", 153 "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n", 154 NULL 155 }; 156 157 // Parser/Scanner needs a stack limit. 158 int marker; 159 i::Isolate::Current()->stack_guard()->SetStackLimit( 160 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 161 162 for (int i = 0; tests[i]; i++) { 163 v8::ScriptData* data = 164 v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i])); 165 CHECK(data != NULL && !data->HasError()); 166 delete data; 167 } 168 } 169 170 171 class ScriptResource : public v8::String::ExternalAsciiStringResource { 172 public: 173 ScriptResource(const char* data, size_t length) 174 : data_(data), length_(length) { } 175 176 const char* data() const { return data_; } 177 size_t length() const { return length_; } 178 179 private: 180 const char* data_; 181 size_t length_; 182 }; 183 184 185 TEST(Preparsing) { 186 v8::HandleScope handles; 187 v8::Persistent<v8::Context> context = v8::Context::New(); 188 v8::Context::Scope context_scope(context); 189 int marker; 190 i::Isolate::Current()->stack_guard()->SetStackLimit( 191 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 192 193 // Source containing functions that might be lazily compiled and all types 194 // of symbols (string, propertyName, regexp). 
195 const char* source = 196 "var x = 42;" 197 "function foo(a) { return function nolazy(b) { return a + b; } }" 198 "function bar(a) { if (a) return function lazy(b) { return b; } }" 199 "var z = {'string': 'string literal', bareword: 'propertyName', " 200 " 42: 'number literal', for: 'keyword as propertyName', " 201 " f\\u006fr: 'keyword propertyname with escape'};" 202 "var v = /RegExp Literal/;" 203 "var w = /RegExp Literal\\u0020With Escape/gin;" 204 "var y = { get getter() { return 42; }, " 205 " set setter(v) { this.value = v; }};"; 206 int source_length = i::StrLength(source); 207 const char* error_source = "var x = y z;"; 208 int error_source_length = i::StrLength(error_source); 209 210 v8::ScriptData* preparse = 211 v8::ScriptData::PreCompile(source, source_length); 212 CHECK(!preparse->HasError()); 213 bool lazy_flag = i::FLAG_lazy; 214 { 215 i::FLAG_lazy = true; 216 ScriptResource* resource = new ScriptResource(source, source_length); 217 v8::Local<v8::String> script_source = v8::String::NewExternal(resource); 218 v8::Script::Compile(script_source, NULL, preparse); 219 } 220 221 { 222 i::FLAG_lazy = false; 223 224 ScriptResource* resource = new ScriptResource(source, source_length); 225 v8::Local<v8::String> script_source = v8::String::NewExternal(resource); 226 v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>()); 227 } 228 delete preparse; 229 i::FLAG_lazy = lazy_flag; 230 231 // Syntax error. 232 v8::ScriptData* error_preparse = 233 v8::ScriptData::PreCompile(error_source, error_source_length); 234 CHECK(error_preparse->HasError()); 235 i::ScriptDataImpl *pre_impl = 236 reinterpret_cast<i::ScriptDataImpl*>(error_preparse); 237 i::Scanner::Location error_location = 238 pre_impl->MessageLocation(); 239 // Error is at "z" in source, location 10..11. 240 CHECK_EQ(10, error_location.beg_pos); 241 CHECK_EQ(11, error_location.end_pos); 242 // Should not crash. 
243 const char* message = pre_impl->BuildMessage(); 244 i::Vector<const char*> args = pre_impl->BuildArgs(); 245 CHECK_GT(strlen(message), 0); 246 } 247 248 249 TEST(StandAlonePreParser) { 250 v8::V8::Initialize(); 251 252 int marker; 253 i::Isolate::Current()->stack_guard()->SetStackLimit( 254 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 255 256 const char* programs[] = { 257 "{label: 42}", 258 "var x = 42;", 259 "function foo(x, y) { return x + y; }", 260 "native function foo(); return %ArgleBargle(glop);", 261 "var x = new new Function('this.x = 42');", 262 NULL 263 }; 264 265 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit(); 266 for (int i = 0; programs[i]; i++) { 267 const char* program = programs[i]; 268 i::Utf8ToUC16CharacterStream stream( 269 reinterpret_cast<const i::byte*>(program), 270 static_cast<unsigned>(strlen(program))); 271 i::CompleteParserRecorder log; 272 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache()); 273 scanner.Initialize(&stream); 274 275 v8::preparser::PreParser::PreParseResult result = 276 v8::preparser::PreParser::PreParseProgram(&scanner, 277 &log, 278 true, 279 stack_limit); 280 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result); 281 i::ScriptDataImpl data(log.ExtractData()); 282 CHECK(!data.has_error()); 283 } 284 } 285 286 287 TEST(RegressChromium62639) { 288 v8::V8::Initialize(); 289 290 int marker; 291 i::Isolate::Current()->stack_guard()->SetStackLimit( 292 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 293 294 const char* program = "var x = 'something';\n" 295 "escape: function() {}"; 296 // Fails parsing expecting an identifier after "function". 297 // Before fix, didn't check *ok after Expect(Token::Identifier, ok), 298 // and then used the invalid currently scanned literal. This always 299 // failed in debug mode, and sometimes crashed in release mode. 
300 301 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program), 302 static_cast<unsigned>(strlen(program))); 303 i::ScriptDataImpl* data = 304 i::ParserApi::PreParse(&stream, NULL); 305 CHECK(data->HasError()); 306 delete data; 307 } 308 309 310 TEST(Regress928) { 311 v8::V8::Initialize(); 312 313 // Preparsing didn't consider the catch clause of a try statement 314 // as with-content, which made it assume that a function inside 315 // the block could be lazily compiled, and an extra, unexpected, 316 // entry was added to the data. 317 int marker; 318 i::Isolate::Current()->stack_guard()->SetStackLimit( 319 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 320 321 const char* program = 322 "try { } catch (e) { var foo = function () { /* first */ } }" 323 "var bar = function () { /* second */ }"; 324 325 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program), 326 static_cast<unsigned>(strlen(program))); 327 i::ScriptDataImpl* data = 328 i::ParserApi::PartialPreParse(&stream, NULL); 329 CHECK(!data->HasError()); 330 331 data->Initialize(); 332 333 int first_function = 334 static_cast<int>(strstr(program, "function") - program); 335 int first_lbrace = first_function + static_cast<int>(strlen("function () ")); 336 CHECK_EQ('{', program[first_lbrace]); 337 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace); 338 CHECK(!entry1.is_valid()); 339 340 int second_function = 341 static_cast<int>(strstr(program + first_lbrace, "function") - program); 342 int second_lbrace = 343 second_function + static_cast<int>(strlen("function () ")); 344 CHECK_EQ('{', program[second_lbrace]); 345 i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace); 346 CHECK(entry2.is_valid()); 347 CHECK_EQ('}', program[entry2.end_pos() - 1]); 348 delete data; 349 } 350 351 352 TEST(PreParseOverflow) { 353 v8::V8::Initialize(); 354 355 int marker; 356 i::Isolate::Current()->stack_guard()->SetStackLimit( 357 
reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 358 359 size_t kProgramSize = 1024 * 1024; 360 i::SmartPointer<char> program( 361 reinterpret_cast<char*>(malloc(kProgramSize + 1))); 362 memset(*program, '(', kProgramSize); 363 program[kProgramSize] = '\0'; 364 365 uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit(); 366 367 i::Utf8ToUC16CharacterStream stream( 368 reinterpret_cast<const i::byte*>(*program), 369 static_cast<unsigned>(kProgramSize)); 370 i::CompleteParserRecorder log; 371 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache()); 372 scanner.Initialize(&stream); 373 374 375 v8::preparser::PreParser::PreParseResult result = 376 v8::preparser::PreParser::PreParseProgram(&scanner, 377 &log, 378 true, 379 stack_limit); 380 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result); 381 } 382 383 384 class TestExternalResource: public v8::String::ExternalStringResource { 385 public: 386 explicit TestExternalResource(uint16_t* data, int length) 387 : data_(data), length_(static_cast<size_t>(length)) { } 388 389 ~TestExternalResource() { } 390 391 const uint16_t* data() const { 392 return data_; 393 } 394 395 size_t length() const { 396 return length_; 397 } 398 private: 399 uint16_t* data_; 400 size_t length_; 401 }; 402 403 404 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2)) 405 406 void TestCharacterStream(const char* ascii_source, 407 unsigned length, 408 unsigned start = 0, 409 unsigned end = 0) { 410 if (end == 0) end = length; 411 unsigned sub_length = end - start; 412 i::HandleScope test_scope; 413 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]); 414 for (unsigned i = 0; i < length; i++) { 415 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]); 416 } 417 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length)); 418 i::Handle<i::String> ascii_string( 419 FACTORY->NewStringFromAscii(ascii_vector)); 420 TestExternalResource 
resource(*uc16_buffer, length); 421 i::Handle<i::String> uc16_string( 422 FACTORY->NewExternalStringFromTwoByte(&resource)); 423 424 i::ExternalTwoByteStringUC16CharacterStream uc16_stream( 425 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end); 426 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end); 427 i::Utf8ToUC16CharacterStream utf8_stream( 428 reinterpret_cast<const i::byte*>(ascii_source), end); 429 utf8_stream.SeekForward(start); 430 431 unsigned i = start; 432 while (i < end) { 433 // Read streams one char at a time 434 CHECK_EQU(i, uc16_stream.pos()); 435 CHECK_EQU(i, string_stream.pos()); 436 CHECK_EQU(i, utf8_stream.pos()); 437 int32_t c0 = ascii_source[i]; 438 int32_t c1 = uc16_stream.Advance(); 439 int32_t c2 = string_stream.Advance(); 440 int32_t c3 = utf8_stream.Advance(); 441 i++; 442 CHECK_EQ(c0, c1); 443 CHECK_EQ(c0, c2); 444 CHECK_EQ(c0, c3); 445 CHECK_EQU(i, uc16_stream.pos()); 446 CHECK_EQU(i, string_stream.pos()); 447 CHECK_EQU(i, utf8_stream.pos()); 448 } 449 while (i > start + sub_length / 4) { 450 // Pushback, re-read, pushback again. 
451 int32_t c0 = ascii_source[i - 1]; 452 CHECK_EQU(i, uc16_stream.pos()); 453 CHECK_EQU(i, string_stream.pos()); 454 CHECK_EQU(i, utf8_stream.pos()); 455 uc16_stream.PushBack(c0); 456 string_stream.PushBack(c0); 457 utf8_stream.PushBack(c0); 458 i--; 459 CHECK_EQU(i, uc16_stream.pos()); 460 CHECK_EQU(i, string_stream.pos()); 461 CHECK_EQU(i, utf8_stream.pos()); 462 int32_t c1 = uc16_stream.Advance(); 463 int32_t c2 = string_stream.Advance(); 464 int32_t c3 = utf8_stream.Advance(); 465 i++; 466 CHECK_EQU(i, uc16_stream.pos()); 467 CHECK_EQU(i, string_stream.pos()); 468 CHECK_EQU(i, utf8_stream.pos()); 469 CHECK_EQ(c0, c1); 470 CHECK_EQ(c0, c2); 471 CHECK_EQ(c0, c3); 472 uc16_stream.PushBack(c0); 473 string_stream.PushBack(c0); 474 utf8_stream.PushBack(c0); 475 i--; 476 CHECK_EQU(i, uc16_stream.pos()); 477 CHECK_EQU(i, string_stream.pos()); 478 CHECK_EQU(i, utf8_stream.pos()); 479 } 480 unsigned halfway = start + sub_length / 2; 481 uc16_stream.SeekForward(halfway - i); 482 string_stream.SeekForward(halfway - i); 483 utf8_stream.SeekForward(halfway - i); 484 i = halfway; 485 CHECK_EQU(i, uc16_stream.pos()); 486 CHECK_EQU(i, string_stream.pos()); 487 CHECK_EQU(i, utf8_stream.pos()); 488 489 while (i < end) { 490 // Read streams one char at a time 491 CHECK_EQU(i, uc16_stream.pos()); 492 CHECK_EQU(i, string_stream.pos()); 493 CHECK_EQU(i, utf8_stream.pos()); 494 int32_t c0 = ascii_source[i]; 495 int32_t c1 = uc16_stream.Advance(); 496 int32_t c2 = string_stream.Advance(); 497 int32_t c3 = utf8_stream.Advance(); 498 i++; 499 CHECK_EQ(c0, c1); 500 CHECK_EQ(c0, c2); 501 CHECK_EQ(c0, c3); 502 CHECK_EQU(i, uc16_stream.pos()); 503 CHECK_EQU(i, string_stream.pos()); 504 CHECK_EQU(i, utf8_stream.pos()); 505 } 506 507 int32_t c1 = uc16_stream.Advance(); 508 int32_t c2 = string_stream.Advance(); 509 int32_t c3 = utf8_stream.Advance(); 510 CHECK_LT(c1, 0); 511 CHECK_LT(c2, 0); 512 CHECK_LT(c3, 0); 513 } 514 515 516 TEST(CharacterStreams) { 517 v8::HandleScope handles; 518 
v8::Persistent<v8::Context> context = v8::Context::New(); 519 v8::Context::Scope context_scope(context); 520 521 TestCharacterStream("abc\0\n\r\x7f", 7); 522 static const unsigned kBigStringSize = 4096; 523 char buffer[kBigStringSize + 1]; 524 for (unsigned i = 0; i < kBigStringSize; i++) { 525 buffer[i] = static_cast<char>(i & 0x7f); 526 } 527 TestCharacterStream(buffer, kBigStringSize); 528 529 TestCharacterStream(buffer, kBigStringSize, 576, 3298); 530 531 TestCharacterStream("\0", 1); 532 TestCharacterStream("", 0); 533 } 534 535 536 TEST(Utf8CharacterStream) { 537 static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar; 538 static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU); 539 540 static const int kAllUtf8CharsSize = 541 (unibrow::Utf8::kMaxOneByteChar + 1) + 542 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 + 543 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3; 544 static const unsigned kAllUtf8CharsSizeU = 545 static_cast<unsigned>(kAllUtf8CharsSize); 546 547 char buffer[kAllUtf8CharsSizeU]; 548 unsigned cursor = 0; 549 for (int i = 0; i <= kMaxUC16Char; i++) { 550 cursor += unibrow::Utf8::Encode(buffer + cursor, i); 551 } 552 ASSERT(cursor == kAllUtf8CharsSizeU); 553 554 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer), 555 kAllUtf8CharsSizeU); 556 for (int i = 0; i <= kMaxUC16Char; i++) { 557 CHECK_EQU(i, stream.pos()); 558 int32_t c = stream.Advance(); 559 CHECK_EQ(i, c); 560 CHECK_EQU(i + 1, stream.pos()); 561 } 562 for (int i = kMaxUC16Char; i >= 0; i--) { 563 CHECK_EQU(i + 1, stream.pos()); 564 stream.PushBack(i); 565 CHECK_EQU(i, stream.pos()); 566 } 567 int i = 0; 568 while (stream.pos() < kMaxUC16CharU) { 569 CHECK_EQU(i, stream.pos()); 570 unsigned progress = stream.SeekForward(12); 571 i += progress; 572 int32_t c = stream.Advance(); 573 if (i <= kMaxUC16Char) { 574 CHECK_EQ(i, c); 575 } else { 576 CHECK_EQ(-1, c); 577 } 578 i += 1; 579 
CHECK_EQU(i, stream.pos()); 580 } 581 } 582 583 #undef CHECK_EQU 584 585 void TestStreamScanner(i::UC16CharacterStream* stream, 586 i::Token::Value* expected_tokens, 587 int skip_pos = 0, // Zero means not skipping. 588 int skip_to = 0) { 589 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache()); 590 scanner.Initialize(stream); 591 592 int i = 0; 593 do { 594 i::Token::Value expected = expected_tokens[i]; 595 i::Token::Value actual = scanner.Next(); 596 CHECK_EQ(i::Token::String(expected), i::Token::String(actual)); 597 if (scanner.location().end_pos == skip_pos) { 598 scanner.SeekForward(skip_to); 599 } 600 i++; 601 } while (expected_tokens[i] != i::Token::ILLEGAL); 602 } 603 604 TEST(StreamScanner) { 605 v8::V8::Initialize(); 606 607 const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib"; 608 i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1), 609 static_cast<unsigned>(strlen(str1))); 610 i::Token::Value expectations1[] = { 611 i::Token::LBRACE, 612 i::Token::IDENTIFIER, 613 i::Token::IDENTIFIER, 614 i::Token::FOR, 615 i::Token::COLON, 616 i::Token::MUL, 617 i::Token::DIV, 618 i::Token::LT, 619 i::Token::SUB, 620 i::Token::IDENTIFIER, 621 i::Token::EOS, 622 i::Token::ILLEGAL 623 }; 624 TestStreamScanner(&stream1, expectations1, 0, 0); 625 626 const char* str2 = "case default const {THIS\nPART\nSKIPPED} do"; 627 i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2), 628 static_cast<unsigned>(strlen(str2))); 629 i::Token::Value expectations2[] = { 630 i::Token::CASE, 631 i::Token::DEFAULT, 632 i::Token::CONST, 633 i::Token::LBRACE, 634 // Skipped part here 635 i::Token::RBRACE, 636 i::Token::DO, 637 i::Token::EOS, 638 i::Token::ILLEGAL 639 }; 640 ASSERT_EQ('{', str2[19]); 641 ASSERT_EQ('}', str2[37]); 642 TestStreamScanner(&stream2, expectations2, 20, 37); 643 644 const char* str3 = "{}}}}"; 645 i::Token::Value expectations3[] = { 646 i::Token::LBRACE, 647 i::Token::RBRACE, 648 i::Token::RBRACE, 
649 i::Token::RBRACE, 650 i::Token::RBRACE, 651 i::Token::EOS, 652 i::Token::ILLEGAL 653 }; 654 // Skip zero-four RBRACEs. 655 for (int i = 0; i <= 4; i++) { 656 expectations3[6 - i] = i::Token::ILLEGAL; 657 expectations3[5 - i] = i::Token::EOS; 658 i::Utf8ToUC16CharacterStream stream3( 659 reinterpret_cast<const i::byte*>(str3), 660 static_cast<unsigned>(strlen(str3))); 661 TestStreamScanner(&stream3, expectations3, 1, 1 + i); 662 } 663 } 664 665 666 void TestScanRegExp(const char* re_source, const char* expected) { 667 i::Utf8ToUC16CharacterStream stream( 668 reinterpret_cast<const i::byte*>(re_source), 669 static_cast<unsigned>(strlen(re_source))); 670 i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache()); 671 scanner.Initialize(&stream); 672 673 i::Token::Value start = scanner.peek(); 674 CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV); 675 CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV)); 676 scanner.Next(); // Current token is now the regexp literal. 677 CHECK(scanner.is_literal_ascii()); 678 i::Vector<const char> actual = scanner.literal_ascii_string(); 679 for (int i = 0; i < actual.length(); i++) { 680 CHECK_NE('\0', expected[i]); 681 CHECK_EQ(expected[i], actual[i]); 682 } 683 } 684 685 686 TEST(RegExpScanning) { 687 v8::V8::Initialize(); 688 689 // RegExp token with added garbage at the end. The scanner should only 690 // scan the RegExp until the terminating slash just before "flipperwald". 691 TestScanRegExp("/b/flipperwald", "b"); 692 // Incomplete escape sequences doesn't hide the terminating slash. 693 TestScanRegExp("/\\x/flipperwald", "\\x"); 694 TestScanRegExp("/\\u/flipperwald", "\\u"); 695 TestScanRegExp("/\\u1/flipperwald", "\\u1"); 696 TestScanRegExp("/\\u12/flipperwald", "\\u12"); 697 TestScanRegExp("/\\u123/flipperwald", "\\u123"); 698 TestScanRegExp("/\\c/flipperwald", "\\c"); 699 TestScanRegExp("/\\c//flipperwald", "\\c"); 700 // Slashes inside character classes are not terminating. 
701 TestScanRegExp("/[/]/flipperwald", "[/]"); 702 TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]"); 703 // Incomplete escape sequences inside a character class doesn't hide 704 // the end of the character class. 705 TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]"); 706 TestScanRegExp("/[\\c]/flipperwald", "[\\c]"); 707 TestScanRegExp("/[\\x]/flipperwald", "[\\x]"); 708 TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]"); 709 TestScanRegExp("/[\\u]/flipperwald", "[\\u]"); 710 TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]"); 711 TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]"); 712 TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]"); 713 // Escaped ']'s wont end the character class. 714 TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]"); 715 // Escaped slashes are not terminating. 716 TestScanRegExp("/\\//flipperwald", "\\/"); 717 // Starting with '=' works too. 718 TestScanRegExp("/=/", "="); 719 TestScanRegExp("/=?/", "=?"); 720 } 721