Home | History | Annotate | Download | only in cctest
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include <stdlib.h>
     29 #include <stdio.h>
     30 #include <string.h>
     31 
     32 #include "v8.h"
     33 
     34 #include "isolate.h"
     35 #include "token.h"
     36 #include "scanner.h"
     37 #include "parser.h"
     38 #include "utils.h"
     39 #include "execution.h"
     40 #include "preparser.h"
     41 #include "cctest.h"
     42 
     43 namespace i = ::v8::internal;
     44 
     45 TEST(KeywordMatcher) {
     46   struct KeywordToken {
     47     const char* keyword;
     48     i::Token::Value token;
     49   };
     50 
     51   static const KeywordToken keywords[] = {
     52 #define KEYWORD(t, s, d) { s, i::Token::t },
     53 #define IGNORE(t, s, d)  /* */
     54       TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
     55 #undef KEYWORD
     56       { NULL, i::Token::IDENTIFIER }
     57   };
     58 
     59   static const char* future_keywords[] = {
     60 #define FUTURE(t, s, d) s,
     61       TOKEN_LIST(IGNORE, IGNORE, FUTURE)
     62 #undef FUTURE
     63 #undef IGNORE
     64       NULL
     65   };
     66 
     67   KeywordToken key_token;
     68   for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
     69     i::KeywordMatcher matcher;
     70     const char* keyword = key_token.keyword;
     71     int length = i::StrLength(keyword);
     72     for (int j = 0; j < length; j++) {
     73       if (key_token.token == i::Token::INSTANCEOF && j == 2) {
     74         // "in" is a prefix of "instanceof". It's the only keyword
     75         // that is a prefix of another.
     76         CHECK_EQ(i::Token::IN, matcher.token());
     77       } else {
     78         CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
     79       }
     80       matcher.AddChar(keyword[j]);
     81     }
     82     CHECK_EQ(key_token.token, matcher.token());
     83     // Adding more characters will make keyword matching fail.
     84     matcher.AddChar('z');
     85     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
     86     // Adding a keyword later will not make it match again.
     87     matcher.AddChar('i');
     88     matcher.AddChar('f');
     89     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
     90   }
     91 
     92   // Future keywords are not recognized.
     93   const char* future_keyword;
     94   for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) {
     95     i::KeywordMatcher matcher;
     96     int length = i::StrLength(future_keyword);
     97     for (int j = 0; j < length; j++) {
     98       matcher.AddChar(future_keyword[j]);
     99     }
    100     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
    101   }
    102 
    103   // Zero isn't ignored at first.
    104   i::KeywordMatcher bad_start;
    105   bad_start.AddChar(0);
    106   CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
    107   bad_start.AddChar('i');
    108   bad_start.AddChar('f');
    109   CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
    110 
    111   // Zero isn't ignored at end.
    112   i::KeywordMatcher bad_end;
    113   bad_end.AddChar('i');
    114   bad_end.AddChar('f');
    115   CHECK_EQ(i::Token::IF, bad_end.token());
    116   bad_end.AddChar(0);
    117   CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());
    118 
    119   // Case isn't ignored.
    120   i::KeywordMatcher bad_case;
    121   bad_case.AddChar('i');
    122   bad_case.AddChar('F');
    123   CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());
    124 
    125   // If we mark it as failure, continuing won't help.
    126   i::KeywordMatcher full_stop;
    127   full_stop.AddChar('i');
    128   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
    129   full_stop.Fail();
    130   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
    131   full_stop.AddChar('f');
    132   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
    133 }
    134 
    135 
    136 TEST(ScanHTMLEndComments) {
    137   v8::V8::Initialize();
    138 
    139   // Regression test. See:
    140   //    http://code.google.com/p/chromium/issues/detail?id=53548
    141   // Tests that --> is correctly interpreted as comment-to-end-of-line if there
    142   // is only whitespace before it on the line, even after a multiline-comment
    143   // comment. This was not the case if it occurred before the first real token
    144   // in the input.
    145   const char* tests[] = {
    146       // Before first real token.
    147       "--> is eol-comment\nvar y = 37;\n",
    148       "\n --> is eol-comment\nvar y = 37;\n",
    149       "/* precomment */ --> is eol-comment\nvar y = 37;\n",
    150       "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
    151       // After first real token.
    152       "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
    153       "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
    154       NULL
    155   };
    156 
    157   // Parser/Scanner needs a stack limit.
    158   int marker;
    159   i::Isolate::Current()->stack_guard()->SetStackLimit(
    160       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    161 
    162   for (int i = 0; tests[i]; i++) {
    163     v8::ScriptData* data =
    164         v8::ScriptData::PreCompile(tests[i], i::StrLength(tests[i]));
    165     CHECK(data != NULL && !data->HasError());
    166     delete data;
    167   }
    168 }
    169 
    170 
    171 class ScriptResource : public v8::String::ExternalAsciiStringResource {
    172  public:
    173   ScriptResource(const char* data, size_t length)
    174       : data_(data), length_(length) { }
    175 
    176   const char* data() const { return data_; }
    177   size_t length() const { return length_; }
    178 
    179  private:
    180   const char* data_;
    181   size_t length_;
    182 };
    183 
    184 
    185 TEST(Preparsing) {
    186   v8::HandleScope handles;
    187   v8::Persistent<v8::Context> context = v8::Context::New();
    188   v8::Context::Scope context_scope(context);
    189   int marker;
    190   i::Isolate::Current()->stack_guard()->SetStackLimit(
    191       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    192 
    193   // Source containing functions that might be lazily compiled  and all types
    194   // of symbols (string, propertyName, regexp).
    195   const char* source =
    196       "var x = 42;"
    197       "function foo(a) { return function nolazy(b) { return a + b; } }"
    198       "function bar(a) { if (a) return function lazy(b) { return b; } }"
    199       "var z = {'string': 'string literal', bareword: 'propertyName', "
    200       "         42: 'number literal', for: 'keyword as propertyName', "
    201       "         f\\u006fr: 'keyword propertyname with escape'};"
    202       "var v = /RegExp Literal/;"
    203       "var w = /RegExp Literal\\u0020With Escape/gin;"
    204       "var y = { get getter() { return 42; }, "
    205       "          set setter(v) { this.value = v; }};";
    206   int source_length = i::StrLength(source);
    207   const char* error_source = "var x = y z;";
    208   int error_source_length = i::StrLength(error_source);
    209 
    210   v8::ScriptData* preparse =
    211       v8::ScriptData::PreCompile(source, source_length);
    212   CHECK(!preparse->HasError());
    213   bool lazy_flag = i::FLAG_lazy;
    214   {
    215     i::FLAG_lazy = true;
    216     ScriptResource* resource = new ScriptResource(source, source_length);
    217     v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
    218     v8::Script::Compile(script_source, NULL, preparse);
    219   }
    220 
    221   {
    222     i::FLAG_lazy = false;
    223 
    224     ScriptResource* resource = new ScriptResource(source, source_length);
    225     v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
    226     v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
    227   }
    228   delete preparse;
    229   i::FLAG_lazy = lazy_flag;
    230 
    231   // Syntax error.
    232   v8::ScriptData* error_preparse =
    233       v8::ScriptData::PreCompile(error_source, error_source_length);
    234   CHECK(error_preparse->HasError());
    235   i::ScriptDataImpl *pre_impl =
    236       reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
    237   i::Scanner::Location error_location =
    238       pre_impl->MessageLocation();
    239   // Error is at "z" in source, location 10..11.
    240   CHECK_EQ(10, error_location.beg_pos);
    241   CHECK_EQ(11, error_location.end_pos);
    242   // Should not crash.
    243   const char* message = pre_impl->BuildMessage();
    244   i::Vector<const char*> args = pre_impl->BuildArgs();
    245   CHECK_GT(strlen(message), 0);
    246 }
    247 
    248 
    249 TEST(StandAlonePreParser) {
    250   v8::V8::Initialize();
    251 
    252   int marker;
    253   i::Isolate::Current()->stack_guard()->SetStackLimit(
    254       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    255 
    256   const char* programs[] = {
    257       "{label: 42}",
    258       "var x = 42;",
    259       "function foo(x, y) { return x + y; }",
    260       "native function foo(); return %ArgleBargle(glop);",
    261       "var x = new new Function('this.x = 42');",
    262       NULL
    263   };
    264 
    265   uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
    266   for (int i = 0; programs[i]; i++) {
    267     const char* program = programs[i];
    268     i::Utf8ToUC16CharacterStream stream(
    269         reinterpret_cast<const i::byte*>(program),
    270         static_cast<unsigned>(strlen(program)));
    271     i::CompleteParserRecorder log;
    272     i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    273     scanner.Initialize(&stream);
    274 
    275     v8::preparser::PreParser::PreParseResult result =
    276         v8::preparser::PreParser::PreParseProgram(&scanner,
    277                                                   &log,
    278                                                   true,
    279                                                   stack_limit);
    280     CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);
    281     i::ScriptDataImpl data(log.ExtractData());
    282     CHECK(!data.has_error());
    283   }
    284 }
    285 
    286 
    287 TEST(RegressChromium62639) {
    288   v8::V8::Initialize();
    289 
    290   int marker;
    291   i::Isolate::Current()->stack_guard()->SetStackLimit(
    292       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    293 
    294   const char* program = "var x = 'something';\n"
    295                         "escape: function() {}";
    296   // Fails parsing expecting an identifier after "function".
    297   // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
    298   // and then used the invalid currently scanned literal. This always
    299   // failed in debug mode, and sometimes crashed in release mode.
    300 
    301   i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
    302                                       static_cast<unsigned>(strlen(program)));
    303   i::ScriptDataImpl* data =
    304       i::ParserApi::PreParse(&stream, NULL);
    305   CHECK(data->HasError());
    306   delete data;
    307 }
    308 
    309 
    310 TEST(Regress928) {
    311   v8::V8::Initialize();
    312 
    313   // Preparsing didn't consider the catch clause of a try statement
    314   // as with-content, which made it assume that a function inside
    315   // the block could be lazily compiled, and an extra, unexpected,
    316   // entry was added to the data.
    317   int marker;
    318   i::Isolate::Current()->stack_guard()->SetStackLimit(
    319       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    320 
    321   const char* program =
    322       "try { } catch (e) { var foo = function () { /* first */ } }"
    323       "var bar = function () { /* second */ }";
    324 
    325   i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
    326                                       static_cast<unsigned>(strlen(program)));
    327   i::ScriptDataImpl* data =
    328       i::ParserApi::PartialPreParse(&stream, NULL);
    329   CHECK(!data->HasError());
    330 
    331   data->Initialize();
    332 
    333   int first_function =
    334       static_cast<int>(strstr(program, "function") - program);
    335   int first_lbrace = first_function + static_cast<int>(strlen("function () "));
    336   CHECK_EQ('{', program[first_lbrace]);
    337   i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
    338   CHECK(!entry1.is_valid());
    339 
    340   int second_function =
    341       static_cast<int>(strstr(program + first_lbrace, "function") - program);
    342   int second_lbrace =
    343       second_function + static_cast<int>(strlen("function () "));
    344   CHECK_EQ('{', program[second_lbrace]);
    345   i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
    346   CHECK(entry2.is_valid());
    347   CHECK_EQ('}', program[entry2.end_pos() - 1]);
    348   delete data;
    349 }
    350 
    351 
    352 TEST(PreParseOverflow) {
    353   v8::V8::Initialize();
    354 
    355   int marker;
    356   i::Isolate::Current()->stack_guard()->SetStackLimit(
    357       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
    358 
    359   size_t kProgramSize = 1024 * 1024;
    360   i::SmartPointer<char> program(
    361       reinterpret_cast<char*>(malloc(kProgramSize + 1)));
    362   memset(*program, '(', kProgramSize);
    363   program[kProgramSize] = '\0';
    364 
    365   uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
    366 
    367   i::Utf8ToUC16CharacterStream stream(
    368       reinterpret_cast<const i::byte*>(*program),
    369       static_cast<unsigned>(kProgramSize));
    370   i::CompleteParserRecorder log;
    371   i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    372   scanner.Initialize(&stream);
    373 
    374 
    375   v8::preparser::PreParser::PreParseResult result =
    376       v8::preparser::PreParser::PreParseProgram(&scanner,
    377                                                 &log,
    378                                                 true,
    379                                                 stack_limit);
    380   CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
    381 }
    382 
    383 
    384 class TestExternalResource: public v8::String::ExternalStringResource {
    385  public:
    386   explicit TestExternalResource(uint16_t* data, int length)
    387       : data_(data), length_(static_cast<size_t>(length)) { }
    388 
    389   ~TestExternalResource() { }
    390 
    391   const uint16_t* data() const {
    392     return data_;
    393   }
    394 
    395   size_t length() const {
    396     return length_;
    397   }
    398  private:
    399   uint16_t* data_;
    400   size_t length_;
    401 };
    402 
    403 
    404 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
    405 
    406 void TestCharacterStream(const char* ascii_source,
    407                          unsigned length,
    408                          unsigned start = 0,
    409                          unsigned end = 0) {
    410   if (end == 0) end = length;
    411   unsigned sub_length = end - start;
    412   i::HandleScope test_scope;
    413   i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
    414   for (unsigned i = 0; i < length; i++) {
    415     uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
    416   }
    417   i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
    418   i::Handle<i::String> ascii_string(
    419       FACTORY->NewStringFromAscii(ascii_vector));
    420   TestExternalResource resource(*uc16_buffer, length);
    421   i::Handle<i::String> uc16_string(
    422       FACTORY->NewExternalStringFromTwoByte(&resource));
    423 
    424   i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
    425       i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
    426   i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
    427   i::Utf8ToUC16CharacterStream utf8_stream(
    428       reinterpret_cast<const i::byte*>(ascii_source), end);
    429   utf8_stream.SeekForward(start);
    430 
    431   unsigned i = start;
    432   while (i < end) {
    433     // Read streams one char at a time
    434     CHECK_EQU(i, uc16_stream.pos());
    435     CHECK_EQU(i, string_stream.pos());
    436     CHECK_EQU(i, utf8_stream.pos());
    437     int32_t c0 = ascii_source[i];
    438     int32_t c1 = uc16_stream.Advance();
    439     int32_t c2 = string_stream.Advance();
    440     int32_t c3 = utf8_stream.Advance();
    441     i++;
    442     CHECK_EQ(c0, c1);
    443     CHECK_EQ(c0, c2);
    444     CHECK_EQ(c0, c3);
    445     CHECK_EQU(i, uc16_stream.pos());
    446     CHECK_EQU(i, string_stream.pos());
    447     CHECK_EQU(i, utf8_stream.pos());
    448   }
    449   while (i > start + sub_length / 4) {
    450     // Pushback, re-read, pushback again.
    451     int32_t c0 = ascii_source[i - 1];
    452     CHECK_EQU(i, uc16_stream.pos());
    453     CHECK_EQU(i, string_stream.pos());
    454     CHECK_EQU(i, utf8_stream.pos());
    455     uc16_stream.PushBack(c0);
    456     string_stream.PushBack(c0);
    457     utf8_stream.PushBack(c0);
    458     i--;
    459     CHECK_EQU(i, uc16_stream.pos());
    460     CHECK_EQU(i, string_stream.pos());
    461     CHECK_EQU(i, utf8_stream.pos());
    462     int32_t c1 = uc16_stream.Advance();
    463     int32_t c2 = string_stream.Advance();
    464     int32_t c3 = utf8_stream.Advance();
    465     i++;
    466     CHECK_EQU(i, uc16_stream.pos());
    467     CHECK_EQU(i, string_stream.pos());
    468     CHECK_EQU(i, utf8_stream.pos());
    469     CHECK_EQ(c0, c1);
    470     CHECK_EQ(c0, c2);
    471     CHECK_EQ(c0, c3);
    472     uc16_stream.PushBack(c0);
    473     string_stream.PushBack(c0);
    474     utf8_stream.PushBack(c0);
    475     i--;
    476     CHECK_EQU(i, uc16_stream.pos());
    477     CHECK_EQU(i, string_stream.pos());
    478     CHECK_EQU(i, utf8_stream.pos());
    479   }
    480   unsigned halfway = start + sub_length / 2;
    481   uc16_stream.SeekForward(halfway - i);
    482   string_stream.SeekForward(halfway - i);
    483   utf8_stream.SeekForward(halfway - i);
    484   i = halfway;
    485   CHECK_EQU(i, uc16_stream.pos());
    486   CHECK_EQU(i, string_stream.pos());
    487   CHECK_EQU(i, utf8_stream.pos());
    488 
    489   while (i < end) {
    490     // Read streams one char at a time
    491     CHECK_EQU(i, uc16_stream.pos());
    492     CHECK_EQU(i, string_stream.pos());
    493     CHECK_EQU(i, utf8_stream.pos());
    494     int32_t c0 = ascii_source[i];
    495     int32_t c1 = uc16_stream.Advance();
    496     int32_t c2 = string_stream.Advance();
    497     int32_t c3 = utf8_stream.Advance();
    498     i++;
    499     CHECK_EQ(c0, c1);
    500     CHECK_EQ(c0, c2);
    501     CHECK_EQ(c0, c3);
    502     CHECK_EQU(i, uc16_stream.pos());
    503     CHECK_EQU(i, string_stream.pos());
    504     CHECK_EQU(i, utf8_stream.pos());
    505   }
    506 
    507   int32_t c1 = uc16_stream.Advance();
    508   int32_t c2 = string_stream.Advance();
    509   int32_t c3 = utf8_stream.Advance();
    510   CHECK_LT(c1, 0);
    511   CHECK_LT(c2, 0);
    512   CHECK_LT(c3, 0);
    513 }
    514 
    515 
    516 TEST(CharacterStreams) {
    517   v8::HandleScope handles;
    518   v8::Persistent<v8::Context> context = v8::Context::New();
    519   v8::Context::Scope context_scope(context);
    520 
    521   TestCharacterStream("abc\0\n\r\x7f", 7);
    522   static const unsigned kBigStringSize = 4096;
    523   char buffer[kBigStringSize + 1];
    524   for (unsigned i = 0; i < kBigStringSize; i++) {
    525     buffer[i] = static_cast<char>(i & 0x7f);
    526   }
    527   TestCharacterStream(buffer, kBigStringSize);
    528 
    529   TestCharacterStream(buffer, kBigStringSize, 576, 3298);
    530 
    531   TestCharacterStream("\0", 1);
    532   TestCharacterStream("", 0);
    533 }
    534 
    535 
    536 TEST(Utf8CharacterStream) {
    537   static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
    538   static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
    539 
    540   static const int kAllUtf8CharsSize =
    541       (unibrow::Utf8::kMaxOneByteChar + 1) +
    542       (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
    543       (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
    544   static const unsigned kAllUtf8CharsSizeU =
    545       static_cast<unsigned>(kAllUtf8CharsSize);
    546 
    547   char buffer[kAllUtf8CharsSizeU];
    548   unsigned cursor = 0;
    549   for (int i = 0; i <= kMaxUC16Char; i++) {
    550     cursor += unibrow::Utf8::Encode(buffer + cursor, i);
    551   }
    552   ASSERT(cursor == kAllUtf8CharsSizeU);
    553 
    554   i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
    555                                       kAllUtf8CharsSizeU);
    556   for (int i = 0; i <= kMaxUC16Char; i++) {
    557     CHECK_EQU(i, stream.pos());
    558     int32_t c = stream.Advance();
    559     CHECK_EQ(i, c);
    560     CHECK_EQU(i + 1, stream.pos());
    561   }
    562   for (int i = kMaxUC16Char; i >= 0; i--) {
    563     CHECK_EQU(i + 1, stream.pos());
    564     stream.PushBack(i);
    565     CHECK_EQU(i, stream.pos());
    566   }
    567   int i = 0;
    568   while (stream.pos() < kMaxUC16CharU) {
    569     CHECK_EQU(i, stream.pos());
    570     unsigned progress = stream.SeekForward(12);
    571     i += progress;
    572     int32_t c = stream.Advance();
    573     if (i <= kMaxUC16Char) {
    574       CHECK_EQ(i, c);
    575     } else {
    576       CHECK_EQ(-1, c);
    577     }
    578     i += 1;
    579     CHECK_EQU(i, stream.pos());
    580   }
    581 }
    582 
    583 #undef CHECK_EQU
    584 
    585 void TestStreamScanner(i::UC16CharacterStream* stream,
    586                        i::Token::Value* expected_tokens,
    587                        int skip_pos = 0,  // Zero means not skipping.
    588                        int skip_to = 0) {
    589   i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    590   scanner.Initialize(stream);
    591 
    592   int i = 0;
    593   do {
    594     i::Token::Value expected = expected_tokens[i];
    595     i::Token::Value actual = scanner.Next();
    596     CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
    597     if (scanner.location().end_pos == skip_pos) {
    598       scanner.SeekForward(skip_to);
    599     }
    600     i++;
    601   } while (expected_tokens[i] != i::Token::ILLEGAL);
    602 }
    603 
    604 TEST(StreamScanner) {
    605   v8::V8::Initialize();
    606 
    607   const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
    608   i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
    609                                        static_cast<unsigned>(strlen(str1)));
    610   i::Token::Value expectations1[] = {
    611       i::Token::LBRACE,
    612       i::Token::IDENTIFIER,
    613       i::Token::IDENTIFIER,
    614       i::Token::FOR,
    615       i::Token::COLON,
    616       i::Token::MUL,
    617       i::Token::DIV,
    618       i::Token::LT,
    619       i::Token::SUB,
    620       i::Token::IDENTIFIER,
    621       i::Token::EOS,
    622       i::Token::ILLEGAL
    623   };
    624   TestStreamScanner(&stream1, expectations1, 0, 0);
    625 
    626   const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
    627   i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
    628                                        static_cast<unsigned>(strlen(str2)));
    629   i::Token::Value expectations2[] = {
    630       i::Token::CASE,
    631       i::Token::DEFAULT,
    632       i::Token::CONST,
    633       i::Token::LBRACE,
    634       // Skipped part here
    635       i::Token::RBRACE,
    636       i::Token::DO,
    637       i::Token::EOS,
    638       i::Token::ILLEGAL
    639   };
    640   ASSERT_EQ('{', str2[19]);
    641   ASSERT_EQ('}', str2[37]);
    642   TestStreamScanner(&stream2, expectations2, 20, 37);
    643 
    644   const char* str3 = "{}}}}";
    645   i::Token::Value expectations3[] = {
    646       i::Token::LBRACE,
    647       i::Token::RBRACE,
    648       i::Token::RBRACE,
    649       i::Token::RBRACE,
    650       i::Token::RBRACE,
    651       i::Token::EOS,
    652       i::Token::ILLEGAL
    653   };
    654   // Skip zero-four RBRACEs.
    655   for (int i = 0; i <= 4; i++) {
    656      expectations3[6 - i] = i::Token::ILLEGAL;
    657      expectations3[5 - i] = i::Token::EOS;
    658      i::Utf8ToUC16CharacterStream stream3(
    659          reinterpret_cast<const i::byte*>(str3),
    660          static_cast<unsigned>(strlen(str3)));
    661      TestStreamScanner(&stream3, expectations3, 1, 1 + i);
    662   }
    663 }
    664 
    665 
    666 void TestScanRegExp(const char* re_source, const char* expected) {
    667   i::Utf8ToUC16CharacterStream stream(
    668        reinterpret_cast<const i::byte*>(re_source),
    669        static_cast<unsigned>(strlen(re_source)));
    670   i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    671   scanner.Initialize(&stream);
    672 
    673   i::Token::Value start = scanner.peek();
    674   CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
    675   CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
    676   scanner.Next();  // Current token is now the regexp literal.
    677   CHECK(scanner.is_literal_ascii());
    678   i::Vector<const char> actual = scanner.literal_ascii_string();
    679   for (int i = 0; i < actual.length(); i++) {
    680     CHECK_NE('\0', expected[i]);
    681     CHECK_EQ(expected[i], actual[i]);
    682   }
    683 }
    684 
    685 
    686 TEST(RegExpScanning) {
    687   v8::V8::Initialize();
    688 
    689   // RegExp token with added garbage at the end. The scanner should only
    690   // scan the RegExp until the terminating slash just before "flipperwald".
    691   TestScanRegExp("/b/flipperwald", "b");
    692   // Incomplete escape sequences doesn't hide the terminating slash.
    693   TestScanRegExp("/\\x/flipperwald", "\\x");
    694   TestScanRegExp("/\\u/flipperwald", "\\u");
    695   TestScanRegExp("/\\u1/flipperwald", "\\u1");
    696   TestScanRegExp("/\\u12/flipperwald", "\\u12");
    697   TestScanRegExp("/\\u123/flipperwald", "\\u123");
    698   TestScanRegExp("/\\c/flipperwald", "\\c");
    699   TestScanRegExp("/\\c//flipperwald", "\\c");
    700   // Slashes inside character classes are not terminating.
    701   TestScanRegExp("/[/]/flipperwald", "[/]");
    702   TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
    703   // Incomplete escape sequences inside a character class doesn't hide
    704   // the end of the character class.
    705   TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
    706   TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
    707   TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
    708   TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
    709   TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
    710   TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
    711   TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
    712   TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
    713   // Escaped ']'s wont end the character class.
    714   TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
    715   // Escaped slashes are not terminating.
    716   TestScanRegExp("/\\//flipperwald", "\\/");
    717   // Starting with '=' works too.
    718   TestScanRegExp("/=/", "=");
    719   TestScanRegExp("/=?/", "=?");
    720 }
    721