Home | History | Annotate | Download | only in cctest
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 
     29 #include <stdlib.h>
     30 
     31 #include "src/v8.h"
     32 
     33 #include "src/ast.h"
     34 #include "src/char-predicates-inl.h"
     35 #include "src/jsregexp.h"
     36 #include "src/ostreams.h"
     37 #include "src/parser.h"
     38 #include "src/regexp-macro-assembler.h"
     39 #include "src/regexp-macro-assembler-irregexp.h"
     40 #include "src/string-stream.h"
     41 #include "src/zone-inl.h"
     42 #ifdef V8_INTERPRETED_REGEXP
     43 #include "src/interpreter-irregexp.h"
     44 #else  // V8_INTERPRETED_REGEXP
     45 #include "src/macro-assembler.h"
     46 #if V8_TARGET_ARCH_ARM
     47 #include "src/arm/assembler-arm.h"  // NOLINT
     48 #include "src/arm/macro-assembler-arm.h"
     49 #include "src/arm/regexp-macro-assembler-arm.h"
     50 #endif
     51 #if V8_TARGET_ARCH_ARM64
     52 #include "src/arm64/assembler-arm64.h"
     53 #include "src/arm64/macro-assembler-arm64.h"
     54 #include "src/arm64/regexp-macro-assembler-arm64.h"
     55 #endif
     56 #if V8_TARGET_ARCH_MIPS
     57 #include "src/mips/assembler-mips.h"
     58 #include "src/mips/macro-assembler-mips.h"
     59 #include "src/mips/regexp-macro-assembler-mips.h"
     60 #endif
     61 #if V8_TARGET_ARCH_MIPS64
     62 #include "src/mips64/assembler-mips64.h"
     63 #include "src/mips64/macro-assembler-mips64.h"
     64 #include "src/mips64/regexp-macro-assembler-mips64.h"
     65 #endif
     66 #if V8_TARGET_ARCH_X64
     67 #include "src/x64/assembler-x64.h"
     68 #include "src/x64/macro-assembler-x64.h"
     69 #include "src/x64/regexp-macro-assembler-x64.h"
     70 #endif
     71 #if V8_TARGET_ARCH_IA32
     72 #include "src/ia32/assembler-ia32.h"
     73 #include "src/ia32/macro-assembler-ia32.h"
     74 #include "src/ia32/regexp-macro-assembler-ia32.h"
     75 #endif
     76 #if V8_TARGET_ARCH_X87
     77 #include "src/x87/assembler-x87.h"
     78 #include "src/x87/macro-assembler-x87.h"
     79 #include "src/x87/regexp-macro-assembler-x87.h"
     80 #endif
     81 #endif  // V8_INTERPRETED_REGEXP
     82 #include "test/cctest/cctest.h"
     83 
     84 using namespace v8::internal;
     85 
     86 
     87 static bool CheckParse(const char* input) {
     88   v8::HandleScope scope(CcTest::isolate());
     89   Zone zone(CcTest::i_isolate());
     90   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     91   RegExpCompileData result;
     92   return v8::internal::RegExpParser::ParseRegExp(
     93       &reader, false, &result, &zone);
     94 }
     95 
     96 
     97 static void CheckParseEq(const char* input, const char* expected) {
     98   v8::HandleScope scope(CcTest::isolate());
     99   Zone zone(CcTest::i_isolate());
    100   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    101   RegExpCompileData result;
    102   CHECK(v8::internal::RegExpParser::ParseRegExp(
    103       &reader, false, &result, &zone));
    104   CHECK(result.tree != NULL);
    105   CHECK(result.error.is_null());
    106   OStringStream os;
    107   result.tree->Print(os, &zone);
    108   CHECK_EQ(expected, os.c_str());
    109 }
    110 
    111 
    112 static bool CheckSimple(const char* input) {
    113   v8::HandleScope scope(CcTest::isolate());
    114   Zone zone(CcTest::i_isolate());
    115   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    116   RegExpCompileData result;
    117   CHECK(v8::internal::RegExpParser::ParseRegExp(
    118       &reader, false, &result, &zone));
    119   CHECK(result.tree != NULL);
    120   CHECK(result.error.is_null());
    121   return result.simple;
    122 }
    123 
    124 struct MinMaxPair {
    125   int min_match;
    126   int max_match;
    127 };
    128 
    129 
    130 static MinMaxPair CheckMinMaxMatch(const char* input) {
    131   v8::HandleScope scope(CcTest::isolate());
    132   Zone zone(CcTest::i_isolate());
    133   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    134   RegExpCompileData result;
    135   CHECK(v8::internal::RegExpParser::ParseRegExp(
    136       &reader, false, &result, &zone));
    137   CHECK(result.tree != NULL);
    138   CHECK(result.error.is_null());
    139   int min_match = result.tree->min_match();
    140   int max_match = result.tree->max_match();
    141   MinMaxPair pair = { min_match, max_match };
    142   return pair;
    143 }
    144 
    145 
    146 #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
    147 #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
    148 #define CHECK_MIN_MAX(input, min, max)                                         \
    149   { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    150     CHECK_EQ(min, min_max.min_match);                                          \
    151     CHECK_EQ(max, min_max.max_match);                                          \
    152   }
    153 
    154 TEST(Parser) {
    155   CHECK_PARSE_ERROR("?");
    156 
    157   CheckParseEq("abc", "'abc'");
    158   CheckParseEq("", "%");
    159   CheckParseEq("abc|def", "(| 'abc' 'def')");
    160   CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
    161   CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
    162   CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
    163   CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
    164   CheckParseEq("a*", "(# 0 - g 'a')");
    165   CheckParseEq("a*?", "(# 0 - n 'a')");
    166   CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
    167   CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
    168   CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
    169   CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
    170   CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
    171   CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
    172   CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
    173   CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
    174   CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
    175   CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
    176   CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
    177   CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
    178   CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
    179   CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
    180   CheckParseEq("(?:foo)", "'foo'");
    181   CheckParseEq("(?: foo )", "' foo '");
    182   CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
    183   CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
    184   CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
    185   CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
    186   CheckParseEq("()", "(^ %)");
    187   CheckParseEq("(?=)", "(-> + %)");
    188   CheckParseEq("[]", "^[\\x00-\\uffff]");  // Doesn't compile on windows
    189   CheckParseEq("[^]", "[\\x00-\\uffff]");  // \uffff isn't in codepage 1252
    190   CheckParseEq("[x]", "[x]");
    191   CheckParseEq("[xyz]", "[x y z]");
    192   CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
    193   CheckParseEq("[-123]", "[- 1 2 3]");
    194   CheckParseEq("[^123]", "^[1 2 3]");
    195   CheckParseEq("]", "']'");
    196   CheckParseEq("}", "'}'");
    197   CheckParseEq("[a-b-c]", "[a-b - c]");
    198   CheckParseEq("[\\d]", "[0-9]");
    199   CheckParseEq("[x\\dz]", "[x 0-9 z]");
    200   CheckParseEq("[\\d-z]", "[0-9 - z]");
    201   CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
    202   CheckParseEq("[z-\\d]", "[z - 0-9]");
    203   // Control character outside character class.
    204   CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
    205   CheckParseEq("\\c!", "'\\c!'");
    206   CheckParseEq("\\c_", "'\\c_'");
    207   CheckParseEq("\\c~", "'\\c~'");
    208   CheckParseEq("\\c1", "'\\c1'");
    209   // Control character inside character class.
    210   CheckParseEq("[\\c!]", "[\\ c !]");
    211   CheckParseEq("[\\c_]", "[\\x1f]");
    212   CheckParseEq("[\\c~]", "[\\ c ~]");
    213   CheckParseEq("[\\ca]", "[\\x01]");
    214   CheckParseEq("[\\cz]", "[\\x1a]");
    215   CheckParseEq("[\\cA]", "[\\x01]");
    216   CheckParseEq("[\\cZ]", "[\\x1a]");
    217   CheckParseEq("[\\c1]", "[\\x11]");
    218 
    219   CheckParseEq("[a\\]c]", "[a ] c]");
    220   CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
    221   CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
    222   CheckParseEq("\\0", "'\\x00'");
    223   CheckParseEq("\\8", "'8'");
    224   CheckParseEq("\\9", "'9'");
    225   CheckParseEq("\\11", "'\\x09'");
    226   CheckParseEq("\\11a", "'\\x09a'");
    227   CheckParseEq("\\011", "'\\x09'");
    228   CheckParseEq("\\00011", "'\\x0011'");
    229   CheckParseEq("\\118", "'\\x098'");
    230   CheckParseEq("\\111", "'I'");
    231   CheckParseEq("\\1111", "'I1'");
    232   CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
    233   CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
    234   CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
    235   CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
    236   CheckParseEq("(x)(x)(x)\\1*",
    237                "(: (^ 'x') (^ 'x') (^ 'x')"
    238                " (# 0 - g (<- 1)))");
    239   CheckParseEq("(x)(x)(x)\\2*",
    240                "(: (^ 'x') (^ 'x') (^ 'x')"
    241                " (# 0 - g (<- 2)))");
    242   CheckParseEq("(x)(x)(x)\\3*",
    243                "(: (^ 'x') (^ 'x') (^ 'x')"
    244                " (# 0 - g (<- 3)))");
    245   CheckParseEq("(x)(x)(x)\\4*",
    246                "(: (^ 'x') (^ 'x') (^ 'x')"
    247                " (# 0 - g '\\x04'))");
    248   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
    249                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
    250                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
    251   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
    252                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
    253                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
    254   CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
    255   CheckParseEq("(a\\1)", "(^ 'a')");
    256   CheckParseEq("(\\1a)", "(^ 'a')");
    257   CheckParseEq("(?=a)?a", "'a'");
    258   CheckParseEq("(?=a){0,10}a", "'a'");
    259   CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
    260   CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
    261   CheckParseEq("(?!a)?a", "'a'");
    262   CheckParseEq("\\1(a)", "(^ 'a')");
    263   CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
    264   CheckParseEq("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
    265   CheckParseEq("[\\0]", "[\\x00]");
    266   CheckParseEq("[\\11]", "[\\x09]");
    267   CheckParseEq("[\\11a]", "[\\x09 a]");
    268   CheckParseEq("[\\011]", "[\\x09]");
    269   CheckParseEq("[\\00011]", "[\\x00 1 1]");
    270   CheckParseEq("[\\118]", "[\\x09 8]");
    271   CheckParseEq("[\\111]", "[I]");
    272   CheckParseEq("[\\1111]", "[I 1]");
    273   CheckParseEq("\\x34", "'\x34'");
    274   CheckParseEq("\\x60", "'\x60'");
    275   CheckParseEq("\\x3z", "'x3z'");
    276   CheckParseEq("\\c", "'\\c'");
    277   CheckParseEq("\\u0034", "'\x34'");
    278   CheckParseEq("\\u003z", "'u003z'");
    279   CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
    280 
    281   CHECK_SIMPLE("", false);
    282   CHECK_SIMPLE("a", true);
    283   CHECK_SIMPLE("a|b", false);
    284   CHECK_SIMPLE("a\\n", false);
    285   CHECK_SIMPLE("^a", false);
    286   CHECK_SIMPLE("a$", false);
    287   CHECK_SIMPLE("a\\b!", false);
    288   CHECK_SIMPLE("a\\Bb", false);
    289   CHECK_SIMPLE("a*", false);
    290   CHECK_SIMPLE("a*?", false);
    291   CHECK_SIMPLE("a?", false);
    292   CHECK_SIMPLE("a??", false);
    293   CHECK_SIMPLE("a{0,1}?", false);
    294   CHECK_SIMPLE("a{1,1}?", false);
    295   CHECK_SIMPLE("a{1,2}?", false);
    296   CHECK_SIMPLE("a+?", false);
    297   CHECK_SIMPLE("(a)", false);
    298   CHECK_SIMPLE("(a)\\1", false);
    299   CHECK_SIMPLE("(\\1a)", false);
    300   CHECK_SIMPLE("\\1(a)", false);
    301   CHECK_SIMPLE("a\\s", false);
    302   CHECK_SIMPLE("a\\S", false);
    303   CHECK_SIMPLE("a\\d", false);
    304   CHECK_SIMPLE("a\\D", false);
    305   CHECK_SIMPLE("a\\w", false);
    306   CHECK_SIMPLE("a\\W", false);
    307   CHECK_SIMPLE("a.", false);
    308   CHECK_SIMPLE("a\\q", false);
    309   CHECK_SIMPLE("a[a]", false);
    310   CHECK_SIMPLE("a[^a]", false);
    311   CHECK_SIMPLE("a[a-z]", false);
    312   CHECK_SIMPLE("a[\\q]", false);
    313   CHECK_SIMPLE("a(?:b)", false);
    314   CHECK_SIMPLE("a(?=b)", false);
    315   CHECK_SIMPLE("a(?!b)", false);
    316   CHECK_SIMPLE("\\x60", false);
    317   CHECK_SIMPLE("\\u0060", false);
    318   CHECK_SIMPLE("\\cA", false);
    319   CHECK_SIMPLE("\\q", false);
    320   CHECK_SIMPLE("\\1112", false);
    321   CHECK_SIMPLE("\\0", false);
    322   CHECK_SIMPLE("(a)\\1", false);
    323   CHECK_SIMPLE("(?=a)?a", false);
    324   CHECK_SIMPLE("(?!a)?a\\1", false);
    325   CHECK_SIMPLE("(?:(?=a))a\\1", false);
    326 
    327   CheckParseEq("a{}", "'a{}'");
    328   CheckParseEq("a{,}", "'a{,}'");
    329   CheckParseEq("a{", "'a{'");
    330   CheckParseEq("a{z}", "'a{z}'");
    331   CheckParseEq("a{1z}", "'a{1z}'");
    332   CheckParseEq("a{12z}", "'a{12z}'");
    333   CheckParseEq("a{12,", "'a{12,'");
    334   CheckParseEq("a{12,3b", "'a{12,3b'");
    335   CheckParseEq("{}", "'{}'");
    336   CheckParseEq("{,}", "'{,}'");
    337   CheckParseEq("{", "'{'");
    338   CheckParseEq("{z}", "'{z}'");
    339   CheckParseEq("{1z}", "'{1z}'");
    340   CheckParseEq("{12z}", "'{12z}'");
    341   CheckParseEq("{12,", "'{12,'");
    342   CheckParseEq("{12,3b", "'{12,3b'");
    343 
    344   CHECK_MIN_MAX("a", 1, 1);
    345   CHECK_MIN_MAX("abc", 3, 3);
    346   CHECK_MIN_MAX("a[bc]d", 3, 3);
    347   CHECK_MIN_MAX("a|bc", 1, 2);
    348   CHECK_MIN_MAX("ab|c", 1, 2);
    349   CHECK_MIN_MAX("a||bc", 0, 2);
    350   CHECK_MIN_MAX("|", 0, 0);
    351   CHECK_MIN_MAX("(?:ab)", 2, 2);
    352   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
    353   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
    354   CHECK_MIN_MAX("(ab)", 2, 2);
    355   CHECK_MIN_MAX("(ab|cde)", 2, 3);
    356   CHECK_MIN_MAX("(ab)\\1", 2, 4);
    357   CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
    358   CHECK_MIN_MAX("(?:ab)?", 0, 2);
    359   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
    360   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
    361   CHECK_MIN_MAX("a?", 0, 1);
    362   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
    363   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
    364   CHECK_MIN_MAX("a??", 0, 1);
    365   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
    366   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
    367   CHECK_MIN_MAX("(?:a?)?", 0, 1);
    368   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
    369   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
    370   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
    371   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
    372   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
    373   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
    374   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
    375   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
    376   CHECK_MIN_MAX("a{0}", 0, 0);
    377   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
    378   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
    379   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
    380   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
    381   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
    382   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
    383   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
    384   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
    385   CHECK_MIN_MAX("a\\bc", 2, 2);
    386   CHECK_MIN_MAX("a\\Bc", 2, 2);
    387   CHECK_MIN_MAX("a\\sc", 3, 3);
    388   CHECK_MIN_MAX("a\\Sc", 3, 3);
    389   CHECK_MIN_MAX("a(?=b)c", 2, 2);
    390   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
    391   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
    392 }
    393 
    394 
    395 TEST(ParserRegression) {
    396   CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
    397   CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
    398   CheckParseEq("{", "'{'");
    399   CheckParseEq("a|", "(| 'a' %)");
    400 }
    401 
    402 static void ExpectError(const char* input,
    403                         const char* expected) {
    404   v8::HandleScope scope(CcTest::isolate());
    405   Zone zone(CcTest::i_isolate());
    406   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    407   RegExpCompileData result;
    408   CHECK(!v8::internal::RegExpParser::ParseRegExp(
    409       &reader, false, &result, &zone));
    410   CHECK(result.tree == NULL);
    411   CHECK(!result.error.is_null());
    412   SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
    413   CHECK_EQ(expected, str.get());
    414 }
    415 
    416 
    417 TEST(Errors) {
    418   const char* kEndBackslash = "\\ at end of pattern";
    419   ExpectError("\\", kEndBackslash);
    420   const char* kUnterminatedGroup = "Unterminated group";
    421   ExpectError("(foo", kUnterminatedGroup);
    422   const char* kInvalidGroup = "Invalid group";
    423   ExpectError("(?", kInvalidGroup);
    424   const char* kUnterminatedCharacterClass = "Unterminated character class";
    425   ExpectError("[", kUnterminatedCharacterClass);
    426   ExpectError("[a-", kUnterminatedCharacterClass);
    427   const char* kNothingToRepeat = "Nothing to repeat";
    428   ExpectError("*", kNothingToRepeat);
    429   ExpectError("?", kNothingToRepeat);
    430   ExpectError("+", kNothingToRepeat);
    431   ExpectError("{1}", kNothingToRepeat);
    432   ExpectError("{1,2}", kNothingToRepeat);
    433   ExpectError("{1,}", kNothingToRepeat);
    434 
    435   // Check that we don't allow more than kMaxCapture captures
    436   const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
    437   const char* kTooManyCaptures = "Too many captures";
    438   OStringStream os;
    439   for (int i = 0; i <= kMaxCaptures; i++) {
    440     os << "()";
    441   }
    442   ExpectError(os.c_str(), kTooManyCaptures);
    443 }
    444 
    445 
    446 static bool IsDigit(uc16 c) {
    447   return ('0' <= c && c <= '9');
    448 }
    449 
    450 
    451 static bool NotDigit(uc16 c) {
    452   return !IsDigit(c);
    453 }
    454 
    455 
    456 static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
    457   // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
    458   // WhiteSpace (7.2) and LineTerminator (7.3) values.
    459   return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
    460 }
    461 
    462 
    463 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
    464   return !IsWhiteSpaceOrLineTerminator(c);
    465 }
    466 
    467 
    468 static bool NotWord(uc16 c) {
    469   return !IsRegExpWord(c);
    470 }
    471 
    472 
    473 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
    474   Zone zone(CcTest::i_isolate());
    475   ZoneList<CharacterRange>* ranges =
    476       new(&zone) ZoneList<CharacterRange>(2, &zone);
    477   CharacterRange::AddClassEscape(c, ranges, &zone);
    478   for (unsigned i = 0; i < (1 << 16); i++) {
    479     bool in_class = false;
    480     for (int j = 0; !in_class && j < ranges->length(); j++) {
    481       CharacterRange& range = ranges->at(j);
    482       in_class = (range.from() <= i && i <= range.to());
    483     }
    484     CHECK_EQ(pred(i), in_class);
    485   }
    486 }
    487 
    488 
    489 TEST(CharacterClassEscapes) {
    490   TestCharacterClassEscapes('.', IsRegExpNewline);
    491   TestCharacterClassEscapes('d', IsDigit);
    492   TestCharacterClassEscapes('D', NotDigit);
    493   TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
    494   TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
    495   TestCharacterClassEscapes('w', IsRegExpWord);
    496   TestCharacterClassEscapes('W', NotWord);
    497 }
    498 
    499 
    500 static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
    501                            Zone* zone) {
    502   Isolate* isolate = CcTest::i_isolate();
    503   FlatStringReader reader(isolate, CStrVector(input));
    504   RegExpCompileData compile_data;
    505   if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
    506                                                &compile_data, zone))
    507     return NULL;
    508   Handle<String> pattern = isolate->factory()->
    509       NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
    510   Handle<String> sample_subject =
    511       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
    512   RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
    513                         sample_subject, is_one_byte, zone);
    514   return compile_data.node;
    515 }
    516 
    517 
    518 static void Execute(const char* input, bool multiline, bool is_one_byte,
    519                     bool dot_output = false) {
    520   v8::HandleScope scope(CcTest::isolate());
    521   Zone zone(CcTest::i_isolate());
    522   RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
    523   USE(node);
    524 #ifdef DEBUG
    525   if (dot_output) {
    526     RegExpEngine::DotPrint(input, node, false);
    527   }
    528 #endif  // DEBUG
    529 }
    530 
    531 
    532 class TestConfig {
    533  public:
    534   typedef int Key;
    535   typedef int Value;
    536   static const int kNoKey;
    537   static int NoValue() { return 0; }
    538   static inline int Compare(int a, int b) {
    539     if (a < b)
    540       return -1;
    541     else if (a > b)
    542       return 1;
    543     else
    544       return 0;
    545   }
    546 };
    547 
    548 
    549 const int TestConfig::kNoKey = 0;
    550 
    551 
    552 static unsigned PseudoRandom(int i, int j) {
    553   return ~(~((i * 781) ^ (j * 329)));
    554 }
    555 
    556 
    557 TEST(SplayTreeSimple) {
    558   static const unsigned kLimit = 1000;
    559   Zone zone(CcTest::i_isolate());
    560   ZoneSplayTree<TestConfig> tree(&zone);
    561   bool seen[kLimit];
    562   for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
    563 #define CHECK_MAPS_EQUAL() do {                                      \
    564     for (unsigned k = 0; k < kLimit; k++)                            \
    565       CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
    566   } while (false)
    567   for (int i = 0; i < 50; i++) {
    568     for (int j = 0; j < 50; j++) {
    569       unsigned next = PseudoRandom(i, j) % kLimit;
    570       if (seen[next]) {
    571         // We've already seen this one.  Check the value and remove
    572         // it.
    573         ZoneSplayTree<TestConfig>::Locator loc;
    574         CHECK(tree.Find(next, &loc));
    575         CHECK_EQ(next, loc.key());
    576         CHECK_EQ(3 * next, loc.value());
    577         tree.Remove(next);
    578         seen[next] = false;
    579         CHECK_MAPS_EQUAL();
    580       } else {
    581         // Check that it wasn't there already and then add it.
    582         ZoneSplayTree<TestConfig>::Locator loc;
    583         CHECK(!tree.Find(next, &loc));
    584         CHECK(tree.Insert(next, &loc));
    585         CHECK_EQ(next, loc.key());
    586         loc.set_value(3 * next);
    587         seen[next] = true;
    588         CHECK_MAPS_EQUAL();
    589       }
    590       int val = PseudoRandom(j, i) % kLimit;
    591       if (seen[val]) {
    592         ZoneSplayTree<TestConfig>::Locator loc;
    593         CHECK(tree.FindGreatestLessThan(val, &loc));
    594         CHECK_EQ(loc.key(), val);
    595         break;
    596       }
    597       val = PseudoRandom(i + j, i - j) % kLimit;
    598       if (seen[val]) {
    599         ZoneSplayTree<TestConfig>::Locator loc;
    600         CHECK(tree.FindLeastGreaterThan(val, &loc));
    601         CHECK_EQ(loc.key(), val);
    602         break;
    603       }
    604     }
    605   }
    606 }
    607 
    608 
    609 TEST(DispatchTableConstruction) {
    610   // Initialize test data.
    611   static const int kLimit = 1000;
    612   static const int kRangeCount = 8;
    613   static const int kRangeSize = 16;
    614   uc16 ranges[kRangeCount][2 * kRangeSize];
    615   for (int i = 0; i < kRangeCount; i++) {
    616     Vector<uc16> range(ranges[i], 2 * kRangeSize);
    617     for (int j = 0; j < 2 * kRangeSize; j++) {
    618       range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
    619     }
    620     range.Sort();
    621     for (int j = 1; j < 2 * kRangeSize; j++) {
    622       CHECK(range[j-1] <= range[j]);
    623     }
    624   }
    625   // Enter test data into dispatch table.
    626   Zone zone(CcTest::i_isolate());
    627   DispatchTable table(&zone);
    628   for (int i = 0; i < kRangeCount; i++) {
    629     uc16* range = ranges[i];
    630     for (int j = 0; j < 2 * kRangeSize; j += 2)
    631       table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
    632   }
    633   // Check that the table looks as we would expect
    634   for (int p = 0; p < kLimit; p++) {
    635     OutSet* outs = table.Get(p);
    636     for (int j = 0; j < kRangeCount; j++) {
    637       uc16* range = ranges[j];
    638       bool is_on = false;
    639       for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
    640         is_on = (range[k] <= p && p <= range[k + 1]);
    641       CHECK_EQ(is_on, outs->Get(j));
    642     }
    643   }
    644 }
    645 
    646 
    647 // Test of debug-only syntax.
    648 #ifdef DEBUG
    649 
    650 TEST(ParsePossessiveRepetition) {
    651   bool old_flag_value = FLAG_regexp_possessive_quantifier;
    652 
    653   // Enable possessive quantifier syntax.
    654   FLAG_regexp_possessive_quantifier = true;
    655 
    656   CheckParseEq("a*+", "(# 0 - p 'a')");
    657   CheckParseEq("a++", "(# 1 - p 'a')");
    658   CheckParseEq("a?+", "(# 0 1 p 'a')");
    659   CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
    660   CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
    661 
    662   // Disable possessive quantifier syntax.
    663   FLAG_regexp_possessive_quantifier = false;
    664 
    665   CHECK_PARSE_ERROR("a*+");
    666   CHECK_PARSE_ERROR("a++");
    667   CHECK_PARSE_ERROR("a?+");
    668   CHECK_PARSE_ERROR("a{10,20}+");
    669   CHECK_PARSE_ERROR("a{10,20}+b");
    670 
    671   FLAG_regexp_possessive_quantifier = old_flag_value;
    672 }
    673 
    674 #endif
    675 
    676 // Tests of interpreter.
    677 
    678 
    679 #ifndef V8_INTERPRETED_REGEXP
    680 
    681 #if V8_TARGET_ARCH_IA32
    682 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
    683 #elif V8_TARGET_ARCH_X64
    684 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
    685 #elif V8_TARGET_ARCH_ARM
    686 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
    687 #elif V8_TARGET_ARCH_ARM64
    688 typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
    689 #elif V8_TARGET_ARCH_MIPS
    690 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
    691 #elif V8_TARGET_ARCH_MIPS64
    692 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
    693 #elif V8_TARGET_ARCH_X87
    694 typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
    695 #endif
    696 
    697 class ContextInitializer {
    698  public:
    699   ContextInitializer()
    700       : scope_(CcTest::isolate()),
    701         env_(v8::Context::New(CcTest::isolate())) {
    702     env_->Enter();
    703   }
    704   ~ContextInitializer() {
    705     env_->Exit();
    706   }
    707  private:
    708   v8::HandleScope scope_;
    709   v8::Handle<v8::Context> env_;
    710 };
    711 
    712 
    713 static ArchRegExpMacroAssembler::Result Execute(Code* code,
    714                                                 String* input,
    715                                                 int start_offset,
    716                                                 const byte* input_start,
    717                                                 const byte* input_end,
    718                                                 int* captures) {
    719   return NativeRegExpMacroAssembler::Execute(
    720       code,
    721       input,
    722       start_offset,
    723       input_start,
    724       input_end,
    725       captures,
    726       0,
    727       CcTest::i_isolate());
    728 }
    729 
    730 
    731 TEST(MacroAssemblerNativeSuccess) {
    732   v8::V8::Initialize();
    733   ContextInitializer initializer;
    734   Isolate* isolate = CcTest::i_isolate();
    735   Factory* factory = isolate->factory();
    736   Zone zone(isolate);
    737 
    738   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
    739 
    740   m.Succeed();
    741 
    742   Handle<String> source = factory->NewStringFromStaticChars("");
    743   Handle<Object> code_object = m.GetCode(source);
    744   Handle<Code> code = Handle<Code>::cast(code_object);
    745 
    746   int captures[4] = {42, 37, 87, 117};
    747   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    748   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    749   const byte* start_adr =
    750       reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
    751 
    752   NativeRegExpMacroAssembler::Result result =
    753       Execute(*code,
    754               *input,
    755               0,
    756               start_adr,
    757               start_adr + seq_input->length(),
    758               captures);
    759 
    760   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    761   CHECK_EQ(-1, captures[0]);
    762   CHECK_EQ(-1, captures[1]);
    763   CHECK_EQ(-1, captures[2]);
    764   CHECK_EQ(-1, captures[3]);
    765 }
    766 
    767 
    768 TEST(MacroAssemblerNativeSimple) {
    769   v8::V8::Initialize();
    770   ContextInitializer initializer;
    771   Isolate* isolate = CcTest::i_isolate();
    772   Factory* factory = isolate->factory();
    773   Zone zone(isolate);
    774 
    775   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
    776 
    777   Label fail, backtrack;
    778   m.PushBacktrack(&fail);
    779   m.CheckNotAtStart(NULL);
    780   m.LoadCurrentCharacter(2, NULL);
    781   m.CheckNotCharacter('o', NULL);
    782   m.LoadCurrentCharacter(1, NULL, false);
    783   m.CheckNotCharacter('o', NULL);
    784   m.LoadCurrentCharacter(0, NULL, false);
    785   m.CheckNotCharacter('f', NULL);
    786   m.WriteCurrentPositionToRegister(0, 0);
    787   m.WriteCurrentPositionToRegister(1, 3);
    788   m.AdvanceCurrentPosition(3);
    789   m.PushBacktrack(&backtrack);
    790   m.Succeed();
    791   m.Bind(&backtrack);
    792   m.Backtrack();
    793   m.Bind(&fail);
    794   m.Fail();
    795 
    796   Handle<String> source = factory->NewStringFromStaticChars("^foo");
    797   Handle<Object> code_object = m.GetCode(source);
    798   Handle<Code> code = Handle<Code>::cast(code_object);
    799 
    800   int captures[4] = {42, 37, 87, 117};
    801   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    802   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    803   Address start_adr = seq_input->GetCharsAddress();
    804 
    805   NativeRegExpMacroAssembler::Result result =
    806       Execute(*code,
    807               *input,
    808               0,
    809               start_adr,
    810               start_adr + input->length(),
    811               captures);
    812 
    813   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    814   CHECK_EQ(0, captures[0]);
    815   CHECK_EQ(3, captures[1]);
    816   CHECK_EQ(-1, captures[2]);
    817   CHECK_EQ(-1, captures[3]);
    818 
    819   input = factory->NewStringFromStaticChars("barbarbar");
    820   seq_input = Handle<SeqOneByteString>::cast(input);
    821   start_adr = seq_input->GetCharsAddress();
    822 
    823   result = Execute(*code,
    824                    *input,
    825                    0,
    826                    start_adr,
    827                    start_adr + input->length(),
    828                    captures);
    829 
    830   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    831 }
    832 
    833 
    834 TEST(MacroAssemblerNativeSimpleUC16) {
    835   v8::V8::Initialize();
    836   ContextInitializer initializer;
    837   Isolate* isolate = CcTest::i_isolate();
    838   Factory* factory = isolate->factory();
    839   Zone zone(isolate);
    840 
    841   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
    842 
    843   Label fail, backtrack;
    844   m.PushBacktrack(&fail);
    845   m.CheckNotAtStart(NULL);
    846   m.LoadCurrentCharacter(2, NULL);
    847   m.CheckNotCharacter('o', NULL);
    848   m.LoadCurrentCharacter(1, NULL, false);
    849   m.CheckNotCharacter('o', NULL);
    850   m.LoadCurrentCharacter(0, NULL, false);
    851   m.CheckNotCharacter('f', NULL);
    852   m.WriteCurrentPositionToRegister(0, 0);
    853   m.WriteCurrentPositionToRegister(1, 3);
    854   m.AdvanceCurrentPosition(3);
    855   m.PushBacktrack(&backtrack);
    856   m.Succeed();
    857   m.Bind(&backtrack);
    858   m.Backtrack();
    859   m.Bind(&fail);
    860   m.Fail();
    861 
    862   Handle<String> source = factory->NewStringFromStaticChars("^foo");
    863   Handle<Object> code_object = m.GetCode(source);
    864   Handle<Code> code = Handle<Code>::cast(code_object);
    865 
    866   int captures[4] = {42, 37, 87, 117};
    867   const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
    868                               static_cast<uc16>(0x2603)};
    869   Handle<String> input = factory->NewStringFromTwoByte(
    870       Vector<const uc16>(input_data, 6)).ToHandleChecked();
    871   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
    872   Address start_adr = seq_input->GetCharsAddress();
    873 
    874   NativeRegExpMacroAssembler::Result result =
    875       Execute(*code,
    876               *input,
    877               0,
    878               start_adr,
    879               start_adr + input->length(),
    880               captures);
    881 
    882   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    883   CHECK_EQ(0, captures[0]);
    884   CHECK_EQ(3, captures[1]);
    885   CHECK_EQ(-1, captures[2]);
    886   CHECK_EQ(-1, captures[3]);
    887 
    888   const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
    889                                static_cast<uc16>(0x2603)};
    890   input = factory->NewStringFromTwoByte(
    891       Vector<const uc16>(input_data2, 9)).ToHandleChecked();
    892   seq_input = Handle<SeqTwoByteString>::cast(input);
    893   start_adr = seq_input->GetCharsAddress();
    894 
    895   result = Execute(*code,
    896                    *input,
    897                    0,
    898                    start_adr,
    899                    start_adr + input->length() * 2,
    900                    captures);
    901 
    902   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    903 }
    904 
    905 
    906 TEST(MacroAssemblerNativeBacktrack) {
    907   v8::V8::Initialize();
    908   ContextInitializer initializer;
    909   Isolate* isolate = CcTest::i_isolate();
    910   Factory* factory = isolate->factory();
    911   Zone zone(isolate);
    912 
    913   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
    914 
    915   Label fail;
    916   Label backtrack;
    917   m.LoadCurrentCharacter(10, &fail);
    918   m.Succeed();
    919   m.Bind(&fail);
    920   m.PushBacktrack(&backtrack);
    921   m.LoadCurrentCharacter(10, NULL);
    922   m.Succeed();
    923   m.Bind(&backtrack);
    924   m.Fail();
    925 
    926   Handle<String> source = factory->NewStringFromStaticChars("..........");
    927   Handle<Object> code_object = m.GetCode(source);
    928   Handle<Code> code = Handle<Code>::cast(code_object);
    929 
    930   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    931   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    932   Address start_adr = seq_input->GetCharsAddress();
    933 
    934   NativeRegExpMacroAssembler::Result result =
    935       Execute(*code,
    936               *input,
    937               0,
    938               start_adr,
    939               start_adr + input->length(),
    940               NULL);
    941 
    942   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    943 }
    944 
    945 
    946 TEST(MacroAssemblerNativeBackReferenceLATIN1) {
    947   v8::V8::Initialize();
    948   ContextInitializer initializer;
    949   Isolate* isolate = CcTest::i_isolate();
    950   Factory* factory = isolate->factory();
    951   Zone zone(isolate);
    952 
    953   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
    954 
    955   m.WriteCurrentPositionToRegister(0, 0);
    956   m.AdvanceCurrentPosition(2);
    957   m.WriteCurrentPositionToRegister(1, 0);
    958   Label nomatch;
    959   m.CheckNotBackReference(0, &nomatch);
    960   m.Fail();
    961   m.Bind(&nomatch);
    962   m.AdvanceCurrentPosition(2);
    963   Label missing_match;
    964   m.CheckNotBackReference(0, &missing_match);
    965   m.WriteCurrentPositionToRegister(2, 0);
    966   m.Succeed();
    967   m.Bind(&missing_match);
    968   m.Fail();
    969 
    970   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
    971   Handle<Object> code_object = m.GetCode(source);
    972   Handle<Code> code = Handle<Code>::cast(code_object);
    973 
    974   Handle<String> input = factory->NewStringFromStaticChars("fooofo");
    975   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    976   Address start_adr = seq_input->GetCharsAddress();
    977 
    978   int output[4];
    979   NativeRegExpMacroAssembler::Result result =
    980       Execute(*code,
    981               *input,
    982               0,
    983               start_adr,
    984               start_adr + input->length(),
    985               output);
    986 
    987   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    988   CHECK_EQ(0, output[0]);
    989   CHECK_EQ(2, output[1]);
    990   CHECK_EQ(6, output[2]);
    991   CHECK_EQ(-1, output[3]);
    992 }
    993 
    994 
    995 TEST(MacroAssemblerNativeBackReferenceUC16) {
    996   v8::V8::Initialize();
    997   ContextInitializer initializer;
    998   Isolate* isolate = CcTest::i_isolate();
    999   Factory* factory = isolate->factory();
   1000   Zone zone(isolate);
   1001 
   1002   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
   1003 
   1004   m.WriteCurrentPositionToRegister(0, 0);
   1005   m.AdvanceCurrentPosition(2);
   1006   m.WriteCurrentPositionToRegister(1, 0);
   1007   Label nomatch;
   1008   m.CheckNotBackReference(0, &nomatch);
   1009   m.Fail();
   1010   m.Bind(&nomatch);
   1011   m.AdvanceCurrentPosition(2);
   1012   Label missing_match;
   1013   m.CheckNotBackReference(0, &missing_match);
   1014   m.WriteCurrentPositionToRegister(2, 0);
   1015   m.Succeed();
   1016   m.Bind(&missing_match);
   1017   m.Fail();
   1018 
   1019   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
   1020   Handle<Object> code_object = m.GetCode(source);
   1021   Handle<Code> code = Handle<Code>::cast(code_object);
   1022 
   1023   const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
   1024   Handle<String> input = factory->NewStringFromTwoByte(
   1025       Vector<const uc16>(input_data, 6)).ToHandleChecked();
   1026   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
   1027   Address start_adr = seq_input->GetCharsAddress();
   1028 
   1029   int output[4];
   1030   NativeRegExpMacroAssembler::Result result =
   1031       Execute(*code,
   1032               *input,
   1033               0,
   1034               start_adr,
   1035               start_adr + input->length() * 2,
   1036               output);
   1037 
   1038   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1039   CHECK_EQ(0, output[0]);
   1040   CHECK_EQ(2, output[1]);
   1041   CHECK_EQ(6, output[2]);
   1042   CHECK_EQ(-1, output[3]);
   1043 }
   1044 
   1045 
   1046 
   1047 TEST(MacroAssemblernativeAtStart) {
   1048   v8::V8::Initialize();
   1049   ContextInitializer initializer;
   1050   Isolate* isolate = CcTest::i_isolate();
   1051   Factory* factory = isolate->factory();
   1052   Zone zone(isolate);
   1053 
   1054   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
   1055 
   1056   Label not_at_start, newline, fail;
   1057   m.CheckNotAtStart(&not_at_start);
   1058   // Check that prevchar = '\n' and current = 'f'.
   1059   m.CheckCharacter('\n', &newline);
   1060   m.Bind(&fail);
   1061   m.Fail();
   1062   m.Bind(&newline);
   1063   m.LoadCurrentCharacter(0, &fail);
   1064   m.CheckNotCharacter('f', &fail);
   1065   m.Succeed();
   1066 
   1067   m.Bind(&not_at_start);
   1068   // Check that prevchar = 'o' and current = 'b'.
   1069   Label prevo;
   1070   m.CheckCharacter('o', &prevo);
   1071   m.Fail();
   1072   m.Bind(&prevo);
   1073   m.LoadCurrentCharacter(0, &fail);
   1074   m.CheckNotCharacter('b', &fail);
   1075   m.Succeed();
   1076 
   1077   Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
   1078   Handle<Object> code_object = m.GetCode(source);
   1079   Handle<Code> code = Handle<Code>::cast(code_object);
   1080 
   1081   Handle<String> input = factory->NewStringFromStaticChars("foobar");
   1082   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1083   Address start_adr = seq_input->GetCharsAddress();
   1084 
   1085   NativeRegExpMacroAssembler::Result result =
   1086       Execute(*code,
   1087               *input,
   1088               0,
   1089               start_adr,
   1090               start_adr + input->length(),
   1091               NULL);
   1092 
   1093   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1094 
   1095   result = Execute(*code,
   1096                    *input,
   1097                    3,
   1098                    start_adr + 3,
   1099                    start_adr + input->length(),
   1100                    NULL);
   1101 
   1102   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1103 }
   1104 
   1105 
   1106 TEST(MacroAssemblerNativeBackRefNoCase) {
   1107   v8::V8::Initialize();
   1108   ContextInitializer initializer;
   1109   Isolate* isolate = CcTest::i_isolate();
   1110   Factory* factory = isolate->factory();
   1111   Zone zone(isolate);
   1112 
   1113   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
   1114 
   1115   Label fail, succ;
   1116 
   1117   m.WriteCurrentPositionToRegister(0, 0);
   1118   m.WriteCurrentPositionToRegister(2, 0);
   1119   m.AdvanceCurrentPosition(3);
   1120   m.WriteCurrentPositionToRegister(3, 0);
   1121   m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
   1122   m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
   1123   Label expected_fail;
   1124   m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
   1125   m.Bind(&fail);
   1126   m.Fail();
   1127 
   1128   m.Bind(&expected_fail);
   1129   m.AdvanceCurrentPosition(3);  // Skip "xYz"
   1130   m.CheckNotBackReferenceIgnoreCase(2, &succ);
   1131   m.Fail();
   1132 
   1133   m.Bind(&succ);
   1134   m.WriteCurrentPositionToRegister(1, 0);
   1135   m.Succeed();
   1136 
   1137   Handle<String> source =
   1138       factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
   1139   Handle<Object> code_object = m.GetCode(source);
   1140   Handle<Code> code = Handle<Code>::cast(code_object);
   1141 
   1142   Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
   1143   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1144   Address start_adr = seq_input->GetCharsAddress();
   1145 
   1146   int output[4];
   1147   NativeRegExpMacroAssembler::Result result =
   1148       Execute(*code,
   1149               *input,
   1150               0,
   1151               start_adr,
   1152               start_adr + input->length(),
   1153               output);
   1154 
   1155   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1156   CHECK_EQ(0, output[0]);
   1157   CHECK_EQ(12, output[1]);
   1158   CHECK_EQ(0, output[2]);
   1159   CHECK_EQ(3, output[3]);
   1160 }
   1161 
   1162 
   1163 
   1164 TEST(MacroAssemblerNativeRegisters) {
   1165   v8::V8::Initialize();
   1166   ContextInitializer initializer;
   1167   Isolate* isolate = CcTest::i_isolate();
   1168   Factory* factory = isolate->factory();
   1169   Zone zone(isolate);
   1170 
   1171   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 6, &zone);
   1172 
   1173   uc16 foo_chars[3] = {'f', 'o', 'o'};
   1174   Vector<const uc16> foo(foo_chars, 3);
   1175 
   1176   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
   1177   Label fail;
   1178   Label backtrack;
   1179   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
   1180   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
   1181   m.PushBacktrack(&backtrack);
   1182   m.WriteStackPointerToRegister(sp);
   1183   // Fill stack and registers
   1184   m.AdvanceCurrentPosition(2);
   1185   m.WriteCurrentPositionToRegister(out1, 0);
   1186   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
   1187   m.PushBacktrack(&fail);
   1188   // Drop backtrack stack frames.
   1189   m.ReadStackPointerFromRegister(sp);
   1190   // And take the first backtrack (to &backtrack)
   1191   m.Backtrack();
   1192 
   1193   m.PushCurrentPosition();
   1194   m.AdvanceCurrentPosition(2);
   1195   m.PopCurrentPosition();
   1196 
   1197   m.Bind(&backtrack);
   1198   m.PopRegister(out1);
   1199   m.ReadCurrentPositionFromRegister(out1);
   1200   m.AdvanceCurrentPosition(3);
   1201   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
   1202 
   1203   Label loop;
   1204   m.SetRegister(loop_cnt, 0);  // loop counter
   1205   m.Bind(&loop);
   1206   m.AdvanceRegister(loop_cnt, 1);
   1207   m.AdvanceCurrentPosition(1);
   1208   m.IfRegisterLT(loop_cnt, 3, &loop);
   1209   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
   1210 
   1211   Label loop2;
   1212   m.SetRegister(loop_cnt, 2);  // loop counter
   1213   m.Bind(&loop2);
   1214   m.AdvanceRegister(loop_cnt, -1);
   1215   m.AdvanceCurrentPosition(1);
   1216   m.IfRegisterGE(loop_cnt, 0, &loop2);
   1217   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
   1218 
   1219   Label loop3;
   1220   Label exit_loop3;
   1221   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
   1222   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
   1223   m.ReadCurrentPositionFromRegister(out3);
   1224   m.Bind(&loop3);
   1225   m.AdvanceCurrentPosition(1);
   1226   m.CheckGreedyLoop(&exit_loop3);
   1227   m.GoTo(&loop3);
   1228   m.Bind(&exit_loop3);
   1229   m.PopCurrentPosition();
   1230   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
   1231 
   1232   m.Succeed();
   1233 
   1234   m.Bind(&fail);
   1235   m.Fail();
   1236 
   1237   Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
   1238   Handle<Object> code_object = m.GetCode(source);
   1239   Handle<Code> code = Handle<Code>::cast(code_object);
   1240 
   1241   // String long enough for test (content doesn't matter).
   1242   Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
   1243   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1244   Address start_adr = seq_input->GetCharsAddress();
   1245 
   1246   int output[6];
   1247   NativeRegExpMacroAssembler::Result result =
   1248       Execute(*code,
   1249               *input,
   1250               0,
   1251               start_adr,
   1252               start_adr + input->length(),
   1253               output);
   1254 
   1255   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1256   CHECK_EQ(0, output[0]);
   1257   CHECK_EQ(3, output[1]);
   1258   CHECK_EQ(6, output[2]);
   1259   CHECK_EQ(9, output[3]);
   1260   CHECK_EQ(9, output[4]);
   1261   CHECK_EQ(-1, output[5]);
   1262 }
   1263 
   1264 
   1265 TEST(MacroAssemblerStackOverflow) {
   1266   v8::V8::Initialize();
   1267   ContextInitializer initializer;
   1268   Isolate* isolate = CcTest::i_isolate();
   1269   Factory* factory = isolate->factory();
   1270   Zone zone(isolate);
   1271 
   1272   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
   1273 
   1274   Label loop;
   1275   m.Bind(&loop);
   1276   m.PushBacktrack(&loop);
   1277   m.GoTo(&loop);
   1278 
   1279   Handle<String> source =
   1280       factory->NewStringFromStaticChars("<stack overflow test>");
   1281   Handle<Object> code_object = m.GetCode(source);
   1282   Handle<Code> code = Handle<Code>::cast(code_object);
   1283 
   1284   // String long enough for test (content doesn't matter).
   1285   Handle<String> input = factory->NewStringFromStaticChars("dummy");
   1286   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1287   Address start_adr = seq_input->GetCharsAddress();
   1288 
   1289   NativeRegExpMacroAssembler::Result result =
   1290       Execute(*code,
   1291               *input,
   1292               0,
   1293               start_adr,
   1294               start_adr + input->length(),
   1295               NULL);
   1296 
   1297   CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
   1298   CHECK(isolate->has_pending_exception());
   1299   isolate->clear_pending_exception();
   1300 }
   1301 
   1302 
   1303 TEST(MacroAssemblerNativeLotsOfRegisters) {
   1304   v8::V8::Initialize();
   1305   ContextInitializer initializer;
   1306   Isolate* isolate = CcTest::i_isolate();
   1307   Factory* factory = isolate->factory();
   1308   Zone zone(isolate);
   1309 
   1310   ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 2, &zone);
   1311 
   1312   // At least 2048, to ensure the allocated space for registers
   1313   // span one full page.
   1314   const int large_number = 8000;
   1315   m.WriteCurrentPositionToRegister(large_number, 42);
   1316   m.WriteCurrentPositionToRegister(0, 0);
   1317   m.WriteCurrentPositionToRegister(1, 1);
   1318   Label done;
   1319   m.CheckNotBackReference(0, &done);  // Performs a system-stack push.
   1320   m.Bind(&done);
   1321   m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
   1322   m.PopRegister(1);
   1323   m.Succeed();
   1324 
   1325   Handle<String> source =
   1326       factory->NewStringFromStaticChars("<huge register space test>");
   1327   Handle<Object> code_object = m.GetCode(source);
   1328   Handle<Code> code = Handle<Code>::cast(code_object);
   1329 
   1330   // String long enough for test (content doesn't matter).
   1331   Handle<String> input = factory->NewStringFromStaticChars("sample text");
   1332   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1333   Address start_adr = seq_input->GetCharsAddress();
   1334 
   1335   int captures[2];
   1336   NativeRegExpMacroAssembler::Result result =
   1337       Execute(*code,
   1338               *input,
   1339               0,
   1340               start_adr,
   1341               start_adr + input->length(),
   1342               captures);
   1343 
   1344   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1345   CHECK_EQ(0, captures[0]);
   1346   CHECK_EQ(42, captures[1]);
   1347 
   1348   isolate->clear_pending_exception();
   1349 }
   1350 
   1351 #else  // V8_INTERPRETED_REGEXP
   1352 
   1353 TEST(MacroAssembler) {
   1354   byte codes[1024];
   1355   Zone zone(CcTest::i_isolate());
   1356   RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone);
   1357   // ^f(o)o.
   1358   Label start, fail, backtrack;
   1359 
   1360   m.SetRegister(4, 42);
   1361   m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
   1362   m.AdvanceRegister(4, 42);
   1363   m.GoTo(&start);
   1364   m.Fail();
   1365   m.Bind(&start);
   1366   m.PushBacktrack(&fail);
   1367   m.CheckNotAtStart(NULL);
   1368   m.LoadCurrentCharacter(0, NULL);
   1369   m.CheckNotCharacter('f', NULL);
   1370   m.LoadCurrentCharacter(1, NULL);
   1371   m.CheckNotCharacter('o', NULL);
   1372   m.LoadCurrentCharacter(2, NULL);
   1373   m.CheckNotCharacter('o', NULL);
   1374   m.WriteCurrentPositionToRegister(0, 0);
   1375   m.WriteCurrentPositionToRegister(1, 3);
   1376   m.WriteCurrentPositionToRegister(2, 1);
   1377   m.WriteCurrentPositionToRegister(3, 2);
   1378   m.AdvanceCurrentPosition(3);
   1379   m.PushBacktrack(&backtrack);
   1380   m.Succeed();
   1381   m.Bind(&backtrack);
   1382   m.ClearRegisters(2, 3);
   1383   m.Backtrack();
   1384   m.Bind(&fail);
   1385   m.PopRegister(0);
   1386   m.Fail();
   1387 
   1388   Isolate* isolate = CcTest::i_isolate();
   1389   Factory* factory = isolate->factory();
   1390   HandleScope scope(isolate);
   1391 
   1392   Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
   1393   Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
   1394   int captures[5];
   1395 
   1396   const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
   1397   Handle<String> f1_16 = factory->NewStringFromTwoByte(
   1398       Vector<const uc16>(str1, 6)).ToHandleChecked();
   1399 
   1400   CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
   1401   CHECK_EQ(0, captures[0]);
   1402   CHECK_EQ(3, captures[1]);
   1403   CHECK_EQ(1, captures[2]);
   1404   CHECK_EQ(2, captures[3]);
   1405   CHECK_EQ(84, captures[4]);
   1406 
   1407   const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
   1408   Handle<String> f2_16 = factory->NewStringFromTwoByte(
   1409       Vector<const uc16>(str2, 6)).ToHandleChecked();
   1410 
   1411   CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
   1412   CHECK_EQ(42, captures[0]);
   1413 }
   1414 
   1415 #endif  // V8_INTERPRETED_REGEXP
   1416 
   1417 
   1418 TEST(AddInverseToTable) {
   1419   static const int kLimit = 1000;
   1420   static const int kRangeCount = 16;
   1421   for (int t = 0; t < 10; t++) {
   1422     Zone zone(CcTest::i_isolate());
   1423     ZoneList<CharacterRange>* ranges =
   1424         new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
   1425     for (int i = 0; i < kRangeCount; i++) {
   1426       int from = PseudoRandom(t + 87, i + 25) % kLimit;
   1427       int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
   1428       if (to > kLimit) to = kLimit;
   1429       ranges->Add(CharacterRange(from, to), &zone);
   1430     }
   1431     DispatchTable table(&zone);
   1432     DispatchTableConstructor cons(&table, false, &zone);
   1433     cons.set_choice_index(0);
   1434     cons.AddInverse(ranges);
   1435     for (int i = 0; i < kLimit; i++) {
   1436       bool is_on = false;
   1437       for (int j = 0; !is_on && j < kRangeCount; j++)
   1438         is_on = ranges->at(j).Contains(i);
   1439       OutSet* set = table.Get(i);
   1440       CHECK_EQ(is_on, set->Get(0) == false);
   1441     }
   1442   }
   1443   Zone zone(CcTest::i_isolate());
   1444   ZoneList<CharacterRange>* ranges =
   1445       new(&zone) ZoneList<CharacterRange>(1, &zone);
   1446   ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
   1447   DispatchTable table(&zone);
   1448   DispatchTableConstructor cons(&table, false, &zone);
   1449   cons.set_choice_index(0);
   1450   cons.AddInverse(ranges);
   1451   CHECK(!table.Get(0xFFFE)->Get(0));
   1452   CHECK(table.Get(0xFFFF)->Get(0));
   1453 }
   1454 
   1455 
   1456 static uc32 canonicalize(uc32 c) {
   1457   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
   1458   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
   1459   if (count == 0) {
   1460     return c;
   1461   } else {
   1462     CHECK_EQ(1, count);
   1463     return canon[0];
   1464   }
   1465 }
   1466 
   1467 
   1468 TEST(LatinCanonicalize) {
   1469   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1470   for (char lower = 'a'; lower <= 'z'; lower++) {
   1471     char upper = lower + ('A' - 'a');
   1472     CHECK_EQ(canonicalize(lower), canonicalize(upper));
   1473     unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1474     int length = un_canonicalize.get(lower, '\0', uncanon);
   1475     CHECK_EQ(2, length);
   1476     CHECK_EQ(upper, uncanon[0]);
   1477     CHECK_EQ(lower, uncanon[1]);
   1478   }
   1479   for (uc32 c = 128; c < (1 << 21); c++)
   1480     CHECK_GE(canonicalize(c), 128);
   1481   unibrow::Mapping<unibrow::ToUppercase> to_upper;
   1482   // Canonicalization is only defined for the Basic Multilingual Plane.
   1483   for (uc32 c = 0; c < (1 << 16); c++) {
   1484     unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
   1485     int length = to_upper.get(c, '\0', upper);
   1486     if (length == 0) {
   1487       length = 1;
   1488       upper[0] = c;
   1489     }
   1490     uc32 u = upper[0];
   1491     if (length > 1 || (c >= 128 && u < 128))
   1492       u = c;
   1493     CHECK_EQ(u, canonicalize(c));
   1494   }
   1495 }
   1496 
   1497 
   1498 static uc32 CanonRangeEnd(uc32 c) {
   1499   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
   1500   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
   1501   if (count == 0) {
   1502     return c;
   1503   } else {
   1504     CHECK_EQ(1, count);
   1505     return canon[0];
   1506   }
   1507 }
   1508 
   1509 
   1510 TEST(RangeCanonicalization) {
   1511   // Check that we arrive at the same result when using the basic
   1512   // range canonicalization primitives as when using immediate
   1513   // canonicalization.
   1514   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1515   int block_start = 0;
   1516   while (block_start <= 0xFFFF) {
   1517     uc32 block_end = CanonRangeEnd(block_start);
   1518     unsigned block_length = block_end - block_start + 1;
   1519     if (block_length > 1) {
   1520       unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1521       int first_length = un_canonicalize.get(block_start, '\0', first);
   1522       for (unsigned i = 1; i < block_length; i++) {
   1523         unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1524         int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
   1525         CHECK_EQ(first_length, succ_length);
   1526         for (int j = 0; j < succ_length; j++) {
   1527           int calc = first[j] + i;
   1528           int found = succ[j];
   1529           CHECK_EQ(calc, found);
   1530         }
   1531       }
   1532     }
   1533     block_start = block_start + block_length;
   1534   }
   1535 }
   1536 
   1537 
   1538 TEST(UncanonicalizeEquivalence) {
   1539   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1540   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1541   for (int i = 0; i < (1 << 16); i++) {
   1542     int length = un_canonicalize.get(i, '\0', chars);
   1543     for (int j = 0; j < length; j++) {
   1544       unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1545       int length2 = un_canonicalize.get(chars[j], '\0', chars2);
   1546       CHECK_EQ(length, length2);
   1547       for (int k = 0; k < length; k++)
   1548         CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
   1549     }
   1550   }
   1551 }
   1552 
   1553 
   1554 static void TestRangeCaseIndependence(CharacterRange input,
   1555                                       Vector<CharacterRange> expected) {
   1556   Zone zone(CcTest::i_isolate());
   1557   int count = expected.length();
   1558   ZoneList<CharacterRange>* list =
   1559       new(&zone) ZoneList<CharacterRange>(count, &zone);
   1560   input.AddCaseEquivalents(list, false, &zone);
   1561   CHECK_EQ(count, list->length());
   1562   for (int i = 0; i < list->length(); i++) {
   1563     CHECK_EQ(expected[i].from(), list->at(i).from());
   1564     CHECK_EQ(expected[i].to(), list->at(i).to());
   1565   }
   1566 }
   1567 
   1568 
   1569 static void TestSimpleRangeCaseIndependence(CharacterRange input,
   1570                                             CharacterRange expected) {
   1571   EmbeddedVector<CharacterRange, 1> vector;
   1572   vector[0] = expected;
   1573   TestRangeCaseIndependence(input, vector);
   1574 }
   1575 
   1576 
   1577 TEST(CharacterRangeCaseIndependence) {
   1578   TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
   1579                                   CharacterRange::Singleton('A'));
   1580   TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
   1581                                   CharacterRange::Singleton('Z'));
   1582   TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
   1583                                   CharacterRange('A', 'Z'));
   1584   TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
   1585                                   CharacterRange('C', 'F'));
   1586   TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
   1587                                   CharacterRange('A', 'B'));
   1588   TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
   1589                                   CharacterRange('Y', 'Z'));
   1590   TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
   1591                                   CharacterRange('A', 'Z'));
   1592   TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
   1593                                   CharacterRange('a', 'z'));
   1594   TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
   1595                                   CharacterRange('c', 'f'));
   1596   TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
   1597                                   CharacterRange('a', 'z'));
   1598   // Here we need to add [l-z] to complete the case independence of
   1599   // [A-Za-z] but we expect [a-z] to be added since we always add a
   1600   // whole block at a time.
   1601   TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
   1602                                   CharacterRange('a', 'z'));
   1603 }
   1604 
   1605 
   1606 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
   1607   if (ranges == NULL)
   1608     return false;
   1609   for (int i = 0; i < ranges->length(); i++) {
   1610     CharacterRange range = ranges->at(i);
   1611     if (range.from() <= c && c <= range.to())
   1612       return true;
   1613   }
   1614   return false;
   1615 }
   1616 
   1617 
   1618 TEST(CharClassDifference) {
   1619   Zone zone(CcTest::i_isolate());
   1620   ZoneList<CharacterRange>* base =
   1621       new(&zone) ZoneList<CharacterRange>(1, &zone);
   1622   base->Add(CharacterRange::Everything(), &zone);
   1623   Vector<const int> overlay = CharacterRange::GetWordBounds();
   1624   ZoneList<CharacterRange>* included = NULL;
   1625   ZoneList<CharacterRange>* excluded = NULL;
   1626   CharacterRange::Split(base, overlay, &included, &excluded, &zone);
   1627   for (int i = 0; i < (1 << 16); i++) {
   1628     bool in_base = InClass(i, base);
   1629     if (in_base) {
   1630       bool in_overlay = false;
   1631       for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
   1632         if (overlay[j] <= i && i < overlay[j+1])
   1633           in_overlay = true;
   1634       }
   1635       CHECK_EQ(in_overlay, InClass(i, included));
   1636       CHECK_EQ(!in_overlay, InClass(i, excluded));
   1637     } else {
   1638       CHECK(!InClass(i, included));
   1639       CHECK(!InClass(i, excluded));
   1640     }
   1641   }
   1642 }
   1643 
   1644 
   1645 TEST(CanonicalizeCharacterSets) {
   1646   Zone zone(CcTest::i_isolate());
   1647   ZoneList<CharacterRange>* list =
   1648       new(&zone) ZoneList<CharacterRange>(4, &zone);
   1649   CharacterSet set(list);
   1650 
   1651   list->Add(CharacterRange(10, 20), &zone);
   1652   list->Add(CharacterRange(30, 40), &zone);
   1653   list->Add(CharacterRange(50, 60), &zone);
   1654   set.Canonicalize();
   1655   DCHECK_EQ(3, list->length());
   1656   DCHECK_EQ(10, list->at(0).from());
   1657   DCHECK_EQ(20, list->at(0).to());
   1658   DCHECK_EQ(30, list->at(1).from());
   1659   DCHECK_EQ(40, list->at(1).to());
   1660   DCHECK_EQ(50, list->at(2).from());
   1661   DCHECK_EQ(60, list->at(2).to());
   1662 
   1663   list->Rewind(0);
   1664   list->Add(CharacterRange(10, 20), &zone);
   1665   list->Add(CharacterRange(50, 60), &zone);
   1666   list->Add(CharacterRange(30, 40), &zone);
   1667   set.Canonicalize();
   1668   DCHECK_EQ(3, list->length());
   1669   DCHECK_EQ(10, list->at(0).from());
   1670   DCHECK_EQ(20, list->at(0).to());
   1671   DCHECK_EQ(30, list->at(1).from());
   1672   DCHECK_EQ(40, list->at(1).to());
   1673   DCHECK_EQ(50, list->at(2).from());
   1674   DCHECK_EQ(60, list->at(2).to());
   1675 
   1676   list->Rewind(0);
   1677   list->Add(CharacterRange(30, 40), &zone);
   1678   list->Add(CharacterRange(10, 20), &zone);
   1679   list->Add(CharacterRange(25, 25), &zone);
   1680   list->Add(CharacterRange(100, 100), &zone);
   1681   list->Add(CharacterRange(1, 1), &zone);
   1682   set.Canonicalize();
   1683   DCHECK_EQ(5, list->length());
   1684   DCHECK_EQ(1, list->at(0).from());
   1685   DCHECK_EQ(1, list->at(0).to());
   1686   DCHECK_EQ(10, list->at(1).from());
   1687   DCHECK_EQ(20, list->at(1).to());
   1688   DCHECK_EQ(25, list->at(2).from());
   1689   DCHECK_EQ(25, list->at(2).to());
   1690   DCHECK_EQ(30, list->at(3).from());
   1691   DCHECK_EQ(40, list->at(3).to());
   1692   DCHECK_EQ(100, list->at(4).from());
   1693   DCHECK_EQ(100, list->at(4).to());
   1694 
   1695   list->Rewind(0);
   1696   list->Add(CharacterRange(10, 19), &zone);
   1697   list->Add(CharacterRange(21, 30), &zone);
   1698   list->Add(CharacterRange(20, 20), &zone);
   1699   set.Canonicalize();
   1700   DCHECK_EQ(1, list->length());
   1701   DCHECK_EQ(10, list->at(0).from());
   1702   DCHECK_EQ(30, list->at(0).to());
   1703 }
   1704 
   1705 
   1706 TEST(CharacterRangeMerge) {
   1707   Zone zone(CcTest::i_isolate());
   1708   ZoneList<CharacterRange> l1(4, &zone);
   1709   ZoneList<CharacterRange> l2(4, &zone);
   1710   // Create all combinations of intersections of ranges, both singletons and
   1711   // longer.
   1712 
   1713   int offset = 0;
   1714 
   1715   // The five kinds of singleton intersections:
   1716   //     X
   1717   //   Y      - outside before
   1718   //    Y     - outside touching start
   1719   //     Y    - overlap
   1720   //      Y   - outside touching end
   1721   //       Y  - outside after
   1722 
   1723   for (int i = 0; i < 5; i++) {
   1724     l1.Add(CharacterRange::Singleton(offset + 2), &zone);
   1725     l2.Add(CharacterRange::Singleton(offset + i), &zone);
   1726     offset += 6;
   1727   }
   1728 
   1729   // The seven kinds of singleton/non-singleton intersections:
   1730   //    XXX
   1731   //  Y        - outside before
   1732   //   Y       - outside touching start
   1733   //    Y      - inside touching start
   1734   //     Y     - entirely inside
   1735   //      Y    - inside touching end
   1736   //       Y   - outside touching end
   1737   //        Y  - disjoint after
   1738 
   1739   for (int i = 0; i < 7; i++) {
   1740     l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
   1741     l2.Add(CharacterRange::Singleton(offset + i), &zone);
   1742     offset += 8;
   1743   }
   1744 
   1745   // The eleven kinds of non-singleton intersections:
   1746   //
   1747   //       XXXXXXXX
   1748   // YYYY                  - outside before.
   1749   //   YYYY                - outside touching start.
   1750   //     YYYY              - overlapping start
   1751   //       YYYY            - inside touching start
   1752   //         YYYY          - entirely inside
   1753   //           YYYY        - inside touching end
   1754   //             YYYY      - overlapping end
   1755   //               YYYY    - outside touching end
   1756   //                 YYYY  - outside after
   1757   //       YYYYYYYY        - identical
   1758   //     YYYYYYYYYYYY      - containing entirely.
   1759 
   1760   for (int i = 0; i < 9; i++) {
   1761     l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
   1762     l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
   1763     offset += 22;
   1764   }
   1765   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1766   l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1767   offset += 22;
   1768   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1769   l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
   1770   offset += 22;
   1771 
   1772   // Different kinds of multi-range overlap:
   1773   // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
   1774   //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
   1775 
   1776   l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
   1777   l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
   1778   for (int i = 0; i < 6; i++) {
   1779     l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
   1780     l2.Add(CharacterRange::Singleton(offset + 8), &zone);
   1781     offset += 9;
   1782   }
   1783 
   1784   DCHECK(CharacterRange::IsCanonical(&l1));
   1785   DCHECK(CharacterRange::IsCanonical(&l2));
   1786 
   1787   ZoneList<CharacterRange> first_only(4, &zone);
   1788   ZoneList<CharacterRange> second_only(4, &zone);
   1789   ZoneList<CharacterRange> both(4, &zone);
   1790 }
   1791 
   1792 
   1793 TEST(Graph) {
   1794   Execute("\\b\\w+\\b", false, true, true);
   1795 }
   1796