Home | History | Annotate | Download | only in cctest
      1 // Copyright 2012 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include <cstdlib>
     29 #include <sstream>
     30 
     31 #include "include/v8.h"
     32 #include "src/v8.h"
     33 
     34 #include "src/ast/ast.h"
     35 #include "src/char-predicates-inl.h"
     36 #include "src/ostreams.h"
     37 #include "src/regexp/jsregexp.h"
     38 #include "src/regexp/regexp-macro-assembler.h"
     39 #include "src/regexp/regexp-macro-assembler-irregexp.h"
     40 #include "src/regexp/regexp-parser.h"
     41 #include "src/splay-tree-inl.h"
     42 #include "src/string-stream.h"
     43 #ifdef V8_INTERPRETED_REGEXP
     44 #include "src/regexp/interpreter-irregexp.h"
     45 #else  // V8_INTERPRETED_REGEXP
     46 #include "src/macro-assembler.h"
     47 #if V8_TARGET_ARCH_ARM
     48 #include "src/arm/assembler-arm.h"  // NOLINT
     49 #include "src/arm/macro-assembler-arm.h"
     50 #include "src/regexp/arm/regexp-macro-assembler-arm.h"
     51 #endif
     52 #if V8_TARGET_ARCH_ARM64
     53 #include "src/arm64/assembler-arm64.h"
     54 #include "src/arm64/macro-assembler-arm64.h"
     55 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
     56 #endif
     57 #if V8_TARGET_ARCH_PPC
     58 #include "src/ppc/assembler-ppc.h"
     59 #include "src/ppc/macro-assembler-ppc.h"
     60 #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
     61 #endif
     62 #if V8_TARGET_ARCH_MIPS
     63 #include "src/mips/assembler-mips.h"
     64 #include "src/mips/macro-assembler-mips.h"
     65 #include "src/regexp/mips/regexp-macro-assembler-mips.h"
     66 #endif
     67 #if V8_TARGET_ARCH_MIPS64
     68 #include "src/mips64/assembler-mips64.h"
     69 #include "src/mips64/macro-assembler-mips64.h"
     70 #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
     71 #endif
     72 #if V8_TARGET_ARCH_X64
     73 #include "src/regexp/x64/regexp-macro-assembler-x64.h"
     74 #include "src/x64/assembler-x64.h"
     75 #include "src/x64/macro-assembler-x64.h"
     76 #endif
     77 #if V8_TARGET_ARCH_IA32
     78 #include "src/ia32/assembler-ia32.h"
     79 #include "src/ia32/macro-assembler-ia32.h"
     80 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
     81 #endif
     82 #if V8_TARGET_ARCH_X87
     83 #include "src/regexp/x87/regexp-macro-assembler-x87.h"
     84 #include "src/x87/assembler-x87.h"
     85 #include "src/x87/macro-assembler-x87.h"
     86 #endif
     87 #endif  // V8_INTERPRETED_REGEXP
     88 #include "test/cctest/cctest.h"
     89 
     90 using namespace v8::internal;
     91 
     92 
     93 static bool CheckParse(const char* input) {
     94   v8::HandleScope scope(CcTest::isolate());
     95   Zone zone;
     96   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     97   RegExpCompileData result;
     98   return v8::internal::RegExpParser::ParseRegExp(
     99       CcTest::i_isolate(), &zone, &reader, false, false, &result);
    100 }
    101 
    102 
    103 static void CheckParseEq(const char* input, const char* expected,
    104                          bool unicode = false) {
    105   v8::HandleScope scope(CcTest::isolate());
    106   Zone zone;
    107   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    108   RegExpCompileData result;
    109   CHECK(v8::internal::RegExpParser::ParseRegExp(
    110       CcTest::i_isolate(), &zone, &reader, false, unicode, &result));
    111   CHECK(result.tree != NULL);
    112   CHECK(result.error.is_null());
    113   std::ostringstream os;
    114   result.tree->Print(os, &zone);
    115   if (strcmp(expected, os.str().c_str()) != 0) {
    116     printf("%s | %s\n", expected, os.str().c_str());
    117   }
    118   CHECK_EQ(0, strcmp(expected, os.str().c_str()));
    119 }
    120 
    121 
    122 static bool CheckSimple(const char* input) {
    123   v8::HandleScope scope(CcTest::isolate());
    124   Zone zone;
    125   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    126   RegExpCompileData result;
    127   CHECK(v8::internal::RegExpParser::ParseRegExp(
    128       CcTest::i_isolate(), &zone, &reader, false, false, &result));
    129   CHECK(result.tree != NULL);
    130   CHECK(result.error.is_null());
    131   return result.simple;
    132 }
    133 
    134 struct MinMaxPair {
    135   int min_match;
    136   int max_match;
    137 };
    138 
    139 
    140 static MinMaxPair CheckMinMaxMatch(const char* input) {
    141   v8::HandleScope scope(CcTest::isolate());
    142   Zone zone;
    143   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    144   RegExpCompileData result;
    145   CHECK(v8::internal::RegExpParser::ParseRegExp(
    146       CcTest::i_isolate(), &zone, &reader, false, false, &result));
    147   CHECK(result.tree != NULL);
    148   CHECK(result.error.is_null());
    149   int min_match = result.tree->min_match();
    150   int max_match = result.tree->max_match();
    151   MinMaxPair pair = { min_match, max_match };
    152   return pair;
    153 }
    154 
    155 
    156 #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
    157 #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
    158 #define CHECK_MIN_MAX(input, min, max)                                         \
    159   { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    160     CHECK_EQ(min, min_max.min_match);                                          \
    161     CHECK_EQ(max, min_max.max_match);                                          \
    162   }
    163 
    164 
    165 void TestRegExpParser(bool lookbehind) {
    166   FLAG_harmony_regexp_lookbehind = lookbehind;
    167   FLAG_harmony_unicode_regexps = true;
    168 
    169   CHECK_PARSE_ERROR("?");
    170 
    171   CheckParseEq("abc", "'abc'");
    172   CheckParseEq("", "%");
    173   CheckParseEq("abc|def", "(| 'abc' 'def')");
    174   CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
    175   CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
    176   CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
    177   CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
    178   CheckParseEq("a*", "(# 0 - g 'a')");
    179   CheckParseEq("a*?", "(# 0 - n 'a')");
    180   CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
    181   CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
    182   CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
    183   CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
    184   CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
    185   CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
    186   CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
    187   CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
    188   CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
    189   CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
    190   CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
    191   CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
    192   CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
    193   CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
    194   CheckParseEq("(?:foo)", "'foo'");
    195   CheckParseEq("(?: foo )", "' foo '");
    196   CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
    197   CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
    198   CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
    199   CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
    200   if (lookbehind) {
    201     CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
    202     CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
    203   } else {
    204     CHECK_PARSE_ERROR("foo(?<=bar)baz");
    205     CHECK_PARSE_ERROR("foo(?<!bar)baz");
    206   }
    207   CheckParseEq("()", "(^ %)");
    208   CheckParseEq("(?=)", "(-> + %)");
    209   CheckParseEq("[]", "^[\\x00-\\uffff]");  // Doesn't compile on windows
    210   CheckParseEq("[^]", "[\\x00-\\uffff]");  // \uffff isn't in codepage 1252
    211   CheckParseEq("[x]", "[x]");
    212   CheckParseEq("[xyz]", "[x y z]");
    213   CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
    214   CheckParseEq("[-123]", "[- 1 2 3]");
    215   CheckParseEq("[^123]", "^[1 2 3]");
    216   CheckParseEq("]", "']'");
    217   CheckParseEq("}", "'}'");
    218   CheckParseEq("[a-b-c]", "[a-b - c]");
    219   CheckParseEq("[\\d]", "[0-9]");
    220   CheckParseEq("[x\\dz]", "[x 0-9 z]");
    221   CheckParseEq("[\\d-z]", "[0-9 - z]");
    222   CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
    223   CheckParseEq("[z-\\d]", "[z - 0-9]");
    224   // Control character outside character class.
    225   CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
    226   CheckParseEq("\\c!", "'\\c!'");
    227   CheckParseEq("\\c_", "'\\c_'");
    228   CheckParseEq("\\c~", "'\\c~'");
    229   CheckParseEq("\\c1", "'\\c1'");
    230   // Control character inside character class.
    231   CheckParseEq("[\\c!]", "[\\ c !]");
    232   CheckParseEq("[\\c_]", "[\\x1f]");
    233   CheckParseEq("[\\c~]", "[\\ c ~]");
    234   CheckParseEq("[\\ca]", "[\\x01]");
    235   CheckParseEq("[\\cz]", "[\\x1a]");
    236   CheckParseEq("[\\cA]", "[\\x01]");
    237   CheckParseEq("[\\cZ]", "[\\x1a]");
    238   CheckParseEq("[\\c1]", "[\\x11]");
    239 
    240   CheckParseEq("[a\\]c]", "[a ] c]");
    241   CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
    242   CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
    243   CheckParseEq("\\0", "'\\x00'");
    244   CheckParseEq("\\8", "'8'");
    245   CheckParseEq("\\9", "'9'");
    246   CheckParseEq("\\11", "'\\x09'");
    247   CheckParseEq("\\11a", "'\\x09a'");
    248   CheckParseEq("\\011", "'\\x09'");
    249   CheckParseEq("\\00011", "'\\x0011'");
    250   CheckParseEq("\\118", "'\\x098'");
    251   CheckParseEq("\\111", "'I'");
    252   CheckParseEq("\\1111", "'I1'");
    253   CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
    254   CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
    255   CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
    256   CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
    257   CheckParseEq("(x)(x)(x)\\1*",
    258                "(: (^ 'x') (^ 'x') (^ 'x')"
    259                " (# 0 - g (<- 1)))");
    260   CheckParseEq("(x)(x)(x)\\2*",
    261                "(: (^ 'x') (^ 'x') (^ 'x')"
    262                " (# 0 - g (<- 2)))");
    263   CheckParseEq("(x)(x)(x)\\3*",
    264                "(: (^ 'x') (^ 'x') (^ 'x')"
    265                " (# 0 - g (<- 3)))");
    266   CheckParseEq("(x)(x)(x)\\4*",
    267                "(: (^ 'x') (^ 'x') (^ 'x')"
    268                " (# 0 - g '\\x04'))");
    269   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
    270                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
    271                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
    272   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
    273                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
    274                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
    275   CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
    276   CheckParseEq("(a\\1)", "(^ 'a')");
    277   CheckParseEq("(\\1a)", "(^ 'a')");
    278   CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
    279   CheckParseEq("(?=a)?a", "'a'");
    280   CheckParseEq("(?=a){0,10}a", "'a'");
    281   CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
    282   CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
    283   CheckParseEq("(?!a)?a", "'a'");
    284   CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
    285   CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
    286   CheckParseEq("(?!\\1(a\\1)\\1)\\1",
    287                "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
    288   CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
    289                "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
    290   if (lookbehind) {
    291     CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
    292                  "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
    293   }
    294   CheckParseEq("[\\0]", "[\\x00]");
    295   CheckParseEq("[\\11]", "[\\x09]");
    296   CheckParseEq("[\\11a]", "[\\x09 a]");
    297   CheckParseEq("[\\011]", "[\\x09]");
    298   CheckParseEq("[\\00011]", "[\\x00 1 1]");
    299   CheckParseEq("[\\118]", "[\\x09 8]");
    300   CheckParseEq("[\\111]", "[I]");
    301   CheckParseEq("[\\1111]", "[I 1]");
    302   CheckParseEq("\\x34", "'\x34'");
    303   CheckParseEq("\\x60", "'\x60'");
    304   CheckParseEq("\\x3z", "'x3z'");
    305   CheckParseEq("\\c", "'\\c'");
    306   CheckParseEq("\\u0034", "'\x34'");
    307   CheckParseEq("\\u003z", "'u003z'");
    308   CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
    309 
    310   // Unicode regexps
    311   CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
    312   CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
    313                true);
    314   CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
    315                true);
    316   CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
    317   CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
    318 
    319   CHECK_SIMPLE("", false);
    320   CHECK_SIMPLE("a", true);
    321   CHECK_SIMPLE("a|b", false);
    322   CHECK_SIMPLE("a\\n", false);
    323   CHECK_SIMPLE("^a", false);
    324   CHECK_SIMPLE("a$", false);
    325   CHECK_SIMPLE("a\\b!", false);
    326   CHECK_SIMPLE("a\\Bb", false);
    327   CHECK_SIMPLE("a*", false);
    328   CHECK_SIMPLE("a*?", false);
    329   CHECK_SIMPLE("a?", false);
    330   CHECK_SIMPLE("a??", false);
    331   CHECK_SIMPLE("a{0,1}?", false);
    332   CHECK_SIMPLE("a{1,1}?", false);
    333   CHECK_SIMPLE("a{1,2}?", false);
    334   CHECK_SIMPLE("a+?", false);
    335   CHECK_SIMPLE("(a)", false);
    336   CHECK_SIMPLE("(a)\\1", false);
    337   CHECK_SIMPLE("(\\1a)", false);
    338   CHECK_SIMPLE("\\1(a)", false);
    339   CHECK_SIMPLE("a\\s", false);
    340   CHECK_SIMPLE("a\\S", false);
    341   CHECK_SIMPLE("a\\d", false);
    342   CHECK_SIMPLE("a\\D", false);
    343   CHECK_SIMPLE("a\\w", false);
    344   CHECK_SIMPLE("a\\W", false);
    345   CHECK_SIMPLE("a.", false);
    346   CHECK_SIMPLE("a\\q", false);
    347   CHECK_SIMPLE("a[a]", false);
    348   CHECK_SIMPLE("a[^a]", false);
    349   CHECK_SIMPLE("a[a-z]", false);
    350   CHECK_SIMPLE("a[\\q]", false);
    351   CHECK_SIMPLE("a(?:b)", false);
    352   CHECK_SIMPLE("a(?=b)", false);
    353   CHECK_SIMPLE("a(?!b)", false);
    354   CHECK_SIMPLE("\\x60", false);
    355   CHECK_SIMPLE("\\u0060", false);
    356   CHECK_SIMPLE("\\cA", false);
    357   CHECK_SIMPLE("\\q", false);
    358   CHECK_SIMPLE("\\1112", false);
    359   CHECK_SIMPLE("\\0", false);
    360   CHECK_SIMPLE("(a)\\1", false);
    361   CHECK_SIMPLE("(?=a)?a", false);
    362   CHECK_SIMPLE("(?!a)?a\\1", false);
    363   CHECK_SIMPLE("(?:(?=a))a\\1", false);
    364 
    365   CheckParseEq("a{}", "'a{}'");
    366   CheckParseEq("a{,}", "'a{,}'");
    367   CheckParseEq("a{", "'a{'");
    368   CheckParseEq("a{z}", "'a{z}'");
    369   CheckParseEq("a{1z}", "'a{1z}'");
    370   CheckParseEq("a{12z}", "'a{12z}'");
    371   CheckParseEq("a{12,", "'a{12,'");
    372   CheckParseEq("a{12,3b", "'a{12,3b'");
    373   CheckParseEq("{}", "'{}'");
    374   CheckParseEq("{,}", "'{,}'");
    375   CheckParseEq("{", "'{'");
    376   CheckParseEq("{z}", "'{z}'");
    377   CheckParseEq("{1z}", "'{1z}'");
    378   CheckParseEq("{12z}", "'{12z}'");
    379   CheckParseEq("{12,", "'{12,'");
    380   CheckParseEq("{12,3b", "'{12,3b'");
    381 
    382   CHECK_MIN_MAX("a", 1, 1);
    383   CHECK_MIN_MAX("abc", 3, 3);
    384   CHECK_MIN_MAX("a[bc]d", 3, 3);
    385   CHECK_MIN_MAX("a|bc", 1, 2);
    386   CHECK_MIN_MAX("ab|c", 1, 2);
    387   CHECK_MIN_MAX("a||bc", 0, 2);
    388   CHECK_MIN_MAX("|", 0, 0);
    389   CHECK_MIN_MAX("(?:ab)", 2, 2);
    390   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
    391   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
    392   CHECK_MIN_MAX("(ab)", 2, 2);
    393   CHECK_MIN_MAX("(ab|cde)", 2, 3);
    394   CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
    395   CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
    396   CHECK_MIN_MAX("(?:ab)?", 0, 2);
    397   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
    398   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
    399   CHECK_MIN_MAX("a?", 0, 1);
    400   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
    401   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
    402   CHECK_MIN_MAX("a??", 0, 1);
    403   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
    404   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
    405   CHECK_MIN_MAX("(?:a?)?", 0, 1);
    406   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
    407   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
    408   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
    409   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
    410   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
    411   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
    412   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
    413   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
    414   CHECK_MIN_MAX("a{0}", 0, 0);
    415   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
    416   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
    417   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
    418   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
    419   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
    420   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
    421   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
    422   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
    423   CHECK_MIN_MAX("a\\bc", 2, 2);
    424   CHECK_MIN_MAX("a\\Bc", 2, 2);
    425   CHECK_MIN_MAX("a\\sc", 3, 3);
    426   CHECK_MIN_MAX("a\\Sc", 3, 3);
    427   CHECK_MIN_MAX("a(?=b)c", 2, 2);
    428   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
    429   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
    430 }
    431 
    432 
    433 TEST(ParserWithLookbehind) {
    434   TestRegExpParser(true);  // Lookbehind enabled.
    435 }
    436 
    437 
    438 TEST(ParserWithoutLookbehind) {
    439   TestRegExpParser(true);  // Lookbehind enabled.
    440 }
    441 
    442 
    443 TEST(ParserRegression) {
    444   CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
    445   CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
    446   CheckParseEq("{", "'{'");
    447   CheckParseEq("a|", "(| 'a' %)");
    448 }
    449 
    450 static void ExpectError(const char* input,
    451                         const char* expected) {
    452   v8::HandleScope scope(CcTest::isolate());
    453   Zone zone;
    454   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
    455   RegExpCompileData result;
    456   CHECK(!v8::internal::RegExpParser::ParseRegExp(
    457             CcTest::i_isolate(), &zone, &reader, false, false, &result));
    458   CHECK(result.tree == NULL);
    459   CHECK(!result.error.is_null());
    460   v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
    461   CHECK_EQ(0, strcmp(expected, str.get()));
    462 }
    463 
    464 
    465 TEST(Errors) {
    466   const char* kEndBackslash = "\\ at end of pattern";
    467   ExpectError("\\", kEndBackslash);
    468   const char* kUnterminatedGroup = "Unterminated group";
    469   ExpectError("(foo", kUnterminatedGroup);
    470   const char* kInvalidGroup = "Invalid group";
    471   ExpectError("(?", kInvalidGroup);
    472   const char* kUnterminatedCharacterClass = "Unterminated character class";
    473   ExpectError("[", kUnterminatedCharacterClass);
    474   ExpectError("[a-", kUnterminatedCharacterClass);
    475   const char* kNothingToRepeat = "Nothing to repeat";
    476   ExpectError("*", kNothingToRepeat);
    477   ExpectError("?", kNothingToRepeat);
    478   ExpectError("+", kNothingToRepeat);
    479   ExpectError("{1}", kNothingToRepeat);
    480   ExpectError("{1,2}", kNothingToRepeat);
    481   ExpectError("{1,}", kNothingToRepeat);
    482 
    483   // Check that we don't allow more than kMaxCapture captures
    484   const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
    485   const char* kTooManyCaptures = "Too many captures";
    486   std::ostringstream os;
    487   for (int i = 0; i <= kMaxCaptures; i++) {
    488     os << "()";
    489   }
    490   ExpectError(os.str().c_str(), kTooManyCaptures);
    491 }
    492 
    493 
    494 static bool IsDigit(uc16 c) {
    495   return ('0' <= c && c <= '9');
    496 }
    497 
    498 
    499 static bool NotDigit(uc16 c) {
    500   return !IsDigit(c);
    501 }
    502 
    503 
    504 static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
    505   // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
    506   // WhiteSpace (7.2) and LineTerminator (7.3) values.
    507   return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
    508 }
    509 
    510 
    511 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
    512   return !IsWhiteSpaceOrLineTerminator(c);
    513 }
    514 
    515 
    516 static bool NotWord(uc16 c) {
    517   return !IsRegExpWord(c);
    518 }
    519 
    520 
    521 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
    522   Zone zone;
    523   ZoneList<CharacterRange>* ranges =
    524       new(&zone) ZoneList<CharacterRange>(2, &zone);
    525   CharacterRange::AddClassEscape(c, ranges, &zone);
    526   for (unsigned i = 0; i < (1 << 16); i++) {
    527     bool in_class = false;
    528     for (int j = 0; !in_class && j < ranges->length(); j++) {
    529       CharacterRange& range = ranges->at(j);
    530       in_class = (range.from() <= i && i <= range.to());
    531     }
    532     CHECK_EQ(pred(i), in_class);
    533   }
    534 }
    535 
    536 
    537 TEST(CharacterClassEscapes) {
    538   TestCharacterClassEscapes('.', IsRegExpNewline);
    539   TestCharacterClassEscapes('d', IsDigit);
    540   TestCharacterClassEscapes('D', NotDigit);
    541   TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
    542   TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
    543   TestCharacterClassEscapes('w', IsRegExpWord);
    544   TestCharacterClassEscapes('W', NotWord);
    545 }
    546 
    547 
    548 static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
    549                            bool is_one_byte, Zone* zone) {
    550   Isolate* isolate = CcTest::i_isolate();
    551   FlatStringReader reader(isolate, CStrVector(input));
    552   RegExpCompileData compile_data;
    553   if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
    554                                                &reader, multiline, unicode,
    555                                                &compile_data))
    556     return NULL;
    557   Handle<String> pattern = isolate->factory()
    558                                ->NewStringFromUtf8(CStrVector(input))
    559                                .ToHandleChecked();
    560   Handle<String> sample_subject =
    561       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
    562   RegExpEngine::Compile(isolate, zone, &compile_data, false, false, multiline,
    563                         false, pattern, sample_subject, is_one_byte);
    564   return compile_data.node;
    565 }
    566 
    567 
    568 static void Execute(const char* input, bool multiline, bool unicode,
    569                     bool is_one_byte, bool dot_output = false) {
    570   v8::HandleScope scope(CcTest::isolate());
    571   Zone zone;
    572   RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
    573   USE(node);
    574 #ifdef DEBUG
    575   if (dot_output) {
    576     RegExpEngine::DotPrint(input, node, false);
    577   }
    578 #endif  // DEBUG
    579 }
    580 
    581 
    582 class TestConfig {
    583  public:
    584   typedef int Key;
    585   typedef int Value;
    586   static const int kNoKey;
    587   static int NoValue() { return 0; }
    588   static inline int Compare(int a, int b) {
    589     if (a < b)
    590       return -1;
    591     else if (a > b)
    592       return 1;
    593     else
    594       return 0;
    595   }
    596 };
    597 
    598 
    599 const int TestConfig::kNoKey = 0;
    600 
    601 
    602 static unsigned PseudoRandom(int i, int j) {
    603   return ~(~((i * 781) ^ (j * 329)));
    604 }
    605 
    606 
    607 TEST(SplayTreeSimple) {
    608   static const unsigned kLimit = 1000;
    609   Zone zone;
    610   ZoneSplayTree<TestConfig> tree(&zone);
    611   bool seen[kLimit];
    612   for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
    613 #define CHECK_MAPS_EQUAL() do {                                      \
    614     for (unsigned k = 0; k < kLimit; k++)                            \
    615       CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
    616   } while (false)
    617   for (int i = 0; i < 50; i++) {
    618     for (int j = 0; j < 50; j++) {
    619       int next = PseudoRandom(i, j) % kLimit;
    620       if (seen[next]) {
    621         // We've already seen this one.  Check the value and remove
    622         // it.
    623         ZoneSplayTree<TestConfig>::Locator loc;
    624         CHECK(tree.Find(next, &loc));
    625         CHECK_EQ(next, loc.key());
    626         CHECK_EQ(3 * next, loc.value());
    627         tree.Remove(next);
    628         seen[next] = false;
    629         CHECK_MAPS_EQUAL();
    630       } else {
    631         // Check that it wasn't there already and then add it.
    632         ZoneSplayTree<TestConfig>::Locator loc;
    633         CHECK(!tree.Find(next, &loc));
    634         CHECK(tree.Insert(next, &loc));
    635         CHECK_EQ(next, loc.key());
    636         loc.set_value(3 * next);
    637         seen[next] = true;
    638         CHECK_MAPS_EQUAL();
    639       }
    640       int val = PseudoRandom(j, i) % kLimit;
    641       if (seen[val]) {
    642         ZoneSplayTree<TestConfig>::Locator loc;
    643         CHECK(tree.FindGreatestLessThan(val, &loc));
    644         CHECK_EQ(loc.key(), val);
    645         break;
    646       }
    647       val = PseudoRandom(i + j, i - j) % kLimit;
    648       if (seen[val]) {
    649         ZoneSplayTree<TestConfig>::Locator loc;
    650         CHECK(tree.FindLeastGreaterThan(val, &loc));
    651         CHECK_EQ(loc.key(), val);
    652         break;
    653       }
    654     }
    655   }
    656 }
    657 
    658 
    659 TEST(DispatchTableConstruction) {
    660   // Initialize test data.
    661   static const int kLimit = 1000;
    662   static const int kRangeCount = 8;
    663   static const int kRangeSize = 16;
    664   uc16 ranges[kRangeCount][2 * kRangeSize];
    665   for (int i = 0; i < kRangeCount; i++) {
    666     Vector<uc16> range(ranges[i], 2 * kRangeSize);
    667     for (int j = 0; j < 2 * kRangeSize; j++) {
    668       range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
    669     }
    670     range.Sort();
    671     for (int j = 1; j < 2 * kRangeSize; j++) {
    672       CHECK(range[j-1] <= range[j]);
    673     }
    674   }
    675   // Enter test data into dispatch table.
    676   Zone zone;
    677   DispatchTable table(&zone);
    678   for (int i = 0; i < kRangeCount; i++) {
    679     uc16* range = ranges[i];
    680     for (int j = 0; j < 2 * kRangeSize; j += 2)
    681       table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
    682   }
    683   // Check that the table looks as we would expect
    684   for (int p = 0; p < kLimit; p++) {
    685     OutSet* outs = table.Get(p);
    686     for (int j = 0; j < kRangeCount; j++) {
    687       uc16* range = ranges[j];
    688       bool is_on = false;
    689       for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
    690         is_on = (range[k] <= p && p <= range[k + 1]);
    691       CHECK_EQ(is_on, outs->Get(j));
    692     }
    693   }
    694 }
    695 
    696 
    697 // Test of debug-only syntax.
    698 #ifdef DEBUG
    699 
    700 TEST(ParsePossessiveRepetition) {
    701   bool old_flag_value = FLAG_regexp_possessive_quantifier;
    702 
    703   // Enable possessive quantifier syntax.
    704   FLAG_regexp_possessive_quantifier = true;
    705 
    706   CheckParseEq("a*+", "(# 0 - p 'a')");
    707   CheckParseEq("a++", "(# 1 - p 'a')");
    708   CheckParseEq("a?+", "(# 0 1 p 'a')");
    709   CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
    710   CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
    711 
    712   // Disable possessive quantifier syntax.
    713   FLAG_regexp_possessive_quantifier = false;
    714 
    715   CHECK_PARSE_ERROR("a*+");
    716   CHECK_PARSE_ERROR("a++");
    717   CHECK_PARSE_ERROR("a?+");
    718   CHECK_PARSE_ERROR("a{10,20}+");
    719   CHECK_PARSE_ERROR("a{10,20}+b");
    720 
    721   FLAG_regexp_possessive_quantifier = old_flag_value;
    722 }
    723 
    724 #endif
    725 
    726 // Tests of interpreter.
    727 
    728 
    729 #ifndef V8_INTERPRETED_REGEXP
    730 
// Pick the native regexp macro assembler class for the target architecture
// and expose it under the single name ArchRegExpMacroAssembler, which the
// code below uses.  If none of the V8_TARGET_ARCH_* macros listed here is
// set, the typedef is not defined at all.
#if V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM64
typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_PPC
typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS64
// NOTE(review): same class name as the MIPS branch; presumably the mips64
// regexp header included above declares RegExpMacroAssemblerMIPS as well --
// confirm.
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X87
typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
#endif
    748 
    749 class ContextInitializer {
    750  public:
    751   ContextInitializer()
    752       : scope_(CcTest::isolate()),
    753         env_(v8::Context::New(CcTest::isolate())) {
    754     env_->Enter();
    755   }
    756   ~ContextInitializer() {
    757     env_->Exit();
    758   }
    759  private:
    760   v8::HandleScope scope_;
    761   v8::Local<v8::Context> env_;
    762 };
    763 
    764 
    765 static ArchRegExpMacroAssembler::Result Execute(Code* code,
    766                                                 String* input,
    767                                                 int start_offset,
    768                                                 const byte* input_start,
    769                                                 const byte* input_end,
    770                                                 int* captures) {
    771   return NativeRegExpMacroAssembler::Execute(
    772       code,
    773       input,
    774       start_offset,
    775       input_start,
    776       input_end,
    777       captures,
    778       0,
    779       CcTest::i_isolate());
    780 }
    781 
    782 
    783 TEST(MacroAssemblerNativeSuccess) {
    784   v8::V8::Initialize();
    785   ContextInitializer initializer;
    786   Isolate* isolate = CcTest::i_isolate();
    787   Factory* factory = isolate->factory();
    788   Zone zone;
    789 
    790   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    791                              4);
    792 
    793   m.Succeed();
    794 
    795   Handle<String> source = factory->NewStringFromStaticChars("");
    796   Handle<Object> code_object = m.GetCode(source);
    797   Handle<Code> code = Handle<Code>::cast(code_object);
    798 
    799   int captures[4] = {42, 37, 87, 117};
    800   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    801   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    802   const byte* start_adr =
    803       reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
    804 
    805   NativeRegExpMacroAssembler::Result result =
    806       Execute(*code,
    807               *input,
    808               0,
    809               start_adr,
    810               start_adr + seq_input->length(),
    811               captures);
    812 
    813   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    814   CHECK_EQ(-1, captures[0]);
    815   CHECK_EQ(-1, captures[1]);
    816   CHECK_EQ(-1, captures[2]);
    817   CHECK_EQ(-1, captures[3]);
    818 }
    819 
    820 
    821 TEST(MacroAssemblerNativeSimple) {
    822   v8::V8::Initialize();
    823   ContextInitializer initializer;
    824   Isolate* isolate = CcTest::i_isolate();
    825   Factory* factory = isolate->factory();
    826   Zone zone;
    827 
    828   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    829                              4);
    830 
    831   Label fail, backtrack;
    832   m.PushBacktrack(&fail);
    833   m.CheckNotAtStart(0, NULL);
    834   m.LoadCurrentCharacter(2, NULL);
    835   m.CheckNotCharacter('o', NULL);
    836   m.LoadCurrentCharacter(1, NULL, false);
    837   m.CheckNotCharacter('o', NULL);
    838   m.LoadCurrentCharacter(0, NULL, false);
    839   m.CheckNotCharacter('f', NULL);
    840   m.WriteCurrentPositionToRegister(0, 0);
    841   m.WriteCurrentPositionToRegister(1, 3);
    842   m.AdvanceCurrentPosition(3);
    843   m.PushBacktrack(&backtrack);
    844   m.Succeed();
    845   m.Bind(&backtrack);
    846   m.Backtrack();
    847   m.Bind(&fail);
    848   m.Fail();
    849 
    850   Handle<String> source = factory->NewStringFromStaticChars("^foo");
    851   Handle<Object> code_object = m.GetCode(source);
    852   Handle<Code> code = Handle<Code>::cast(code_object);
    853 
    854   int captures[4] = {42, 37, 87, 117};
    855   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    856   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    857   Address start_adr = seq_input->GetCharsAddress();
    858 
    859   NativeRegExpMacroAssembler::Result result =
    860       Execute(*code,
    861               *input,
    862               0,
    863               start_adr,
    864               start_adr + input->length(),
    865               captures);
    866 
    867   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    868   CHECK_EQ(0, captures[0]);
    869   CHECK_EQ(3, captures[1]);
    870   CHECK_EQ(-1, captures[2]);
    871   CHECK_EQ(-1, captures[3]);
    872 
    873   input = factory->NewStringFromStaticChars("barbarbar");
    874   seq_input = Handle<SeqOneByteString>::cast(input);
    875   start_adr = seq_input->GetCharsAddress();
    876 
    877   result = Execute(*code,
    878                    *input,
    879                    0,
    880                    start_adr,
    881                    start_adr + input->length(),
    882                    captures);
    883 
    884   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    885 }
    886 
    887 
    888 TEST(MacroAssemblerNativeSimpleUC16) {
    889   v8::V8::Initialize();
    890   ContextInitializer initializer;
    891   Isolate* isolate = CcTest::i_isolate();
    892   Factory* factory = isolate->factory();
    893   Zone zone;
    894 
    895   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
    896                              4);
    897 
    898   Label fail, backtrack;
    899   m.PushBacktrack(&fail);
    900   m.CheckNotAtStart(0, NULL);
    901   m.LoadCurrentCharacter(2, NULL);
    902   m.CheckNotCharacter('o', NULL);
    903   m.LoadCurrentCharacter(1, NULL, false);
    904   m.CheckNotCharacter('o', NULL);
    905   m.LoadCurrentCharacter(0, NULL, false);
    906   m.CheckNotCharacter('f', NULL);
    907   m.WriteCurrentPositionToRegister(0, 0);
    908   m.WriteCurrentPositionToRegister(1, 3);
    909   m.AdvanceCurrentPosition(3);
    910   m.PushBacktrack(&backtrack);
    911   m.Succeed();
    912   m.Bind(&backtrack);
    913   m.Backtrack();
    914   m.Bind(&fail);
    915   m.Fail();
    916 
    917   Handle<String> source = factory->NewStringFromStaticChars("^foo");
    918   Handle<Object> code_object = m.GetCode(source);
    919   Handle<Code> code = Handle<Code>::cast(code_object);
    920 
    921   int captures[4] = {42, 37, 87, 117};
    922   const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
    923                               static_cast<uc16>(0x2603)};
    924   Handle<String> input = factory->NewStringFromTwoByte(
    925       Vector<const uc16>(input_data, 6)).ToHandleChecked();
    926   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
    927   Address start_adr = seq_input->GetCharsAddress();
    928 
    929   NativeRegExpMacroAssembler::Result result =
    930       Execute(*code,
    931               *input,
    932               0,
    933               start_adr,
    934               start_adr + input->length(),
    935               captures);
    936 
    937   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    938   CHECK_EQ(0, captures[0]);
    939   CHECK_EQ(3, captures[1]);
    940   CHECK_EQ(-1, captures[2]);
    941   CHECK_EQ(-1, captures[3]);
    942 
    943   const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
    944                                static_cast<uc16>(0x2603)};
    945   input = factory->NewStringFromTwoByte(
    946       Vector<const uc16>(input_data2, 9)).ToHandleChecked();
    947   seq_input = Handle<SeqTwoByteString>::cast(input);
    948   start_adr = seq_input->GetCharsAddress();
    949 
    950   result = Execute(*code,
    951                    *input,
    952                    0,
    953                    start_adr,
    954                    start_adr + input->length() * 2,
    955                    captures);
    956 
    957   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    958 }
    959 
    960 
    961 TEST(MacroAssemblerNativeBacktrack) {
    962   v8::V8::Initialize();
    963   ContextInitializer initializer;
    964   Isolate* isolate = CcTest::i_isolate();
    965   Factory* factory = isolate->factory();
    966   Zone zone;
    967 
    968   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    969                              0);
    970 
    971   Label fail;
    972   Label backtrack;
    973   m.LoadCurrentCharacter(10, &fail);
    974   m.Succeed();
    975   m.Bind(&fail);
    976   m.PushBacktrack(&backtrack);
    977   m.LoadCurrentCharacter(10, NULL);
    978   m.Succeed();
    979   m.Bind(&backtrack);
    980   m.Fail();
    981 
    982   Handle<String> source = factory->NewStringFromStaticChars("..........");
    983   Handle<Object> code_object = m.GetCode(source);
    984   Handle<Code> code = Handle<Code>::cast(code_object);
    985 
    986   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    987   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    988   Address start_adr = seq_input->GetCharsAddress();
    989 
    990   NativeRegExpMacroAssembler::Result result =
    991       Execute(*code,
    992               *input,
    993               0,
    994               start_adr,
    995               start_adr + input->length(),
    996               NULL);
    997 
    998   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    999 }
   1000 
   1001 
   1002 TEST(MacroAssemblerNativeBackReferenceLATIN1) {
   1003   v8::V8::Initialize();
   1004   ContextInitializer initializer;
   1005   Isolate* isolate = CcTest::i_isolate();
   1006   Factory* factory = isolate->factory();
   1007   Zone zone;
   1008 
   1009   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1010                              4);
   1011 
   1012   m.WriteCurrentPositionToRegister(0, 0);
   1013   m.AdvanceCurrentPosition(2);
   1014   m.WriteCurrentPositionToRegister(1, 0);
   1015   Label nomatch;
   1016   m.CheckNotBackReference(0, false, &nomatch);
   1017   m.Fail();
   1018   m.Bind(&nomatch);
   1019   m.AdvanceCurrentPosition(2);
   1020   Label missing_match;
   1021   m.CheckNotBackReference(0, false, &missing_match);
   1022   m.WriteCurrentPositionToRegister(2, 0);
   1023   m.Succeed();
   1024   m.Bind(&missing_match);
   1025   m.Fail();
   1026 
   1027   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
   1028   Handle<Object> code_object = m.GetCode(source);
   1029   Handle<Code> code = Handle<Code>::cast(code_object);
   1030 
   1031   Handle<String> input = factory->NewStringFromStaticChars("fooofo");
   1032   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1033   Address start_adr = seq_input->GetCharsAddress();
   1034 
   1035   int output[4];
   1036   NativeRegExpMacroAssembler::Result result =
   1037       Execute(*code,
   1038               *input,
   1039               0,
   1040               start_adr,
   1041               start_adr + input->length(),
   1042               output);
   1043 
   1044   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1045   CHECK_EQ(0, output[0]);
   1046   CHECK_EQ(2, output[1]);
   1047   CHECK_EQ(6, output[2]);
   1048   CHECK_EQ(-1, output[3]);
   1049 }
   1050 
   1051 
   1052 TEST(MacroAssemblerNativeBackReferenceUC16) {
   1053   v8::V8::Initialize();
   1054   ContextInitializer initializer;
   1055   Isolate* isolate = CcTest::i_isolate();
   1056   Factory* factory = isolate->factory();
   1057   Zone zone;
   1058 
   1059   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
   1060                              4);
   1061 
   1062   m.WriteCurrentPositionToRegister(0, 0);
   1063   m.AdvanceCurrentPosition(2);
   1064   m.WriteCurrentPositionToRegister(1, 0);
   1065   Label nomatch;
   1066   m.CheckNotBackReference(0, false, &nomatch);
   1067   m.Fail();
   1068   m.Bind(&nomatch);
   1069   m.AdvanceCurrentPosition(2);
   1070   Label missing_match;
   1071   m.CheckNotBackReference(0, false, &missing_match);
   1072   m.WriteCurrentPositionToRegister(2, 0);
   1073   m.Succeed();
   1074   m.Bind(&missing_match);
   1075   m.Fail();
   1076 
   1077   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
   1078   Handle<Object> code_object = m.GetCode(source);
   1079   Handle<Code> code = Handle<Code>::cast(code_object);
   1080 
   1081   const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
   1082   Handle<String> input = factory->NewStringFromTwoByte(
   1083       Vector<const uc16>(input_data, 6)).ToHandleChecked();
   1084   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
   1085   Address start_adr = seq_input->GetCharsAddress();
   1086 
   1087   int output[4];
   1088   NativeRegExpMacroAssembler::Result result =
   1089       Execute(*code,
   1090               *input,
   1091               0,
   1092               start_adr,
   1093               start_adr + input->length() * 2,
   1094               output);
   1095 
   1096   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1097   CHECK_EQ(0, output[0]);
   1098   CHECK_EQ(2, output[1]);
   1099   CHECK_EQ(6, output[2]);
   1100   CHECK_EQ(-1, output[3]);
   1101 }
   1102 
   1103 
   1104 
   1105 TEST(MacroAssemblernativeAtStart) {
   1106   v8::V8::Initialize();
   1107   ContextInitializer initializer;
   1108   Isolate* isolate = CcTest::i_isolate();
   1109   Factory* factory = isolate->factory();
   1110   Zone zone;
   1111 
   1112   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1113                              0);
   1114 
   1115   Label not_at_start, newline, fail;
   1116   m.CheckNotAtStart(0, &not_at_start);
   1117   // Check that prevchar = '\n' and current = 'f'.
   1118   m.CheckCharacter('\n', &newline);
   1119   m.Bind(&fail);
   1120   m.Fail();
   1121   m.Bind(&newline);
   1122   m.LoadCurrentCharacter(0, &fail);
   1123   m.CheckNotCharacter('f', &fail);
   1124   m.Succeed();
   1125 
   1126   m.Bind(&not_at_start);
   1127   // Check that prevchar = 'o' and current = 'b'.
   1128   Label prevo;
   1129   m.CheckCharacter('o', &prevo);
   1130   m.Fail();
   1131   m.Bind(&prevo);
   1132   m.LoadCurrentCharacter(0, &fail);
   1133   m.CheckNotCharacter('b', &fail);
   1134   m.Succeed();
   1135 
   1136   Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
   1137   Handle<Object> code_object = m.GetCode(source);
   1138   Handle<Code> code = Handle<Code>::cast(code_object);
   1139 
   1140   Handle<String> input = factory->NewStringFromStaticChars("foobar");
   1141   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1142   Address start_adr = seq_input->GetCharsAddress();
   1143 
   1144   NativeRegExpMacroAssembler::Result result =
   1145       Execute(*code,
   1146               *input,
   1147               0,
   1148               start_adr,
   1149               start_adr + input->length(),
   1150               NULL);
   1151 
   1152   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1153 
   1154   result = Execute(*code,
   1155                    *input,
   1156                    3,
   1157                    start_adr + 3,
   1158                    start_adr + input->length(),
   1159                    NULL);
   1160 
   1161   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1162 }
   1163 
   1164 
   1165 TEST(MacroAssemblerNativeBackRefNoCase) {
   1166   v8::V8::Initialize();
   1167   ContextInitializer initializer;
   1168   Isolate* isolate = CcTest::i_isolate();
   1169   Factory* factory = isolate->factory();
   1170   Zone zone;
   1171 
   1172   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1173                              4);
   1174 
   1175   Label fail, succ;
   1176 
   1177   m.WriteCurrentPositionToRegister(0, 0);
   1178   m.WriteCurrentPositionToRegister(2, 0);
   1179   m.AdvanceCurrentPosition(3);
   1180   m.WriteCurrentPositionToRegister(3, 0);
   1181   m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "AbC".
   1182   m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "ABC".
   1183   Label expected_fail;
   1184   m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
   1185   m.Bind(&fail);
   1186   m.Fail();
   1187 
   1188   m.Bind(&expected_fail);
   1189   m.AdvanceCurrentPosition(3);  // Skip "xYz"
   1190   m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
   1191   m.Fail();
   1192 
   1193   m.Bind(&succ);
   1194   m.WriteCurrentPositionToRegister(1, 0);
   1195   m.Succeed();
   1196 
   1197   Handle<String> source =
   1198       factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
   1199   Handle<Object> code_object = m.GetCode(source);
   1200   Handle<Code> code = Handle<Code>::cast(code_object);
   1201 
   1202   Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
   1203   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1204   Address start_adr = seq_input->GetCharsAddress();
   1205 
   1206   int output[4];
   1207   NativeRegExpMacroAssembler::Result result =
   1208       Execute(*code,
   1209               *input,
   1210               0,
   1211               start_adr,
   1212               start_adr + input->length(),
   1213               output);
   1214 
   1215   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1216   CHECK_EQ(0, output[0]);
   1217   CHECK_EQ(12, output[1]);
   1218   CHECK_EQ(0, output[2]);
   1219   CHECK_EQ(3, output[3]);
   1220 }
   1221 
   1222 
   1223 
   1224 TEST(MacroAssemblerNativeRegisters) {
   1225   v8::V8::Initialize();
   1226   ContextInitializer initializer;
   1227   Isolate* isolate = CcTest::i_isolate();
   1228   Factory* factory = isolate->factory();
   1229   Zone zone;
   1230 
   1231   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1232                              6);
   1233 
   1234   uc16 foo_chars[3] = {'f', 'o', 'o'};
   1235   Vector<const uc16> foo(foo_chars, 3);
   1236 
   1237   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
   1238   Label fail;
   1239   Label backtrack;
   1240   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
   1241   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
   1242   m.PushBacktrack(&backtrack);
   1243   m.WriteStackPointerToRegister(sp);
   1244   // Fill stack and registers
   1245   m.AdvanceCurrentPosition(2);
   1246   m.WriteCurrentPositionToRegister(out1, 0);
   1247   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
   1248   m.PushBacktrack(&fail);
   1249   // Drop backtrack stack frames.
   1250   m.ReadStackPointerFromRegister(sp);
   1251   // And take the first backtrack (to &backtrack)
   1252   m.Backtrack();
   1253 
   1254   m.PushCurrentPosition();
   1255   m.AdvanceCurrentPosition(2);
   1256   m.PopCurrentPosition();
   1257 
   1258   m.Bind(&backtrack);
   1259   m.PopRegister(out1);
   1260   m.ReadCurrentPositionFromRegister(out1);
   1261   m.AdvanceCurrentPosition(3);
   1262   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
   1263 
   1264   Label loop;
   1265   m.SetRegister(loop_cnt, 0);  // loop counter
   1266   m.Bind(&loop);
   1267   m.AdvanceRegister(loop_cnt, 1);
   1268   m.AdvanceCurrentPosition(1);
   1269   m.IfRegisterLT(loop_cnt, 3, &loop);
   1270   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
   1271 
   1272   Label loop2;
   1273   m.SetRegister(loop_cnt, 2);  // loop counter
   1274   m.Bind(&loop2);
   1275   m.AdvanceRegister(loop_cnt, -1);
   1276   m.AdvanceCurrentPosition(1);
   1277   m.IfRegisterGE(loop_cnt, 0, &loop2);
   1278   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
   1279 
   1280   Label loop3;
   1281   Label exit_loop3;
   1282   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
   1283   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
   1284   m.ReadCurrentPositionFromRegister(out3);
   1285   m.Bind(&loop3);
   1286   m.AdvanceCurrentPosition(1);
   1287   m.CheckGreedyLoop(&exit_loop3);
   1288   m.GoTo(&loop3);
   1289   m.Bind(&exit_loop3);
   1290   m.PopCurrentPosition();
   1291   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
   1292 
   1293   m.Succeed();
   1294 
   1295   m.Bind(&fail);
   1296   m.Fail();
   1297 
   1298   Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
   1299   Handle<Object> code_object = m.GetCode(source);
   1300   Handle<Code> code = Handle<Code>::cast(code_object);
   1301 
   1302   // String long enough for test (content doesn't matter).
   1303   Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
   1304   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1305   Address start_adr = seq_input->GetCharsAddress();
   1306 
   1307   int output[6];
   1308   NativeRegExpMacroAssembler::Result result =
   1309       Execute(*code,
   1310               *input,
   1311               0,
   1312               start_adr,
   1313               start_adr + input->length(),
   1314               output);
   1315 
   1316   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1317   CHECK_EQ(0, output[0]);
   1318   CHECK_EQ(3, output[1]);
   1319   CHECK_EQ(6, output[2]);
   1320   CHECK_EQ(9, output[3]);
   1321   CHECK_EQ(9, output[4]);
   1322   CHECK_EQ(-1, output[5]);
   1323 }
   1324 
   1325 
   1326 TEST(MacroAssemblerStackOverflow) {
   1327   v8::V8::Initialize();
   1328   ContextInitializer initializer;
   1329   Isolate* isolate = CcTest::i_isolate();
   1330   Factory* factory = isolate->factory();
   1331   Zone zone;
   1332 
   1333   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1334                              0);
   1335 
   1336   Label loop;
   1337   m.Bind(&loop);
   1338   m.PushBacktrack(&loop);
   1339   m.GoTo(&loop);
   1340 
   1341   Handle<String> source =
   1342       factory->NewStringFromStaticChars("<stack overflow test>");
   1343   Handle<Object> code_object = m.GetCode(source);
   1344   Handle<Code> code = Handle<Code>::cast(code_object);
   1345 
   1346   // String long enough for test (content doesn't matter).
   1347   Handle<String> input = factory->NewStringFromStaticChars("dummy");
   1348   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1349   Address start_adr = seq_input->GetCharsAddress();
   1350 
   1351   NativeRegExpMacroAssembler::Result result =
   1352       Execute(*code,
   1353               *input,
   1354               0,
   1355               start_adr,
   1356               start_adr + input->length(),
   1357               NULL);
   1358 
   1359   CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
   1360   CHECK(isolate->has_pending_exception());
   1361   isolate->clear_pending_exception();
   1362 }
   1363 
   1364 
   1365 TEST(MacroAssemblerNativeLotsOfRegisters) {
   1366   v8::V8::Initialize();
   1367   ContextInitializer initializer;
   1368   Isolate* isolate = CcTest::i_isolate();
   1369   Factory* factory = isolate->factory();
   1370   Zone zone;
   1371 
   1372   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
   1373                              2);
   1374 
   1375   // At least 2048, to ensure the allocated space for registers
   1376   // span one full page.
   1377   const int large_number = 8000;
   1378   m.WriteCurrentPositionToRegister(large_number, 42);
   1379   m.WriteCurrentPositionToRegister(0, 0);
   1380   m.WriteCurrentPositionToRegister(1, 1);
   1381   Label done;
   1382   m.CheckNotBackReference(0, false, &done);  // Performs a system-stack push.
   1383   m.Bind(&done);
   1384   m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
   1385   m.PopRegister(1);
   1386   m.Succeed();
   1387 
   1388   Handle<String> source =
   1389       factory->NewStringFromStaticChars("<huge register space test>");
   1390   Handle<Object> code_object = m.GetCode(source);
   1391   Handle<Code> code = Handle<Code>::cast(code_object);
   1392 
   1393   // String long enough for test (content doesn't matter).
   1394   Handle<String> input = factory->NewStringFromStaticChars("sample text");
   1395   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
   1396   Address start_adr = seq_input->GetCharsAddress();
   1397 
   1398   int captures[2];
   1399   NativeRegExpMacroAssembler::Result result =
   1400       Execute(*code,
   1401               *input,
   1402               0,
   1403               start_adr,
   1404               start_adr + input->length(),
   1405               captures);
   1406 
   1407   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
   1408   CHECK_EQ(0, captures[0]);
   1409   CHECK_EQ(42, captures[1]);
   1410 
   1411   isolate->clear_pending_exception();
   1412 }
   1413 
   1414 #else  // V8_INTERPRETED_REGEXP
   1415 
   1416 TEST(MacroAssembler) {
   1417   byte codes[1024];
   1418   Zone zone;
   1419   RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
   1420                                  &zone);
   1421   // ^f(o)o.
   1422   Label start, fail, backtrack;
   1423 
   1424   m.SetRegister(4, 42);
   1425   m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
   1426   m.AdvanceRegister(4, 42);
   1427   m.GoTo(&start);
   1428   m.Fail();
   1429   m.Bind(&start);
   1430   m.PushBacktrack(&fail);
   1431   m.CheckNotAtStart(0, NULL);
   1432   m.LoadCurrentCharacter(0, NULL);
   1433   m.CheckNotCharacter('f', NULL);
   1434   m.LoadCurrentCharacter(1, NULL);
   1435   m.CheckNotCharacter('o', NULL);
   1436   m.LoadCurrentCharacter(2, NULL);
   1437   m.CheckNotCharacter('o', NULL);
   1438   m.WriteCurrentPositionToRegister(0, 0);
   1439   m.WriteCurrentPositionToRegister(1, 3);
   1440   m.WriteCurrentPositionToRegister(2, 1);
   1441   m.WriteCurrentPositionToRegister(3, 2);
   1442   m.AdvanceCurrentPosition(3);
   1443   m.PushBacktrack(&backtrack);
   1444   m.Succeed();
   1445   m.Bind(&backtrack);
   1446   m.ClearRegisters(2, 3);
   1447   m.Backtrack();
   1448   m.Bind(&fail);
   1449   m.PopRegister(0);
   1450   m.Fail();
   1451 
   1452   Isolate* isolate = CcTest::i_isolate();
   1453   Factory* factory = isolate->factory();
   1454   HandleScope scope(isolate);
   1455 
   1456   Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
   1457   Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
   1458   int captures[5];
   1459 
   1460   const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
   1461   Handle<String> f1_16 = factory->NewStringFromTwoByte(
   1462       Vector<const uc16>(str1, 6)).ToHandleChecked();
   1463 
   1464   CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
   1465   CHECK_EQ(0, captures[0]);
   1466   CHECK_EQ(3, captures[1]);
   1467   CHECK_EQ(1, captures[2]);
   1468   CHECK_EQ(2, captures[3]);
   1469   CHECK_EQ(84, captures[4]);
   1470 
   1471   const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
   1472   Handle<String> f2_16 = factory->NewStringFromTwoByte(
   1473       Vector<const uc16>(str2, 6)).ToHandleChecked();
   1474 
   1475   CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
   1476   CHECK_EQ(42, captures[0]);
   1477 }
   1478 
   1479 #endif  // V8_INTERPRETED_REGEXP
   1480 
   1481 
   1482 TEST(AddInverseToTable) {
   1483   static const int kLimit = 1000;
   1484   static const int kRangeCount = 16;
   1485   for (int t = 0; t < 10; t++) {
   1486     Zone zone;
   1487     ZoneList<CharacterRange>* ranges =
   1488         new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
   1489     for (int i = 0; i < kRangeCount; i++) {
   1490       int from = PseudoRandom(t + 87, i + 25) % kLimit;
   1491       int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
   1492       if (to > kLimit) to = kLimit;
   1493       ranges->Add(CharacterRange(from, to), &zone);
   1494     }
   1495     DispatchTable table(&zone);
   1496     DispatchTableConstructor cons(&table, false, &zone);
   1497     cons.set_choice_index(0);
   1498     cons.AddInverse(ranges);
   1499     for (int i = 0; i < kLimit; i++) {
   1500       bool is_on = false;
   1501       for (int j = 0; !is_on && j < kRangeCount; j++)
   1502         is_on = ranges->at(j).Contains(i);
   1503       OutSet* set = table.Get(i);
   1504       CHECK_EQ(is_on, set->Get(0) == false);
   1505     }
   1506   }
   1507   Zone zone;
   1508   ZoneList<CharacterRange>* ranges =
   1509       new(&zone) ZoneList<CharacterRange>(1, &zone);
   1510   ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
   1511   DispatchTable table(&zone);
   1512   DispatchTableConstructor cons(&table, false, &zone);
   1513   cons.set_choice_index(0);
   1514   cons.AddInverse(ranges);
   1515   CHECK(!table.Get(0xFFFE)->Get(0));
   1516   CHECK(table.Get(0xFFFF)->Get(0));
   1517 }
   1518 
   1519 
   1520 static uc32 canonicalize(uc32 c) {
   1521   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
   1522   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
   1523   if (count == 0) {
   1524     return c;
   1525   } else {
   1526     CHECK_EQ(1, count);
   1527     return canon[0];
   1528   }
   1529 }
   1530 
   1531 
   1532 TEST(LatinCanonicalize) {
   1533   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1534   for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
   1535     unibrow::uchar upper = lower + ('A' - 'a');
   1536     CHECK_EQ(canonicalize(lower), canonicalize(upper));
   1537     unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1538     int length = un_canonicalize.get(lower, '\0', uncanon);
   1539     CHECK_EQ(2, length);
   1540     CHECK_EQ(upper, uncanon[0]);
   1541     CHECK_EQ(lower, uncanon[1]);
   1542   }
   1543   for (uc32 c = 128; c < (1 << 21); c++)
   1544     CHECK_GE(canonicalize(c), 128);
   1545   unibrow::Mapping<unibrow::ToUppercase> to_upper;
   1546   // Canonicalization is only defined for the Basic Multilingual Plane.
   1547   for (uc32 c = 0; c < (1 << 16); c++) {
   1548     unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
   1549     int length = to_upper.get(c, '\0', upper);
   1550     if (length == 0) {
   1551       length = 1;
   1552       upper[0] = c;
   1553     }
   1554     uc32 u = upper[0];
   1555     if (length > 1 || (c >= 128 && u < 128))
   1556       u = c;
   1557     CHECK_EQ(u, canonicalize(c));
   1558   }
   1559 }
   1560 
   1561 
   1562 static uc32 CanonRangeEnd(uc32 c) {
   1563   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
   1564   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
   1565   if (count == 0) {
   1566     return c;
   1567   } else {
   1568     CHECK_EQ(1, count);
   1569     return canon[0];
   1570   }
   1571 }
   1572 
   1573 
   1574 TEST(RangeCanonicalization) {
   1575   // Check that we arrive at the same result when using the basic
   1576   // range canonicalization primitives as when using immediate
   1577   // canonicalization.
   1578   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1579   int block_start = 0;
   1580   while (block_start <= 0xFFFF) {
   1581     uc32 block_end = CanonRangeEnd(block_start);
   1582     unsigned block_length = block_end - block_start + 1;
   1583     if (block_length > 1) {
   1584       unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1585       int first_length = un_canonicalize.get(block_start, '\0', first);
   1586       for (unsigned i = 1; i < block_length; i++) {
   1587         unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1588         int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
   1589         CHECK_EQ(first_length, succ_length);
   1590         for (int j = 0; j < succ_length; j++) {
   1591           int calc = first[j] + i;
   1592           int found = succ[j];
   1593           CHECK_EQ(calc, found);
   1594         }
   1595       }
   1596     }
   1597     block_start = block_start + block_length;
   1598   }
   1599 }
   1600 
   1601 
   1602 TEST(UncanonicalizeEquivalence) {
   1603   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
   1604   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1605   for (int i = 0; i < (1 << 16); i++) {
   1606     int length = un_canonicalize.get(i, '\0', chars);
   1607     for (int j = 0; j < length; j++) {
   1608       unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
   1609       int length2 = un_canonicalize.get(chars[j], '\0', chars2);
   1610       CHECK_EQ(length, length2);
   1611       for (int k = 0; k < length; k++)
   1612         CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
   1613     }
   1614   }
   1615 }
   1616 
   1617 
   1618 static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
   1619                                       Vector<CharacterRange> expected) {
   1620   Zone zone;
   1621   int count = expected.length();
   1622   ZoneList<CharacterRange>* list =
   1623       new(&zone) ZoneList<CharacterRange>(count, &zone);
   1624   input.AddCaseEquivalents(isolate, &zone, list, false);
   1625   CHECK_EQ(count, list->length());
   1626   for (int i = 0; i < list->length(); i++) {
   1627     CHECK_EQ(expected[i].from(), list->at(i).from());
   1628     CHECK_EQ(expected[i].to(), list->at(i).to());
   1629   }
   1630 }
   1631 
   1632 
   1633 static void TestSimpleRangeCaseIndependence(Isolate* isolate,
   1634                                             CharacterRange input,
   1635                                             CharacterRange expected) {
   1636   EmbeddedVector<CharacterRange, 1> vector;
   1637   vector[0] = expected;
   1638   TestRangeCaseIndependence(isolate, input, vector);
   1639 }
   1640 
   1641 
   1642 TEST(CharacterRangeCaseIndependence) {
   1643   Isolate* isolate = CcTest::i_isolate();
   1644   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
   1645                                   CharacterRange::Singleton('A'));
   1646   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
   1647                                   CharacterRange::Singleton('Z'));
   1648   TestSimpleRangeCaseIndependence(isolate, CharacterRange('a', 'z'),
   1649                                   CharacterRange('A', 'Z'));
   1650   TestSimpleRangeCaseIndependence(isolate, CharacterRange('c', 'f'),
   1651                                   CharacterRange('C', 'F'));
   1652   TestSimpleRangeCaseIndependence(isolate, CharacterRange('a', 'b'),
   1653                                   CharacterRange('A', 'B'));
   1654   TestSimpleRangeCaseIndependence(isolate, CharacterRange('y', 'z'),
   1655                                   CharacterRange('Y', 'Z'));
   1656   TestSimpleRangeCaseIndependence(isolate, CharacterRange('a' - 1, 'z' + 1),
   1657                                   CharacterRange('A', 'Z'));
   1658   TestSimpleRangeCaseIndependence(isolate, CharacterRange('A', 'Z'),
   1659                                   CharacterRange('a', 'z'));
   1660   TestSimpleRangeCaseIndependence(isolate, CharacterRange('C', 'F'),
   1661                                   CharacterRange('c', 'f'));
   1662   TestSimpleRangeCaseIndependence(isolate, CharacterRange('A' - 1, 'Z' + 1),
   1663                                   CharacterRange('a', 'z'));
   1664   // Here we need to add [l-z] to complete the case independence of
   1665   // [A-Za-z] but we expect [a-z] to be added since we always add a
   1666   // whole block at a time.
   1667   TestSimpleRangeCaseIndependence(isolate, CharacterRange('A', 'k'),
   1668                                   CharacterRange('a', 'z'));
   1669 }
   1670 
   1671 
   1672 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
   1673   if (ranges == NULL)
   1674     return false;
   1675   for (int i = 0; i < ranges->length(); i++) {
   1676     CharacterRange range = ranges->at(i);
   1677     if (range.from() <= c && c <= range.to())
   1678       return true;
   1679   }
   1680   return false;
   1681 }
   1682 
   1683 
   1684 TEST(CharClassDifference) {
   1685   Zone zone;
   1686   ZoneList<CharacterRange>* base =
   1687       new(&zone) ZoneList<CharacterRange>(1, &zone);
   1688   base->Add(CharacterRange::Everything(), &zone);
   1689   Vector<const int> overlay = CharacterRange::GetWordBounds();
   1690   ZoneList<CharacterRange>* included = NULL;
   1691   ZoneList<CharacterRange>* excluded = NULL;
   1692   CharacterRange::Split(base, overlay, &included, &excluded, &zone);
   1693   for (int i = 0; i < (1 << 16); i++) {
   1694     bool in_base = InClass(i, base);
   1695     if (in_base) {
   1696       bool in_overlay = false;
   1697       for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
   1698         if (overlay[j] <= i && i < overlay[j+1])
   1699           in_overlay = true;
   1700       }
   1701       CHECK_EQ(in_overlay, InClass(i, included));
   1702       CHECK_EQ(!in_overlay, InClass(i, excluded));
   1703     } else {
   1704       CHECK(!InClass(i, included));
   1705       CHECK(!InClass(i, excluded));
   1706     }
   1707   }
   1708 }
   1709 
   1710 
   1711 TEST(CanonicalizeCharacterSets) {
   1712   Zone zone;
   1713   ZoneList<CharacterRange>* list =
   1714       new(&zone) ZoneList<CharacterRange>(4, &zone);
   1715   CharacterSet set(list);
   1716 
   1717   list->Add(CharacterRange(10, 20), &zone);
   1718   list->Add(CharacterRange(30, 40), &zone);
   1719   list->Add(CharacterRange(50, 60), &zone);
   1720   set.Canonicalize();
   1721   CHECK_EQ(3, list->length());
   1722   CHECK_EQ(10, list->at(0).from());
   1723   CHECK_EQ(20, list->at(0).to());
   1724   CHECK_EQ(30, list->at(1).from());
   1725   CHECK_EQ(40, list->at(1).to());
   1726   CHECK_EQ(50, list->at(2).from());
   1727   CHECK_EQ(60, list->at(2).to());
   1728 
   1729   list->Rewind(0);
   1730   list->Add(CharacterRange(10, 20), &zone);
   1731   list->Add(CharacterRange(50, 60), &zone);
   1732   list->Add(CharacterRange(30, 40), &zone);
   1733   set.Canonicalize();
   1734   CHECK_EQ(3, list->length());
   1735   CHECK_EQ(10, list->at(0).from());
   1736   CHECK_EQ(20, list->at(0).to());
   1737   CHECK_EQ(30, list->at(1).from());
   1738   CHECK_EQ(40, list->at(1).to());
   1739   CHECK_EQ(50, list->at(2).from());
   1740   CHECK_EQ(60, list->at(2).to());
   1741 
   1742   list->Rewind(0);
   1743   list->Add(CharacterRange(30, 40), &zone);
   1744   list->Add(CharacterRange(10, 20), &zone);
   1745   list->Add(CharacterRange(25, 25), &zone);
   1746   list->Add(CharacterRange(100, 100), &zone);
   1747   list->Add(CharacterRange(1, 1), &zone);
   1748   set.Canonicalize();
   1749   CHECK_EQ(5, list->length());
   1750   CHECK_EQ(1, list->at(0).from());
   1751   CHECK_EQ(1, list->at(0).to());
   1752   CHECK_EQ(10, list->at(1).from());
   1753   CHECK_EQ(20, list->at(1).to());
   1754   CHECK_EQ(25, list->at(2).from());
   1755   CHECK_EQ(25, list->at(2).to());
   1756   CHECK_EQ(30, list->at(3).from());
   1757   CHECK_EQ(40, list->at(3).to());
   1758   CHECK_EQ(100, list->at(4).from());
   1759   CHECK_EQ(100, list->at(4).to());
   1760 
   1761   list->Rewind(0);
   1762   list->Add(CharacterRange(10, 19), &zone);
   1763   list->Add(CharacterRange(21, 30), &zone);
   1764   list->Add(CharacterRange(20, 20), &zone);
   1765   set.Canonicalize();
   1766   CHECK_EQ(1, list->length());
   1767   CHECK_EQ(10, list->at(0).from());
   1768   CHECK_EQ(30, list->at(0).to());
   1769 }
   1770 
   1771 
   1772 TEST(CharacterRangeMerge) {
   1773   Zone zone;
   1774   ZoneList<CharacterRange> l1(4, &zone);
   1775   ZoneList<CharacterRange> l2(4, &zone);
   1776   // Create all combinations of intersections of ranges, both singletons and
   1777   // longer.
   1778 
   1779   int offset = 0;
   1780 
   1781   // The five kinds of singleton intersections:
   1782   //     X
   1783   //   Y      - outside before
   1784   //    Y     - outside touching start
   1785   //     Y    - overlap
   1786   //      Y   - outside touching end
   1787   //       Y  - outside after
   1788 
   1789   for (int i = 0; i < 5; i++) {
   1790     l1.Add(CharacterRange::Singleton(offset + 2), &zone);
   1791     l2.Add(CharacterRange::Singleton(offset + i), &zone);
   1792     offset += 6;
   1793   }
   1794 
   1795   // The seven kinds of singleton/non-singleton intersections:
   1796   //    XXX
   1797   //  Y        - outside before
   1798   //   Y       - outside touching start
   1799   //    Y      - inside touching start
   1800   //     Y     - entirely inside
   1801   //      Y    - inside touching end
   1802   //       Y   - outside touching end
   1803   //        Y  - disjoint after
   1804 
   1805   for (int i = 0; i < 7; i++) {
   1806     l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
   1807     l2.Add(CharacterRange::Singleton(offset + i), &zone);
   1808     offset += 8;
   1809   }
   1810 
   1811   // The eleven kinds of non-singleton intersections:
   1812   //
   1813   //       XXXXXXXX
   1814   // YYYY                  - outside before.
   1815   //   YYYY                - outside touching start.
   1816   //     YYYY              - overlapping start
   1817   //       YYYY            - inside touching start
   1818   //         YYYY          - entirely inside
   1819   //           YYYY        - inside touching end
   1820   //             YYYY      - overlapping end
   1821   //               YYYY    - outside touching end
   1822   //                 YYYY  - outside after
   1823   //       YYYYYYYY        - identical
   1824   //     YYYYYYYYYYYY      - containing entirely.
   1825 
   1826   for (int i = 0; i < 9; i++) {
   1827     l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
   1828     l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
   1829     offset += 22;
   1830   }
   1831   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1832   l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1833   offset += 22;
   1834   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
   1835   l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
   1836   offset += 22;
   1837 
   1838   // Different kinds of multi-range overlap:
   1839   // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
   1840   //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
   1841 
   1842   l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
   1843   l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
   1844   for (int i = 0; i < 6; i++) {
   1845     l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
   1846     l2.Add(CharacterRange::Singleton(offset + 8), &zone);
   1847     offset += 9;
   1848   }
   1849 
   1850   CHECK(CharacterRange::IsCanonical(&l1));
   1851   CHECK(CharacterRange::IsCanonical(&l2));
   1852 
   1853   ZoneList<CharacterRange> first_only(4, &zone);
   1854   ZoneList<CharacterRange> second_only(4, &zone);
   1855   ZoneList<CharacterRange> both(4, &zone);
   1856 }
   1857 
   1858 
   1859 TEST(Graph) {
   1860   Execute("\\b\\w+\\b", false, true, true);
   1861 }
   1862 
   1863 
   1864 namespace {
   1865 
   1866 int* global_use_counts = NULL;
   1867 
   1868 void MockUseCounterCallback(v8::Isolate* isolate,
   1869                             v8::Isolate::UseCounterFeature feature) {
   1870   ++global_use_counts[feature];
   1871 }
   1872 }
   1873 
   1874 
   1875 // Test that ES2015 RegExp compatibility fixes are in place, that they
   1876 // are not overly broad, and the appropriate UseCounters are incremented
   1877 TEST(UseCountRegExp) {
   1878   i::FLAG_harmony_regexps = true;
   1879   v8::Isolate* isolate = CcTest::isolate();
   1880   v8::HandleScope scope(isolate);
   1881   LocalContext env;
   1882   int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
   1883   global_use_counts = use_counts;
   1884   CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
   1885 
   1886   // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
   1887   v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
   1888   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1889   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1890   CHECK(resultSticky->IsUndefined());
   1891 
   1892   // re.sticky has approriate value and doesn't touch UseCounter
   1893   v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
   1894   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1895   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1896   CHECK(resultReSticky->IsFalse());
   1897 
   1898   // When the getter is caleld on another object, throw an exception
   1899   // and don't increment the UseCounter
   1900   v8::Local<v8::Value> resultStickyError = CompileRun(
   1901       "var exception;"
   1902       "try { "
   1903       "  Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
   1904       "      .get.call(null);"
   1905       "} catch (e) {"
   1906       "  exception = e;"
   1907       "}"
   1908       "exception");
   1909   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1910   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1911   CHECK(resultStickyError->IsObject());
   1912 
   1913   // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
   1914   // a UseCounter is incremented to track it.
   1915   v8::Local<v8::Value> resultToString =
   1916       CompileRun("RegExp.prototype.toString().length");
   1917   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1918   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1919   CHECK(resultToString->IsInt32());
   1920   CHECK_EQ(6,
   1921            resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
   1922 
   1923   // .toString() works on normal RegExps
   1924   v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
   1925   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1926   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1927   CHECK(resultReToString->IsInt32());
   1928   CHECK_EQ(
   1929       3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
   1930 
   1931   // .toString() throws on non-RegExps that aren't RegExp.prototype
   1932   v8::Local<v8::Value> resultToStringError = CompileRun(
   1933       "var exception;"
   1934       "try { RegExp.prototype.toString.call(null) }"
   1935       "catch (e) { exception = e; }"
   1936       "exception");
   1937   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
   1938   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
   1939   CHECK(resultToStringError->IsObject());
   1940 }
   1941