1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 29 #include <stdlib.h> 30 31 #include "src/v8.h" 32 33 #include "src/ast.h" 34 #include "src/char-predicates-inl.h" 35 #include "src/jsregexp.h" 36 #include "src/ostreams.h" 37 #include "src/parser.h" 38 #include "src/regexp-macro-assembler.h" 39 #include "src/regexp-macro-assembler-irregexp.h" 40 #include "src/string-stream.h" 41 #include "src/zone-inl.h" 42 #ifdef V8_INTERPRETED_REGEXP 43 #include "src/interpreter-irregexp.h" 44 #else // V8_INTERPRETED_REGEXP 45 #include "src/macro-assembler.h" 46 #if V8_TARGET_ARCH_ARM 47 #include "src/arm/assembler-arm.h" // NOLINT 48 #include "src/arm/macro-assembler-arm.h" 49 #include "src/arm/regexp-macro-assembler-arm.h" 50 #endif 51 #if V8_TARGET_ARCH_ARM64 52 #include "src/arm64/assembler-arm64.h" 53 #include "src/arm64/macro-assembler-arm64.h" 54 #include "src/arm64/regexp-macro-assembler-arm64.h" 55 #endif 56 #if V8_TARGET_ARCH_MIPS 57 #include "src/mips/assembler-mips.h" 58 #include "src/mips/macro-assembler-mips.h" 59 #include "src/mips/regexp-macro-assembler-mips.h" 60 #endif 61 #if V8_TARGET_ARCH_MIPS64 62 #include "src/mips64/assembler-mips64.h" 63 #include "src/mips64/macro-assembler-mips64.h" 64 #include "src/mips64/regexp-macro-assembler-mips64.h" 65 #endif 66 #if V8_TARGET_ARCH_X64 67 #include "src/x64/assembler-x64.h" 68 #include "src/x64/macro-assembler-x64.h" 69 #include "src/x64/regexp-macro-assembler-x64.h" 70 #endif 71 #if V8_TARGET_ARCH_IA32 72 #include "src/ia32/assembler-ia32.h" 73 #include "src/ia32/macro-assembler-ia32.h" 74 #include "src/ia32/regexp-macro-assembler-ia32.h" 75 #endif 76 #if V8_TARGET_ARCH_X87 77 #include "src/x87/assembler-x87.h" 78 #include "src/x87/macro-assembler-x87.h" 79 #include "src/x87/regexp-macro-assembler-x87.h" 80 #endif 81 #endif // V8_INTERPRETED_REGEXP 82 #include "test/cctest/cctest.h" 83 84 using namespace v8::internal; 85 86 87 static bool CheckParse(const char* input) { 88 v8::HandleScope scope(CcTest::isolate()); 89 Zone zone(CcTest::i_isolate()); 90 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 91 RegExpCompileData result; 92 return v8::internal::RegExpParser::ParseRegExp( 93 &reader, false, &result, &zone); 94 } 95 96 97 static void CheckParseEq(const char* input, const char* expected) { 98 v8::HandleScope scope(CcTest::isolate()); 99 Zone zone(CcTest::i_isolate()); 100 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 101 RegExpCompileData result; 102 CHECK(v8::internal::RegExpParser::ParseRegExp( 103 &reader, false, &result, &zone)); 104 CHECK(result.tree != NULL); 105 CHECK(result.error.is_null()); 106 OStringStream os; 107 result.tree->Print(os, &zone); 108 CHECK_EQ(expected, os.c_str()); 109 } 110 111 112 static bool CheckSimple(const char* input) { 113 v8::HandleScope scope(CcTest::isolate()); 114 Zone zone(CcTest::i_isolate()); 115 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 116 RegExpCompileData result; 117 CHECK(v8::internal::RegExpParser::ParseRegExp( 118 &reader, false, &result, &zone)); 119 CHECK(result.tree != NULL); 120 CHECK(result.error.is_null()); 121 return result.simple; 122 } 123 124 struct MinMaxPair { 125 int min_match; 126 int max_match; 127 }; 128 129 130 static MinMaxPair CheckMinMaxMatch(const char* input) { 131 v8::HandleScope scope(CcTest::isolate()); 132 Zone zone(CcTest::i_isolate()); 133 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 134 RegExpCompileData result; 135 CHECK(v8::internal::RegExpParser::ParseRegExp( 136 &reader, false, &result, &zone)); 137 CHECK(result.tree != NULL); 138 CHECK(result.error.is_null()); 139 int min_match = result.tree->min_match(); 140 int max_match = result.tree->max_match(); 141 MinMaxPair pair = { min_match, max_match }; 142 return pair; 143 } 144 145 146 #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input)) 147 #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input)); 148 #define CHECK_MIN_MAX(input, min, max) \ 149 { MinMaxPair min_max = CheckMinMaxMatch(input); \ 150 CHECK_EQ(min, min_max.min_match); \ 151 CHECK_EQ(max, min_max.max_match); \ 152 } 153 154 TEST(Parser) { 155 CHECK_PARSE_ERROR("?"); 156 157 CheckParseEq("abc", "'abc'"); 158 CheckParseEq("", "%"); 159 CheckParseEq("abc|def", "(| 'abc' 'def')"); 160 CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 161 CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)"); 162 CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); 163 CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); 164 CheckParseEq("a*", "(# 0 - g 'a')"); 165 CheckParseEq("a*?", "(# 0 - n 'a')"); 166 CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))"); 167 CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))"); 168 CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))"); 169 CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))"); 170 CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))"); 171 CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))"); 172 CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))"); 173 CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))"); 174 CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))"); 175 CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))"); 176 CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))"); 177 CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))"); 178 CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'"); 179 CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')"); 180 CheckParseEq("(?:foo)", "'foo'"); 181 CheckParseEq("(?: foo )", "' foo '"); 182 CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); 183 CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); 184 CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); 185 CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); 186 CheckParseEq("()", "(^ %)"); 187 CheckParseEq("(?=)", "(-> + %)"); 188 CheckParseEq("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows 189 CheckParseEq("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252 190 CheckParseEq("[x]", "[x]"); 191 CheckParseEq("[xyz]", "[x y z]"); 192 CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); 193 CheckParseEq("[-123]", "[- 1 2 3]"); 194 CheckParseEq("[^123]", "^[1 2 3]"); 195 CheckParseEq("]", "']'"); 196 CheckParseEq("}", "'}'"); 197 CheckParseEq("[a-b-c]", "[a-b - c]"); 198 CheckParseEq("[\\d]", "[0-9]"); 199 CheckParseEq("[x\\dz]", "[x 0-9 z]"); 200 CheckParseEq("[\\d-z]", "[0-9 - z]"); 201 CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]"); 202 CheckParseEq("[z-\\d]", "[z - 0-9]"); 203 // Control character outside character class. 204 CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); 205 CheckParseEq("\\c!", "'\\c!'"); 206 CheckParseEq("\\c_", "'\\c_'"); 207 CheckParseEq("\\c~", "'\\c~'"); 208 CheckParseEq("\\c1", "'\\c1'"); 209 // Control character inside character class. 210 CheckParseEq("[\\c!]", "[\\ c !]"); 211 CheckParseEq("[\\c_]", "[\\x1f]"); 212 CheckParseEq("[\\c~]", "[\\ c ~]"); 213 CheckParseEq("[\\ca]", "[\\x01]"); 214 CheckParseEq("[\\cz]", "[\\x1a]"); 215 CheckParseEq("[\\cA]", "[\\x01]"); 216 CheckParseEq("[\\cZ]", "[\\x1a]"); 217 CheckParseEq("[\\c1]", "[\\x11]"); 218 219 CheckParseEq("[a\\]c]", "[a ] c]"); 220 CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); 221 CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); 222 CheckParseEq("\\0", "'\\x00'"); 223 CheckParseEq("\\8", "'8'"); 224 CheckParseEq("\\9", "'9'"); 225 CheckParseEq("\\11", "'\\x09'"); 226 CheckParseEq("\\11a", "'\\x09a'"); 227 CheckParseEq("\\011", "'\\x09'"); 228 CheckParseEq("\\00011", "'\\x0011'"); 229 CheckParseEq("\\118", "'\\x098'"); 230 CheckParseEq("\\111", "'I'"); 231 CheckParseEq("\\1111", "'I1'"); 232 CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))"); 233 CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))"); 234 CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))"); 235 CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')"); 236 CheckParseEq("(x)(x)(x)\\1*", 237 "(: (^ 'x') (^ 'x') (^ 'x')" 238 " (# 0 - g (<- 1)))"); 239 CheckParseEq("(x)(x)(x)\\2*", 240 "(: (^ 'x') (^ 'x') (^ 'x')" 241 " (# 0 - g (<- 2)))"); 242 CheckParseEq("(x)(x)(x)\\3*", 243 "(: (^ 'x') (^ 'x') (^ 'x')" 244 " (# 0 - g (<- 3)))"); 245 CheckParseEq("(x)(x)(x)\\4*", 246 "(: (^ 'x') (^ 'x') (^ 'x')" 247 " (# 0 - g '\\x04'))"); 248 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", 249 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 250 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); 251 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", 252 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 253 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')"); 254 CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))"); 255 CheckParseEq("(a\\1)", "(^ 'a')"); 256 CheckParseEq("(\\1a)", "(^ 'a')"); 257 CheckParseEq("(?=a)?a", "'a'"); 258 CheckParseEq("(?=a){0,10}a", "'a'"); 259 CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')"); 260 CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')"); 261 CheckParseEq("(?!a)?a", "'a'"); 262 CheckParseEq("\\1(a)", "(^ 'a')"); 263 CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))"); 264 CheckParseEq("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))"); 265 CheckParseEq("[\\0]", "[\\x00]"); 266 CheckParseEq("[\\11]", "[\\x09]"); 267 CheckParseEq("[\\11a]", "[\\x09 a]"); 268 CheckParseEq("[\\011]", "[\\x09]"); 269 CheckParseEq("[\\00011]", "[\\x00 1 1]"); 270 CheckParseEq("[\\118]", "[\\x09 8]"); 271 CheckParseEq("[\\111]", "[I]"); 272 CheckParseEq("[\\1111]", "[I 1]"); 273 CheckParseEq("\\x34", "'\x34'"); 274 CheckParseEq("\\x60", "'\x60'"); 275 CheckParseEq("\\x3z", "'x3z'"); 276 CheckParseEq("\\c", "'\\c'"); 277 CheckParseEq("\\u0034", "'\x34'"); 278 CheckParseEq("\\u003z", "'u003z'"); 279 CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))"); 280 281 CHECK_SIMPLE("", false); 282 CHECK_SIMPLE("a", true); 283 CHECK_SIMPLE("a|b", false); 284 CHECK_SIMPLE("a\\n", false); 285 CHECK_SIMPLE("^a", false); 286 CHECK_SIMPLE("a$", false); 287 CHECK_SIMPLE("a\\b!", false); 288 CHECK_SIMPLE("a\\Bb", false); 289 CHECK_SIMPLE("a*", false); 290 CHECK_SIMPLE("a*?", false); 291 CHECK_SIMPLE("a?", false); 292 CHECK_SIMPLE("a??", false); 293 CHECK_SIMPLE("a{0,1}?", false); 294 CHECK_SIMPLE("a{1,1}?", false); 295 CHECK_SIMPLE("a{1,2}?", false); 296 CHECK_SIMPLE("a+?", false); 297 CHECK_SIMPLE("(a)", false); 298 CHECK_SIMPLE("(a)\\1", false); 299 CHECK_SIMPLE("(\\1a)", false); 300 CHECK_SIMPLE("\\1(a)", false); 301 CHECK_SIMPLE("a\\s", false); 302 CHECK_SIMPLE("a\\S", false); 303 CHECK_SIMPLE("a\\d", false); 304 CHECK_SIMPLE("a\\D", false); 305 CHECK_SIMPLE("a\\w", false); 306 CHECK_SIMPLE("a\\W", false); 307 CHECK_SIMPLE("a.", false); 308 CHECK_SIMPLE("a\\q", false); 309 CHECK_SIMPLE("a[a]", false); 310 CHECK_SIMPLE("a[^a]", false); 311 CHECK_SIMPLE("a[a-z]", false); 312 CHECK_SIMPLE("a[\\q]", false); 313 CHECK_SIMPLE("a(?:b)", false); 314 CHECK_SIMPLE("a(?=b)", false); 315 CHECK_SIMPLE("a(?!b)", false); 316 CHECK_SIMPLE("\\x60", false); 317 CHECK_SIMPLE("\\u0060", false); 318 CHECK_SIMPLE("\\cA", false); 319 CHECK_SIMPLE("\\q", false); 320 CHECK_SIMPLE("\\1112", false); 321 CHECK_SIMPLE("\\0", false); 322 CHECK_SIMPLE("(a)\\1", false); 323 CHECK_SIMPLE("(?=a)?a", false); 324 CHECK_SIMPLE("(?!a)?a\\1", false); 325 CHECK_SIMPLE("(?:(?=a))a\\1", false); 326 327 CheckParseEq("a{}", "'a{}'"); 328 CheckParseEq("a{,}", "'a{,}'"); 329 CheckParseEq("a{", "'a{'"); 330 CheckParseEq("a{z}", "'a{z}'"); 331 CheckParseEq("a{1z}", "'a{1z}'"); 332 CheckParseEq("a{12z}", "'a{12z}'"); 333 CheckParseEq("a{12,", "'a{12,'"); 334 CheckParseEq("a{12,3b", "'a{12,3b'"); 335 CheckParseEq("{}", "'{}'"); 336 CheckParseEq("{,}", "'{,}'"); 337 CheckParseEq("{", "'{'"); 338 CheckParseEq("{z}", "'{z}'"); 339 CheckParseEq("{1z}", "'{1z}'"); 340 CheckParseEq("{12z}", "'{12z}'"); 341 CheckParseEq("{12,", "'{12,'"); 342 CheckParseEq("{12,3b", "'{12,3b'"); 343 344 CHECK_MIN_MAX("a", 1, 1); 345 CHECK_MIN_MAX("abc", 3, 3); 346 CHECK_MIN_MAX("a[bc]d", 3, 3); 347 CHECK_MIN_MAX("a|bc", 1, 2); 348 CHECK_MIN_MAX("ab|c", 1, 2); 349 CHECK_MIN_MAX("a||bc", 0, 2); 350 CHECK_MIN_MAX("|", 0, 0); 351 CHECK_MIN_MAX("(?:ab)", 2, 2); 352 CHECK_MIN_MAX("(?:ab|cde)", 2, 3); 353 CHECK_MIN_MAX("(?:ab)|cde", 2, 3); 354 CHECK_MIN_MAX("(ab)", 2, 2); 355 CHECK_MIN_MAX("(ab|cde)", 2, 3); 356 CHECK_MIN_MAX("(ab)\\1", 2, 4); 357 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6); 358 CHECK_MIN_MAX("(?:ab)?", 0, 2); 359 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity); 360 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity); 361 CHECK_MIN_MAX("a?", 0, 1); 362 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity); 363 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity); 364 CHECK_MIN_MAX("a??", 0, 1); 365 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity); 366 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity); 367 CHECK_MIN_MAX("(?:a?)?", 0, 1); 368 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity); 369 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity); 370 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity); 371 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity); 372 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity); 373 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity); 374 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity); 375 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity); 376 CHECK_MIN_MAX("a{0}", 0, 0); 377 CHECK_MIN_MAX("(?:a+){0}", 0, 0); 378 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0); 379 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity); 380 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity); 381 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity); 382 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity); 383 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity); 384 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14); 385 CHECK_MIN_MAX("a\\bc", 2, 2); 386 CHECK_MIN_MAX("a\\Bc", 2, 2); 387 CHECK_MIN_MAX("a\\sc", 3, 3); 388 CHECK_MIN_MAX("a\\Sc", 3, 3); 389 CHECK_MIN_MAX("a(?=b)c", 2, 2); 390 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2); 391 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2); 392 } 393 394 395 TEST(ParserRegression) { 396 CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])"); 397 CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); 398 CheckParseEq("{", "'{'"); 399 CheckParseEq("a|", "(| 'a' %)"); 400 } 401 402 static void ExpectError(const char* input, 403 const char* expected) { 404 v8::HandleScope scope(CcTest::isolate()); 405 Zone zone(CcTest::i_isolate()); 406 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 407 RegExpCompileData result; 408 CHECK(!v8::internal::RegExpParser::ParseRegExp( 409 &reader, false, &result, &zone)); 410 CHECK(result.tree == NULL); 411 CHECK(!result.error.is_null()); 412 SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS); 413 CHECK_EQ(expected, str.get()); 414 } 415 416 417 TEST(Errors) { 418 const char* kEndBackslash = "\\ at end of pattern"; 419 ExpectError("\\", kEndBackslash); 420 const char* kUnterminatedGroup = "Unterminated group"; 421 ExpectError("(foo", kUnterminatedGroup); 422 const char* kInvalidGroup = "Invalid group"; 423 ExpectError("(?", kInvalidGroup); 424 const char* kUnterminatedCharacterClass = "Unterminated character class"; 425 ExpectError("[", kUnterminatedCharacterClass); 426 ExpectError("[a-", kUnterminatedCharacterClass); 427 const char* kNothingToRepeat = "Nothing to repeat"; 428 ExpectError("*", kNothingToRepeat); 429 ExpectError("?", kNothingToRepeat); 430 ExpectError("+", kNothingToRepeat); 431 ExpectError("{1}", kNothingToRepeat); 432 ExpectError("{1,2}", kNothingToRepeat); 433 ExpectError("{1,}", kNothingToRepeat); 434 435 // Check that we don't allow more than kMaxCapture captures 436 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures. 437 const char* kTooManyCaptures = "Too many captures"; 438 OStringStream os; 439 for (int i = 0; i <= kMaxCaptures; i++) { 440 os << "()"; 441 } 442 ExpectError(os.c_str(), kTooManyCaptures); 443 } 444 445 446 static bool IsDigit(uc16 c) { 447 return ('0' <= c && c <= '9'); 448 } 449 450 451 static bool NotDigit(uc16 c) { 452 return !IsDigit(c); 453 } 454 455 456 static bool IsWhiteSpaceOrLineTerminator(uc16 c) { 457 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes 458 // WhiteSpace (7.2) and LineTerminator (7.3) values. 459 return v8::internal::WhiteSpaceOrLineTerminator::Is(c); 460 } 461 462 463 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) { 464 return !IsWhiteSpaceOrLineTerminator(c); 465 } 466 467 468 static bool NotWord(uc16 c) { 469 return !IsRegExpWord(c); 470 } 471 472 473 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { 474 Zone zone(CcTest::i_isolate()); 475 ZoneList<CharacterRange>* ranges = 476 new(&zone) ZoneList<CharacterRange>(2, &zone); 477 CharacterRange::AddClassEscape(c, ranges, &zone); 478 for (unsigned i = 0; i < (1 << 16); i++) { 479 bool in_class = false; 480 for (int j = 0; !in_class && j < ranges->length(); j++) { 481 CharacterRange& range = ranges->at(j); 482 in_class = (range.from() <= i && i <= range.to()); 483 } 484 CHECK_EQ(pred(i), in_class); 485 } 486 } 487 488 489 TEST(CharacterClassEscapes) { 490 TestCharacterClassEscapes('.', IsRegExpNewline); 491 TestCharacterClassEscapes('d', IsDigit); 492 TestCharacterClassEscapes('D', NotDigit); 493 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator); 494 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor); 495 TestCharacterClassEscapes('w', IsRegExpWord); 496 TestCharacterClassEscapes('W', NotWord); 497 } 498 499 500 static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte, 501 Zone* zone) { 502 Isolate* isolate = CcTest::i_isolate(); 503 FlatStringReader reader(isolate, CStrVector(input)); 504 RegExpCompileData compile_data; 505 if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, 506 &compile_data, zone)) 507 return NULL; 508 Handle<String> pattern = isolate->factory()-> 509 NewStringFromUtf8(CStrVector(input)).ToHandleChecked(); 510 Handle<String> sample_subject = 511 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked(); 512 RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern, 513 sample_subject, is_one_byte, zone); 514 return compile_data.node; 515 } 516 517 518 static void Execute(const char* input, bool multiline, bool is_one_byte, 519 bool dot_output = false) { 520 v8::HandleScope scope(CcTest::isolate()); 521 Zone zone(CcTest::i_isolate()); 522 RegExpNode* node = Compile(input, multiline, is_one_byte, &zone); 523 USE(node); 524 #ifdef DEBUG 525 if (dot_output) { 526 RegExpEngine::DotPrint(input, node, false); 527 } 528 #endif // DEBUG 529 } 530 531 532 class TestConfig { 533 public: 534 typedef int Key; 535 typedef int Value; 536 static const int kNoKey; 537 static int NoValue() { return 0; } 538 static inline int Compare(int a, int b) { 539 if (a < b) 540 return -1; 541 else if (a > b) 542 return 1; 543 else 544 return 0; 545 } 546 }; 547 548 549 const int TestConfig::kNoKey = 0; 550 551 552 static unsigned PseudoRandom(int i, int j) { 553 return ~(~((i * 781) ^ (j * 329))); 554 } 555 556 557 TEST(SplayTreeSimple) { 558 static const unsigned kLimit = 1000; 559 Zone zone(CcTest::i_isolate()); 560 ZoneSplayTree<TestConfig> tree(&zone); 561 bool seen[kLimit]; 562 for (unsigned i = 0; i < kLimit; i++) seen[i] = false; 563 #define CHECK_MAPS_EQUAL() do { \ 564 for (unsigned k = 0; k < kLimit; k++) \ 565 CHECK_EQ(seen[k], tree.Find(k, &loc)); \ 566 } while (false) 567 for (int i = 0; i < 50; i++) { 568 for (int j = 0; j < 50; j++) { 569 unsigned next = PseudoRandom(i, j) % kLimit; 570 if (seen[next]) { 571 // We've already seen this one. Check the value and remove 572 // it. 573 ZoneSplayTree<TestConfig>::Locator loc; 574 CHECK(tree.Find(next, &loc)); 575 CHECK_EQ(next, loc.key()); 576 CHECK_EQ(3 * next, loc.value()); 577 tree.Remove(next); 578 seen[next] = false; 579 CHECK_MAPS_EQUAL(); 580 } else { 581 // Check that it wasn't there already and then add it. 582 ZoneSplayTree<TestConfig>::Locator loc; 583 CHECK(!tree.Find(next, &loc)); 584 CHECK(tree.Insert(next, &loc)); 585 CHECK_EQ(next, loc.key()); 586 loc.set_value(3 * next); 587 seen[next] = true; 588 CHECK_MAPS_EQUAL(); 589 } 590 int val = PseudoRandom(j, i) % kLimit; 591 if (seen[val]) { 592 ZoneSplayTree<TestConfig>::Locator loc; 593 CHECK(tree.FindGreatestLessThan(val, &loc)); 594 CHECK_EQ(loc.key(), val); 595 break; 596 } 597 val = PseudoRandom(i + j, i - j) % kLimit; 598 if (seen[val]) { 599 ZoneSplayTree<TestConfig>::Locator loc; 600 CHECK(tree.FindLeastGreaterThan(val, &loc)); 601 CHECK_EQ(loc.key(), val); 602 break; 603 } 604 } 605 } 606 } 607 608 609 TEST(DispatchTableConstruction) { 610 // Initialize test data. 611 static const int kLimit = 1000; 612 static const int kRangeCount = 8; 613 static const int kRangeSize = 16; 614 uc16 ranges[kRangeCount][2 * kRangeSize]; 615 for (int i = 0; i < kRangeCount; i++) { 616 Vector<uc16> range(ranges[i], 2 * kRangeSize); 617 for (int j = 0; j < 2 * kRangeSize; j++) { 618 range[j] = PseudoRandom(i + 25, j + 87) % kLimit; 619 } 620 range.Sort(); 621 for (int j = 1; j < 2 * kRangeSize; j++) { 622 CHECK(range[j-1] <= range[j]); 623 } 624 } 625 // Enter test data into dispatch table. 626 Zone zone(CcTest::i_isolate()); 627 DispatchTable table(&zone); 628 for (int i = 0; i < kRangeCount; i++) { 629 uc16* range = ranges[i]; 630 for (int j = 0; j < 2 * kRangeSize; j += 2) 631 table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone); 632 } 633 // Check that the table looks as we would expect 634 for (int p = 0; p < kLimit; p++) { 635 OutSet* outs = table.Get(p); 636 for (int j = 0; j < kRangeCount; j++) { 637 uc16* range = ranges[j]; 638 bool is_on = false; 639 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) 640 is_on = (range[k] <= p && p <= range[k + 1]); 641 CHECK_EQ(is_on, outs->Get(j)); 642 } 643 } 644 } 645 646 647 // Test of debug-only syntax. 648 #ifdef DEBUG 649 650 TEST(ParsePossessiveRepetition) { 651 bool old_flag_value = FLAG_regexp_possessive_quantifier; 652 653 // Enable possessive quantifier syntax. 654 FLAG_regexp_possessive_quantifier = true; 655 656 CheckParseEq("a*+", "(# 0 - p 'a')"); 657 CheckParseEq("a++", "(# 1 - p 'a')"); 658 CheckParseEq("a?+", "(# 0 1 p 'a')"); 659 CheckParseEq("a{10,20}+", "(# 10 20 p 'a')"); 660 CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')"); 661 662 // Disable possessive quantifier syntax. 663 FLAG_regexp_possessive_quantifier = false; 664 665 CHECK_PARSE_ERROR("a*+"); 666 CHECK_PARSE_ERROR("a++"); 667 CHECK_PARSE_ERROR("a?+"); 668 CHECK_PARSE_ERROR("a{10,20}+"); 669 CHECK_PARSE_ERROR("a{10,20}+b"); 670 671 FLAG_regexp_possessive_quantifier = old_flag_value; 672 } 673 674 #endif 675 676 // Tests of interpreter. 677 678 679 #ifndef V8_INTERPRETED_REGEXP 680 681 #if V8_TARGET_ARCH_IA32 682 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler; 683 #elif V8_TARGET_ARCH_X64 684 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler; 685 #elif V8_TARGET_ARCH_ARM 686 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler; 687 #elif V8_TARGET_ARCH_ARM64 688 typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler; 689 #elif V8_TARGET_ARCH_MIPS 690 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler; 691 #elif V8_TARGET_ARCH_MIPS64 692 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler; 693 #elif V8_TARGET_ARCH_X87 694 typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler; 695 #endif 696 697 class ContextInitializer { 698 public: 699 ContextInitializer() 700 : scope_(CcTest::isolate()), 701 env_(v8::Context::New(CcTest::isolate())) { 702 env_->Enter(); 703 } 704 ~ContextInitializer() { 705 env_->Exit(); 706 } 707 private: 708 v8::HandleScope scope_; 709 v8::Handle<v8::Context> env_; 710 }; 711 712 713 static ArchRegExpMacroAssembler::Result Execute(Code* code, 714 String* input, 715 int start_offset, 716 const byte* input_start, 717 const byte* input_end, 718 int* captures) { 719 return NativeRegExpMacroAssembler::Execute( 720 code, 721 input, 722 start_offset, 723 input_start, 724 input_end, 725 captures, 726 0, 727 CcTest::i_isolate()); 728 } 729 730 731 TEST(MacroAssemblerNativeSuccess) { 732 v8::V8::Initialize(); 733 ContextInitializer initializer; 734 Isolate* isolate = CcTest::i_isolate(); 735 Factory* factory = isolate->factory(); 736 Zone zone(isolate); 737 738 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone); 739 740 m.Succeed(); 741 742 Handle<String> source = factory->NewStringFromStaticChars(""); 743 Handle<Object> code_object = m.GetCode(source); 744 Handle<Code> code = Handle<Code>::cast(code_object); 745 746 int captures[4] = {42, 37, 87, 117}; 747 Handle<String> input = factory->NewStringFromStaticChars("foofoo"); 748 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 749 const byte* start_adr = 750 reinterpret_cast<const byte*>(seq_input->GetCharsAddress()); 751 752 NativeRegExpMacroAssembler::Result result = 753 Execute(*code, 754 *input, 755 0, 756 start_adr, 757 start_adr + seq_input->length(), 758 captures); 759 760 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 761 CHECK_EQ(-1, captures[0]); 762 CHECK_EQ(-1, captures[1]); 763 CHECK_EQ(-1, captures[2]); 764 CHECK_EQ(-1, captures[3]); 765 } 766 767 768 TEST(MacroAssemblerNativeSimple) { 769 v8::V8::Initialize(); 770 ContextInitializer initializer; 771 Isolate* isolate = CcTest::i_isolate(); 772 Factory* factory = isolate->factory(); 773 Zone zone(isolate); 774 775 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone); 776 777 Label fail, backtrack; 778 m.PushBacktrack(&fail); 779 m.CheckNotAtStart(NULL); 780 m.LoadCurrentCharacter(2, NULL); 781 m.CheckNotCharacter('o', NULL); 782 m.LoadCurrentCharacter(1, NULL, false); 783 m.CheckNotCharacter('o', NULL); 784 m.LoadCurrentCharacter(0, NULL, false); 785 m.CheckNotCharacter('f', NULL); 786 m.WriteCurrentPositionToRegister(0, 0); 787 m.WriteCurrentPositionToRegister(1, 3); 788 m.AdvanceCurrentPosition(3); 789 m.PushBacktrack(&backtrack); 790 m.Succeed(); 791 m.Bind(&backtrack); 792 m.Backtrack(); 793 m.Bind(&fail); 794 m.Fail(); 795 796 Handle<String> source = factory->NewStringFromStaticChars("^foo"); 797 Handle<Object> code_object = m.GetCode(source); 798 Handle<Code> code = Handle<Code>::cast(code_object); 799 800 int captures[4] = {42, 37, 87, 117}; 801 Handle<String> input = factory->NewStringFromStaticChars("foofoo"); 802 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 803 Address start_adr = seq_input->GetCharsAddress(); 804 805 NativeRegExpMacroAssembler::Result result = 806 Execute(*code, 807 *input, 808 0, 809 start_adr, 810 start_adr + input->length(), 811 captures); 812 813 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 814 CHECK_EQ(0, captures[0]); 815 CHECK_EQ(3, captures[1]); 816 CHECK_EQ(-1, captures[2]); 817 CHECK_EQ(-1, captures[3]); 818 819 input = factory->NewStringFromStaticChars("barbarbar"); 820 seq_input = Handle<SeqOneByteString>::cast(input); 821 start_adr = seq_input->GetCharsAddress(); 822 823 result = Execute(*code, 824 *input, 825 0, 826 start_adr, 827 start_adr + input->length(), 828 captures); 829 830 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 831 } 832 833 834 TEST(MacroAssemblerNativeSimpleUC16) { 835 v8::V8::Initialize(); 836 ContextInitializer initializer; 837 Isolate* isolate = CcTest::i_isolate(); 838 Factory* factory = isolate->factory(); 839 Zone zone(isolate); 840 841 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 842 843 Label fail, backtrack; 844 m.PushBacktrack(&fail); 845 m.CheckNotAtStart(NULL); 846 m.LoadCurrentCharacter(2, NULL); 847 m.CheckNotCharacter('o', NULL); 848 m.LoadCurrentCharacter(1, NULL, false); 849 m.CheckNotCharacter('o', NULL); 850 m.LoadCurrentCharacter(0, NULL, false); 851 m.CheckNotCharacter('f', NULL); 852 m.WriteCurrentPositionToRegister(0, 0); 853 m.WriteCurrentPositionToRegister(1, 3); 854 m.AdvanceCurrentPosition(3); 855 m.PushBacktrack(&backtrack); 856 m.Succeed(); 857 m.Bind(&backtrack); 858 m.Backtrack(); 859 m.Bind(&fail); 860 m.Fail(); 861 862 Handle<String> source = factory->NewStringFromStaticChars("^foo"); 863 Handle<Object> code_object = m.GetCode(source); 864 Handle<Code> code = Handle<Code>::cast(code_object); 865 866 int captures[4] = {42, 37, 87, 117}; 867 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', 868 static_cast<uc16>(0x2603)}; 869 Handle<String> input = factory->NewStringFromTwoByte( 870 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 871 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 872 Address start_adr = seq_input->GetCharsAddress(); 873 874 NativeRegExpMacroAssembler::Result result = 875 Execute(*code, 876 *input, 877 0, 878 start_adr, 879 start_adr + input->length(), 880 captures); 881 882 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 883 CHECK_EQ(0, captures[0]); 884 CHECK_EQ(3, captures[1]); 885 CHECK_EQ(-1, captures[2]); 886 CHECK_EQ(-1, captures[3]); 887 888 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 889 static_cast<uc16>(0x2603)}; 890 input = factory->NewStringFromTwoByte( 891 Vector<const uc16>(input_data2, 9)).ToHandleChecked(); 892 seq_input = Handle<SeqTwoByteString>::cast(input); 893 start_adr = seq_input->GetCharsAddress(); 894 895 result = Execute(*code, 896 *input, 897 0, 898 start_adr, 899 start_adr + input->length() * 2, 900 captures); 901 902 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 903 } 904 905 906 TEST(MacroAssemblerNativeBacktrack) { 907 v8::V8::Initialize(); 908 ContextInitializer initializer; 909 Isolate* isolate = CcTest::i_isolate(); 910 Factory* factory = isolate->factory(); 911 Zone zone(isolate); 912 913 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone); 914 915 Label fail; 916 Label backtrack; 917 m.LoadCurrentCharacter(10, &fail); 918 m.Succeed(); 919 m.Bind(&fail); 920 m.PushBacktrack(&backtrack); 921 m.LoadCurrentCharacter(10, NULL); 922 m.Succeed(); 923 m.Bind(&backtrack); 924 m.Fail(); 925 926 Handle<String> source = factory->NewStringFromStaticChars(".........."); 927 Handle<Object> code_object = m.GetCode(source); 928 Handle<Code> code = Handle<Code>::cast(code_object); 929 930 Handle<String> input = factory->NewStringFromStaticChars("foofoo"); 931 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 932 Address start_adr = seq_input->GetCharsAddress(); 933 934 NativeRegExpMacroAssembler::Result result = 935 Execute(*code, 936 *input, 937 0, 938 start_adr, 939 start_adr + input->length(), 940 NULL); 941 942 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 943 } 944 945 946 TEST(MacroAssemblerNativeBackReferenceLATIN1) { 947 v8::V8::Initialize(); 948 ContextInitializer initializer; 949 Isolate* isolate = CcTest::i_isolate(); 950 Factory* factory = isolate->factory(); 951 Zone zone(isolate); 952 953 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone); 954 955 m.WriteCurrentPositionToRegister(0, 0); 956 m.AdvanceCurrentPosition(2); 957 m.WriteCurrentPositionToRegister(1, 0); 958 Label nomatch; 959 m.CheckNotBackReference(0, &nomatch); 960 m.Fail(); 961 m.Bind(&nomatch); 962 m.AdvanceCurrentPosition(2); 963 Label missing_match; 964 m.CheckNotBackReference(0, &missing_match); 965 m.WriteCurrentPositionToRegister(2, 0); 966 m.Succeed(); 967 m.Bind(&missing_match); 968 m.Fail(); 969 970 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1"); 971 Handle<Object> code_object = m.GetCode(source); 972 Handle<Code> code = Handle<Code>::cast(code_object); 973 974 Handle<String> input = factory->NewStringFromStaticChars("fooofo"); 975 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 976 Address start_adr = seq_input->GetCharsAddress(); 977 978 int output[4]; 979 NativeRegExpMacroAssembler::Result result = 980 Execute(*code, 981 *input, 982 0, 983 start_adr, 984 start_adr + input->length(), 985 output); 986 987 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 988 CHECK_EQ(0, output[0]); 989 CHECK_EQ(2, output[1]); 990 CHECK_EQ(6, output[2]); 991 CHECK_EQ(-1, output[3]); 992 } 993 994 995 TEST(MacroAssemblerNativeBackReferenceUC16) { 996 v8::V8::Initialize(); 997 ContextInitializer initializer; 998 Isolate* isolate = CcTest::i_isolate(); 999 Factory* factory = isolate->factory(); 1000 Zone zone(isolate); 1001 1002 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 1003 1004 m.WriteCurrentPositionToRegister(0, 0); 1005 m.AdvanceCurrentPosition(2); 1006 m.WriteCurrentPositionToRegister(1, 0); 1007 Label nomatch; 1008 m.CheckNotBackReference(0, &nomatch); 1009 m.Fail(); 1010 m.Bind(&nomatch); 1011 m.AdvanceCurrentPosition(2); 1012 Label missing_match; 1013 m.CheckNotBackReference(0, &missing_match); 1014 m.WriteCurrentPositionToRegister(2, 0); 1015 m.Succeed(); 1016 m.Bind(&missing_match); 1017 m.Fail(); 1018 1019 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1"); 1020 Handle<Object> code_object = m.GetCode(source); 1021 Handle<Code> code = Handle<Code>::cast(code_object); 1022 1023 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028}; 1024 Handle<String> input = factory->NewStringFromTwoByte( 1025 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 1026 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 1027 Address start_adr = seq_input->GetCharsAddress(); 1028 1029 int output[4]; 1030 NativeRegExpMacroAssembler::Result result = 1031 Execute(*code, 1032 *input, 1033 0, 1034 start_adr, 1035 start_adr + input->length() * 2, 1036 output); 1037 1038 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1039 CHECK_EQ(0, output[0]); 1040 CHECK_EQ(2, output[1]); 1041 CHECK_EQ(6, output[2]); 1042 CHECK_EQ(-1, output[3]); 1043 } 1044 1045 1046 1047 TEST(MacroAssemblernativeAtStart) { 1048 v8::V8::Initialize(); 1049 ContextInitializer initializer; 1050 Isolate* isolate = CcTest::i_isolate(); 1051 Factory* factory = isolate->factory(); 1052 Zone zone(isolate); 1053 1054 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone); 1055 1056 Label not_at_start, newline, fail; 1057 m.CheckNotAtStart(¬_at_start); 1058 // Check that prevchar = '\n' and current = 'f'. 1059 m.CheckCharacter('\n', &newline); 1060 m.Bind(&fail); 1061 m.Fail(); 1062 m.Bind(&newline); 1063 m.LoadCurrentCharacter(0, &fail); 1064 m.CheckNotCharacter('f', &fail); 1065 m.Succeed(); 1066 1067 m.Bind(¬_at_start); 1068 // Check that prevchar = 'o' and current = 'b'. 1069 Label prevo; 1070 m.CheckCharacter('o', &prevo); 1071 m.Fail(); 1072 m.Bind(&prevo); 1073 m.LoadCurrentCharacter(0, &fail); 1074 m.CheckNotCharacter('b', &fail); 1075 m.Succeed(); 1076 1077 Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)"); 1078 Handle<Object> code_object = m.GetCode(source); 1079 Handle<Code> code = Handle<Code>::cast(code_object); 1080 1081 Handle<String> input = factory->NewStringFromStaticChars("foobar"); 1082 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1083 Address start_adr = seq_input->GetCharsAddress(); 1084 1085 NativeRegExpMacroAssembler::Result result = 1086 Execute(*code, 1087 *input, 1088 0, 1089 start_adr, 1090 start_adr + input->length(), 1091 NULL); 1092 1093 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1094 1095 result = Execute(*code, 1096 *input, 1097 3, 1098 start_adr + 3, 1099 start_adr + input->length(), 1100 NULL); 1101 1102 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1103 } 1104 1105 1106 TEST(MacroAssemblerNativeBackRefNoCase) { 1107 v8::V8::Initialize(); 1108 ContextInitializer initializer; 1109 Isolate* isolate = CcTest::i_isolate(); 1110 Factory* factory = isolate->factory(); 1111 Zone zone(isolate); 1112 1113 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone); 1114 1115 Label fail, succ; 1116 1117 m.WriteCurrentPositionToRegister(0, 0); 1118 m.WriteCurrentPositionToRegister(2, 0); 1119 m.AdvanceCurrentPosition(3); 1120 m.WriteCurrentPositionToRegister(3, 0); 1121 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC". 1122 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC". 1123 Label expected_fail; 1124 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail); 1125 m.Bind(&fail); 1126 m.Fail(); 1127 1128 m.Bind(&expected_fail); 1129 m.AdvanceCurrentPosition(3); // Skip "xYz" 1130 m.CheckNotBackReferenceIgnoreCase(2, &succ); 1131 m.Fail(); 1132 1133 m.Bind(&succ); 1134 m.WriteCurrentPositionToRegister(1, 0); 1135 m.Succeed(); 1136 1137 Handle<String> source = 1138 factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)"); 1139 Handle<Object> code_object = m.GetCode(source); 1140 Handle<Code> code = Handle<Code>::cast(code_object); 1141 1142 Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab"); 1143 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1144 Address start_adr = seq_input->GetCharsAddress(); 1145 1146 int output[4]; 1147 NativeRegExpMacroAssembler::Result result = 1148 Execute(*code, 1149 *input, 1150 0, 1151 start_adr, 1152 start_adr + input->length(), 1153 output); 1154 1155 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1156 CHECK_EQ(0, output[0]); 1157 CHECK_EQ(12, output[1]); 1158 CHECK_EQ(0, output[2]); 1159 CHECK_EQ(3, output[3]); 1160 } 1161 1162 1163 1164 TEST(MacroAssemblerNativeRegisters) { 1165 v8::V8::Initialize(); 1166 ContextInitializer initializer; 1167 Isolate* isolate = CcTest::i_isolate(); 1168 Factory* factory = isolate->factory(); 1169 Zone zone(isolate); 1170 1171 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 6, &zone); 1172 1173 uc16 foo_chars[3] = {'f', 'o', 'o'}; 1174 Vector<const uc16> foo(foo_chars, 3); 1175 1176 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt }; 1177 Label fail; 1178 Label backtrack; 1179 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0] 1180 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1181 m.PushBacktrack(&backtrack); 1182 m.WriteStackPointerToRegister(sp); 1183 // Fill stack and registers 1184 m.AdvanceCurrentPosition(2); 1185 m.WriteCurrentPositionToRegister(out1, 0); 1186 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1187 m.PushBacktrack(&fail); 1188 // Drop backtrack stack frames. 1189 m.ReadStackPointerFromRegister(sp); 1190 // And take the first backtrack (to &backtrack) 1191 m.Backtrack(); 1192 1193 m.PushCurrentPosition(); 1194 m.AdvanceCurrentPosition(2); 1195 m.PopCurrentPosition(); 1196 1197 m.Bind(&backtrack); 1198 m.PopRegister(out1); 1199 m.ReadCurrentPositionFromRegister(out1); 1200 m.AdvanceCurrentPosition(3); 1201 m.WriteCurrentPositionToRegister(out2, 0); // [0,3] 1202 1203 Label loop; 1204 m.SetRegister(loop_cnt, 0); // loop counter 1205 m.Bind(&loop); 1206 m.AdvanceRegister(loop_cnt, 1); 1207 m.AdvanceCurrentPosition(1); 1208 m.IfRegisterLT(loop_cnt, 3, &loop); 1209 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6] 1210 1211 Label loop2; 1212 m.SetRegister(loop_cnt, 2); // loop counter 1213 m.Bind(&loop2); 1214 m.AdvanceRegister(loop_cnt, -1); 1215 m.AdvanceCurrentPosition(1); 1216 m.IfRegisterGE(loop_cnt, 0, &loop2); 1217 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9] 1218 1219 Label loop3; 1220 Label exit_loop3; 1221 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1222 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1223 m.ReadCurrentPositionFromRegister(out3); 1224 m.Bind(&loop3); 1225 m.AdvanceCurrentPosition(1); 1226 m.CheckGreedyLoop(&exit_loop3); 1227 m.GoTo(&loop3); 1228 m.Bind(&exit_loop3); 1229 m.PopCurrentPosition(); 1230 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1] 1231 1232 m.Succeed(); 1233 1234 m.Bind(&fail); 1235 m.Fail(); 1236 1237 Handle<String> source = factory->NewStringFromStaticChars("<loop test>"); 1238 Handle<Object> code_object = m.GetCode(source); 1239 Handle<Code> code = Handle<Code>::cast(code_object); 1240 1241 // String long enough for test (content doesn't matter). 1242 Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo"); 1243 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1244 Address start_adr = seq_input->GetCharsAddress(); 1245 1246 int output[6]; 1247 NativeRegExpMacroAssembler::Result result = 1248 Execute(*code, 1249 *input, 1250 0, 1251 start_adr, 1252 start_adr + input->length(), 1253 output); 1254 1255 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1256 CHECK_EQ(0, output[0]); 1257 CHECK_EQ(3, output[1]); 1258 CHECK_EQ(6, output[2]); 1259 CHECK_EQ(9, output[3]); 1260 CHECK_EQ(9, output[4]); 1261 CHECK_EQ(-1, output[5]); 1262 } 1263 1264 1265 TEST(MacroAssemblerStackOverflow) { 1266 v8::V8::Initialize(); 1267 ContextInitializer initializer; 1268 Isolate* isolate = CcTest::i_isolate(); 1269 Factory* factory = isolate->factory(); 1270 Zone zone(isolate); 1271 1272 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone); 1273 1274 Label loop; 1275 m.Bind(&loop); 1276 m.PushBacktrack(&loop); 1277 m.GoTo(&loop); 1278 1279 Handle<String> source = 1280 factory->NewStringFromStaticChars("<stack overflow test>"); 1281 Handle<Object> code_object = m.GetCode(source); 1282 Handle<Code> code = Handle<Code>::cast(code_object); 1283 1284 // String long enough for test (content doesn't matter). 1285 Handle<String> input = factory->NewStringFromStaticChars("dummy"); 1286 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1287 Address start_adr = seq_input->GetCharsAddress(); 1288 1289 NativeRegExpMacroAssembler::Result result = 1290 Execute(*code, 1291 *input, 1292 0, 1293 start_adr, 1294 start_adr + input->length(), 1295 NULL); 1296 1297 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result); 1298 CHECK(isolate->has_pending_exception()); 1299 isolate->clear_pending_exception(); 1300 } 1301 1302 1303 TEST(MacroAssemblerNativeLotsOfRegisters) { 1304 v8::V8::Initialize(); 1305 ContextInitializer initializer; 1306 Isolate* isolate = CcTest::i_isolate(); 1307 Factory* factory = isolate->factory(); 1308 Zone zone(isolate); 1309 1310 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 2, &zone); 1311 1312 // At least 2048, to ensure the allocated space for registers 1313 // span one full page. 1314 const int large_number = 8000; 1315 m.WriteCurrentPositionToRegister(large_number, 42); 1316 m.WriteCurrentPositionToRegister(0, 0); 1317 m.WriteCurrentPositionToRegister(1, 1); 1318 Label done; 1319 m.CheckNotBackReference(0, &done); // Performs a system-stack push. 1320 m.Bind(&done); 1321 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck); 1322 m.PopRegister(1); 1323 m.Succeed(); 1324 1325 Handle<String> source = 1326 factory->NewStringFromStaticChars("<huge register space test>"); 1327 Handle<Object> code_object = m.GetCode(source); 1328 Handle<Code> code = Handle<Code>::cast(code_object); 1329 1330 // String long enough for test (content doesn't matter). 1331 Handle<String> input = factory->NewStringFromStaticChars("sample text"); 1332 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1333 Address start_adr = seq_input->GetCharsAddress(); 1334 1335 int captures[2]; 1336 NativeRegExpMacroAssembler::Result result = 1337 Execute(*code, 1338 *input, 1339 0, 1340 start_adr, 1341 start_adr + input->length(), 1342 captures); 1343 1344 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1345 CHECK_EQ(0, captures[0]); 1346 CHECK_EQ(42, captures[1]); 1347 1348 isolate->clear_pending_exception(); 1349 } 1350 1351 #else // V8_INTERPRETED_REGEXP 1352 1353 TEST(MacroAssembler) { 1354 byte codes[1024]; 1355 Zone zone(CcTest::i_isolate()); 1356 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone); 1357 // ^f(o)o. 1358 Label start, fail, backtrack; 1359 1360 m.SetRegister(4, 42); 1361 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck); 1362 m.AdvanceRegister(4, 42); 1363 m.GoTo(&start); 1364 m.Fail(); 1365 m.Bind(&start); 1366 m.PushBacktrack(&fail); 1367 m.CheckNotAtStart(NULL); 1368 m.LoadCurrentCharacter(0, NULL); 1369 m.CheckNotCharacter('f', NULL); 1370 m.LoadCurrentCharacter(1, NULL); 1371 m.CheckNotCharacter('o', NULL); 1372 m.LoadCurrentCharacter(2, NULL); 1373 m.CheckNotCharacter('o', NULL); 1374 m.WriteCurrentPositionToRegister(0, 0); 1375 m.WriteCurrentPositionToRegister(1, 3); 1376 m.WriteCurrentPositionToRegister(2, 1); 1377 m.WriteCurrentPositionToRegister(3, 2); 1378 m.AdvanceCurrentPosition(3); 1379 m.PushBacktrack(&backtrack); 1380 m.Succeed(); 1381 m.Bind(&backtrack); 1382 m.ClearRegisters(2, 3); 1383 m.Backtrack(); 1384 m.Bind(&fail); 1385 m.PopRegister(0); 1386 m.Fail(); 1387 1388 Isolate* isolate = CcTest::i_isolate(); 1389 Factory* factory = isolate->factory(); 1390 HandleScope scope(isolate); 1391 1392 Handle<String> source = factory->NewStringFromStaticChars("^f(o)o"); 1393 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source)); 1394 int captures[5]; 1395 1396 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'}; 1397 Handle<String> f1_16 = factory->NewStringFromTwoByte( 1398 Vector<const uc16>(str1, 6)).ToHandleChecked(); 1399 1400 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0)); 1401 CHECK_EQ(0, captures[0]); 1402 CHECK_EQ(3, captures[1]); 1403 CHECK_EQ(1, captures[2]); 1404 CHECK_EQ(2, captures[3]); 1405 CHECK_EQ(84, captures[4]); 1406 1407 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'}; 1408 Handle<String> f2_16 = factory->NewStringFromTwoByte( 1409 Vector<const uc16>(str2, 6)).ToHandleChecked(); 1410 1411 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0)); 1412 CHECK_EQ(42, captures[0]); 1413 } 1414 1415 #endif // V8_INTERPRETED_REGEXP 1416 1417 1418 TEST(AddInverseToTable) { 1419 static const int kLimit = 1000; 1420 static const int kRangeCount = 16; 1421 for (int t = 0; t < 10; t++) { 1422 Zone zone(CcTest::i_isolate()); 1423 ZoneList<CharacterRange>* ranges = 1424 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone); 1425 for (int i = 0; i < kRangeCount; i++) { 1426 int from = PseudoRandom(t + 87, i + 25) % kLimit; 1427 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); 1428 if (to > kLimit) to = kLimit; 1429 ranges->Add(CharacterRange(from, to), &zone); 1430 } 1431 DispatchTable table(&zone); 1432 DispatchTableConstructor cons(&table, false, &zone); 1433 cons.set_choice_index(0); 1434 cons.AddInverse(ranges); 1435 for (int i = 0; i < kLimit; i++) { 1436 bool is_on = false; 1437 for (int j = 0; !is_on && j < kRangeCount; j++) 1438 is_on = ranges->at(j).Contains(i); 1439 OutSet* set = table.Get(i); 1440 CHECK_EQ(is_on, set->Get(0) == false); 1441 } 1442 } 1443 Zone zone(CcTest::i_isolate()); 1444 ZoneList<CharacterRange>* ranges = 1445 new(&zone) ZoneList<CharacterRange>(1, &zone); 1446 ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone); 1447 DispatchTable table(&zone); 1448 DispatchTableConstructor cons(&table, false, &zone); 1449 cons.set_choice_index(0); 1450 cons.AddInverse(ranges); 1451 CHECK(!table.Get(0xFFFE)->Get(0)); 1452 CHECK(table.Get(0xFFFF)->Get(0)); 1453 } 1454 1455 1456 static uc32 canonicalize(uc32 c) { 1457 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; 1458 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); 1459 if (count == 0) { 1460 return c; 1461 } else { 1462 CHECK_EQ(1, count); 1463 return canon[0]; 1464 } 1465 } 1466 1467 1468 TEST(LatinCanonicalize) { 1469 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1470 for (char lower = 'a'; lower <= 'z'; lower++) { 1471 char upper = lower + ('A' - 'a'); 1472 CHECK_EQ(canonicalize(lower), canonicalize(upper)); 1473 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1474 int length = un_canonicalize.get(lower, '\0', uncanon); 1475 CHECK_EQ(2, length); 1476 CHECK_EQ(upper, uncanon[0]); 1477 CHECK_EQ(lower, uncanon[1]); 1478 } 1479 for (uc32 c = 128; c < (1 << 21); c++) 1480 CHECK_GE(canonicalize(c), 128); 1481 unibrow::Mapping<unibrow::ToUppercase> to_upper; 1482 // Canonicalization is only defined for the Basic Multilingual Plane. 1483 for (uc32 c = 0; c < (1 << 16); c++) { 1484 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; 1485 int length = to_upper.get(c, '\0', upper); 1486 if (length == 0) { 1487 length = 1; 1488 upper[0] = c; 1489 } 1490 uc32 u = upper[0]; 1491 if (length > 1 || (c >= 128 && u < 128)) 1492 u = c; 1493 CHECK_EQ(u, canonicalize(c)); 1494 } 1495 } 1496 1497 1498 static uc32 CanonRangeEnd(uc32 c) { 1499 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; 1500 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); 1501 if (count == 0) { 1502 return c; 1503 } else { 1504 CHECK_EQ(1, count); 1505 return canon[0]; 1506 } 1507 } 1508 1509 1510 TEST(RangeCanonicalization) { 1511 // Check that we arrive at the same result when using the basic 1512 // range canonicalization primitives as when using immediate 1513 // canonicalization. 1514 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1515 int block_start = 0; 1516 while (block_start <= 0xFFFF) { 1517 uc32 block_end = CanonRangeEnd(block_start); 1518 unsigned block_length = block_end - block_start + 1; 1519 if (block_length > 1) { 1520 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1521 int first_length = un_canonicalize.get(block_start, '\0', first); 1522 for (unsigned i = 1; i < block_length; i++) { 1523 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1524 int succ_length = un_canonicalize.get(block_start + i, '\0', succ); 1525 CHECK_EQ(first_length, succ_length); 1526 for (int j = 0; j < succ_length; j++) { 1527 int calc = first[j] + i; 1528 int found = succ[j]; 1529 CHECK_EQ(calc, found); 1530 } 1531 } 1532 } 1533 block_start = block_start + block_length; 1534 } 1535 } 1536 1537 1538 TEST(UncanonicalizeEquivalence) { 1539 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1540 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1541 for (int i = 0; i < (1 << 16); i++) { 1542 int length = un_canonicalize.get(i, '\0', chars); 1543 for (int j = 0; j < length; j++) { 1544 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1545 int length2 = un_canonicalize.get(chars[j], '\0', chars2); 1546 CHECK_EQ(length, length2); 1547 for (int k = 0; k < length; k++) 1548 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k])); 1549 } 1550 } 1551 } 1552 1553 1554 static void TestRangeCaseIndependence(CharacterRange input, 1555 Vector<CharacterRange> expected) { 1556 Zone zone(CcTest::i_isolate()); 1557 int count = expected.length(); 1558 ZoneList<CharacterRange>* list = 1559 new(&zone) ZoneList<CharacterRange>(count, &zone); 1560 input.AddCaseEquivalents(list, false, &zone); 1561 CHECK_EQ(count, list->length()); 1562 for (int i = 0; i < list->length(); i++) { 1563 CHECK_EQ(expected[i].from(), list->at(i).from()); 1564 CHECK_EQ(expected[i].to(), list->at(i).to()); 1565 } 1566 } 1567 1568 1569 static void TestSimpleRangeCaseIndependence(CharacterRange input, 1570 CharacterRange expected) { 1571 EmbeddedVector<CharacterRange, 1> vector; 1572 vector[0] = expected; 1573 TestRangeCaseIndependence(input, vector); 1574 } 1575 1576 1577 TEST(CharacterRangeCaseIndependence) { 1578 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'), 1579 CharacterRange::Singleton('A')); 1580 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'), 1581 CharacterRange::Singleton('Z')); 1582 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'), 1583 CharacterRange('A', 'Z')); 1584 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'), 1585 CharacterRange('C', 'F')); 1586 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'), 1587 CharacterRange('A', 'B')); 1588 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'), 1589 CharacterRange('Y', 'Z')); 1590 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1), 1591 CharacterRange('A', 'Z')); 1592 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'), 1593 CharacterRange('a', 'z')); 1594 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'), 1595 CharacterRange('c', 'f')); 1596 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), 1597 CharacterRange('a', 'z')); 1598 // Here we need to add [l-z] to complete the case independence of 1599 // [A-Za-z] but we expect [a-z] to be added since we always add a 1600 // whole block at a time. 1601 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), 1602 CharacterRange('a', 'z')); 1603 } 1604 1605 1606 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) { 1607 if (ranges == NULL) 1608 return false; 1609 for (int i = 0; i < ranges->length(); i++) { 1610 CharacterRange range = ranges->at(i); 1611 if (range.from() <= c && c <= range.to()) 1612 return true; 1613 } 1614 return false; 1615 } 1616 1617 1618 TEST(CharClassDifference) { 1619 Zone zone(CcTest::i_isolate()); 1620 ZoneList<CharacterRange>* base = 1621 new(&zone) ZoneList<CharacterRange>(1, &zone); 1622 base->Add(CharacterRange::Everything(), &zone); 1623 Vector<const int> overlay = CharacterRange::GetWordBounds(); 1624 ZoneList<CharacterRange>* included = NULL; 1625 ZoneList<CharacterRange>* excluded = NULL; 1626 CharacterRange::Split(base, overlay, &included, &excluded, &zone); 1627 for (int i = 0; i < (1 << 16); i++) { 1628 bool in_base = InClass(i, base); 1629 if (in_base) { 1630 bool in_overlay = false; 1631 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) { 1632 if (overlay[j] <= i && i < overlay[j+1]) 1633 in_overlay = true; 1634 } 1635 CHECK_EQ(in_overlay, InClass(i, included)); 1636 CHECK_EQ(!in_overlay, InClass(i, excluded)); 1637 } else { 1638 CHECK(!InClass(i, included)); 1639 CHECK(!InClass(i, excluded)); 1640 } 1641 } 1642 } 1643 1644 1645 TEST(CanonicalizeCharacterSets) { 1646 Zone zone(CcTest::i_isolate()); 1647 ZoneList<CharacterRange>* list = 1648 new(&zone) ZoneList<CharacterRange>(4, &zone); 1649 CharacterSet set(list); 1650 1651 list->Add(CharacterRange(10, 20), &zone); 1652 list->Add(CharacterRange(30, 40), &zone); 1653 list->Add(CharacterRange(50, 60), &zone); 1654 set.Canonicalize(); 1655 DCHECK_EQ(3, list->length()); 1656 DCHECK_EQ(10, list->at(0).from()); 1657 DCHECK_EQ(20, list->at(0).to()); 1658 DCHECK_EQ(30, list->at(1).from()); 1659 DCHECK_EQ(40, list->at(1).to()); 1660 DCHECK_EQ(50, list->at(2).from()); 1661 DCHECK_EQ(60, list->at(2).to()); 1662 1663 list->Rewind(0); 1664 list->Add(CharacterRange(10, 20), &zone); 1665 list->Add(CharacterRange(50, 60), &zone); 1666 list->Add(CharacterRange(30, 40), &zone); 1667 set.Canonicalize(); 1668 DCHECK_EQ(3, list->length()); 1669 DCHECK_EQ(10, list->at(0).from()); 1670 DCHECK_EQ(20, list->at(0).to()); 1671 DCHECK_EQ(30, list->at(1).from()); 1672 DCHECK_EQ(40, list->at(1).to()); 1673 DCHECK_EQ(50, list->at(2).from()); 1674 DCHECK_EQ(60, list->at(2).to()); 1675 1676 list->Rewind(0); 1677 list->Add(CharacterRange(30, 40), &zone); 1678 list->Add(CharacterRange(10, 20), &zone); 1679 list->Add(CharacterRange(25, 25), &zone); 1680 list->Add(CharacterRange(100, 100), &zone); 1681 list->Add(CharacterRange(1, 1), &zone); 1682 set.Canonicalize(); 1683 DCHECK_EQ(5, list->length()); 1684 DCHECK_EQ(1, list->at(0).from()); 1685 DCHECK_EQ(1, list->at(0).to()); 1686 DCHECK_EQ(10, list->at(1).from()); 1687 DCHECK_EQ(20, list->at(1).to()); 1688 DCHECK_EQ(25, list->at(2).from()); 1689 DCHECK_EQ(25, list->at(2).to()); 1690 DCHECK_EQ(30, list->at(3).from()); 1691 DCHECK_EQ(40, list->at(3).to()); 1692 DCHECK_EQ(100, list->at(4).from()); 1693 DCHECK_EQ(100, list->at(4).to()); 1694 1695 list->Rewind(0); 1696 list->Add(CharacterRange(10, 19), &zone); 1697 list->Add(CharacterRange(21, 30), &zone); 1698 list->Add(CharacterRange(20, 20), &zone); 1699 set.Canonicalize(); 1700 DCHECK_EQ(1, list->length()); 1701 DCHECK_EQ(10, list->at(0).from()); 1702 DCHECK_EQ(30, list->at(0).to()); 1703 } 1704 1705 1706 TEST(CharacterRangeMerge) { 1707 Zone zone(CcTest::i_isolate()); 1708 ZoneList<CharacterRange> l1(4, &zone); 1709 ZoneList<CharacterRange> l2(4, &zone); 1710 // Create all combinations of intersections of ranges, both singletons and 1711 // longer. 1712 1713 int offset = 0; 1714 1715 // The five kinds of singleton intersections: 1716 // X 1717 // Y - outside before 1718 // Y - outside touching start 1719 // Y - overlap 1720 // Y - outside touching end 1721 // Y - outside after 1722 1723 for (int i = 0; i < 5; i++) { 1724 l1.Add(CharacterRange::Singleton(offset + 2), &zone); 1725 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1726 offset += 6; 1727 } 1728 1729 // The seven kinds of singleton/non-singleton intersections: 1730 // XXX 1731 // Y - outside before 1732 // Y - outside touching start 1733 // Y - inside touching start 1734 // Y - entirely inside 1735 // Y - inside touching end 1736 // Y - outside touching end 1737 // Y - disjoint after 1738 1739 for (int i = 0; i < 7; i++) { 1740 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone); 1741 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1742 offset += 8; 1743 } 1744 1745 // The eleven kinds of non-singleton intersections: 1746 // 1747 // XXXXXXXX 1748 // YYYY - outside before. 1749 // YYYY - outside touching start. 1750 // YYYY - overlapping start 1751 // YYYY - inside touching start 1752 // YYYY - entirely inside 1753 // YYYY - inside touching end 1754 // YYYY - overlapping end 1755 // YYYY - outside touching end 1756 // YYYY - outside after 1757 // YYYYYYYY - identical 1758 // YYYYYYYYYYYY - containing entirely. 1759 1760 for (int i = 0; i < 9; i++) { 1761 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8. 1762 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone); 1763 offset += 22; 1764 } 1765 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1766 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1767 offset += 22; 1768 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1769 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone); 1770 offset += 22; 1771 1772 // Different kinds of multi-range overlap: 1773 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX 1774 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y 1775 1776 l1.Add(CharacterRange::Range(offset, offset + 21), &zone); 1777 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone); 1778 for (int i = 0; i < 6; i++) { 1779 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone); 1780 l2.Add(CharacterRange::Singleton(offset + 8), &zone); 1781 offset += 9; 1782 } 1783 1784 DCHECK(CharacterRange::IsCanonical(&l1)); 1785 DCHECK(CharacterRange::IsCanonical(&l2)); 1786 1787 ZoneList<CharacterRange> first_only(4, &zone); 1788 ZoneList<CharacterRange> second_only(4, &zone); 1789 ZoneList<CharacterRange> both(4, &zone); 1790 } 1791 1792 1793 TEST(Graph) { 1794 Execute("\\b\\w+\\b", false, true, true); 1795 } 1796