1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 29 #include <stdlib.h> 30 31 #include "src/v8.h" 32 33 #include "src/ast.h" 34 #include "src/char-predicates-inl.h" 35 #include "src/jsregexp.h" 36 #include "src/parser.h" 37 #include "src/regexp-macro-assembler-irregexp.h" 38 #include "src/regexp-macro-assembler.h" 39 #include "src/string-stream.h" 40 #include "src/zone-inl.h" 41 #ifdef V8_INTERPRETED_REGEXP 42 #include "src/interpreter-irregexp.h" 43 #else // V8_INTERPRETED_REGEXP 44 #include "src/macro-assembler.h" 45 #include "src/code.h" 46 #if V8_TARGET_ARCH_ARM 47 #include "src/arm/assembler-arm.h" 48 #include "src/arm/macro-assembler-arm.h" 49 #include "src/arm/regexp-macro-assembler-arm.h" 50 #endif 51 #if V8_TARGET_ARCH_ARM64 52 #include "src/arm64/assembler-arm64.h" 53 #include "src/arm64/macro-assembler-arm64.h" 54 #include "src/arm64/regexp-macro-assembler-arm64.h" 55 #endif 56 #if V8_TARGET_ARCH_MIPS 57 #include "src/mips/assembler-mips.h" 58 #include "src/mips/macro-assembler-mips.h" 59 #include "src/mips/regexp-macro-assembler-mips.h" 60 #endif 61 #if V8_TARGET_ARCH_X64 62 #include "src/x64/assembler-x64.h" 63 #include "src/x64/macro-assembler-x64.h" 64 #include "src/x64/regexp-macro-assembler-x64.h" 65 #endif 66 #if V8_TARGET_ARCH_IA32 67 #include "src/ia32/assembler-ia32.h" 68 #include "src/ia32/macro-assembler-ia32.h" 69 #include "src/ia32/regexp-macro-assembler-ia32.h" 70 #endif 71 #if V8_TARGET_ARCH_X87 72 #include "src/x87/assembler-x87.h" 73 #include "src/x87/macro-assembler-x87.h" 74 #include "src/x87/regexp-macro-assembler-x87.h" 75 #endif 76 #endif // V8_INTERPRETED_REGEXP 77 #include "test/cctest/cctest.h" 78 79 using namespace v8::internal; 80 81 82 static bool CheckParse(const char* input) { 83 V8::Initialize(NULL); 84 v8::HandleScope scope(CcTest::isolate()); 85 Zone zone(CcTest::i_isolate()); 86 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 87 RegExpCompileData result; 88 return v8::internal::RegExpParser::ParseRegExp( 89 &reader, false, &result, &zone); 90 } 91 92 93 static SmartArrayPointer<const char> Parse(const char* input) { 94 V8::Initialize(NULL); 95 v8::HandleScope scope(CcTest::isolate()); 96 Zone zone(CcTest::i_isolate()); 97 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 98 RegExpCompileData result; 99 CHECK(v8::internal::RegExpParser::ParseRegExp( 100 &reader, false, &result, &zone)); 101 CHECK(result.tree != NULL); 102 CHECK(result.error.is_null()); 103 SmartArrayPointer<const char> output = result.tree->ToString(&zone); 104 return output; 105 } 106 107 108 static bool CheckSimple(const char* input) { 109 V8::Initialize(NULL); 110 v8::HandleScope scope(CcTest::isolate()); 111 Zone zone(CcTest::i_isolate()); 112 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 113 RegExpCompileData result; 114 CHECK(v8::internal::RegExpParser::ParseRegExp( 115 &reader, false, &result, &zone)); 116 CHECK(result.tree != NULL); 117 CHECK(result.error.is_null()); 118 return result.simple; 119 } 120 121 struct MinMaxPair { 122 int min_match; 123 int max_match; 124 }; 125 126 127 static MinMaxPair CheckMinMaxMatch(const char* input) { 128 V8::Initialize(NULL); 129 v8::HandleScope scope(CcTest::isolate()); 130 Zone zone(CcTest::i_isolate()); 131 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 132 RegExpCompileData result; 133 CHECK(v8::internal::RegExpParser::ParseRegExp( 134 &reader, false, &result, &zone)); 135 CHECK(result.tree != NULL); 136 CHECK(result.error.is_null()); 137 int min_match = result.tree->min_match(); 138 int max_match = result.tree->max_match(); 139 MinMaxPair pair = { min_match, max_match }; 140 return pair; 141 } 142 143 144 #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input)) 145 #define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, Parse(input).get()) 146 #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input)); 147 #define CHECK_MIN_MAX(input, min, max) \ 148 { MinMaxPair min_max = CheckMinMaxMatch(input); \ 149 CHECK_EQ(min, min_max.min_match); \ 150 CHECK_EQ(max, min_max.max_match); \ 151 } 152 153 TEST(Parser) { 154 V8::Initialize(NULL); 155 156 CHECK_PARSE_ERROR("?"); 157 158 CHECK_PARSE_EQ("abc", "'abc'"); 159 CHECK_PARSE_EQ("", "%"); 160 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); 161 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 162 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); 163 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); 164 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); 165 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')"); 166 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); 167 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))"); 168 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))"); 169 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))"); 170 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))"); 171 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))"); 172 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))"); 173 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))"); 174 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))"); 175 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))"); 176 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))"); 177 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))"); 178 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))"); 179 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'"); 180 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')"); 181 CHECK_PARSE_EQ("(?:foo)", "'foo'"); 182 CHECK_PARSE_EQ("(?: foo )", "' foo '"); 183 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); 184 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); 185 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); 186 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); 187 CHECK_PARSE_EQ("()", "(^ %)"); 188 CHECK_PARSE_EQ("(?=)", "(-> + %)"); 189 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows 190 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252 191 CHECK_PARSE_EQ("[x]", "[x]"); 192 CHECK_PARSE_EQ("[xyz]", "[x y z]"); 193 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); 194 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); 195 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); 196 CHECK_PARSE_EQ("]", "']'"); 197 CHECK_PARSE_EQ("}", "'}'"); 198 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); 199 CHECK_PARSE_EQ("[\\d]", "[0-9]"); 200 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]"); 201 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]"); 202 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]"); 203 CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]"); 204 // Control character outside character class. 205 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", 206 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); 207 CHECK_PARSE_EQ("\\c!", "'\\c!'"); 208 CHECK_PARSE_EQ("\\c_", "'\\c_'"); 209 CHECK_PARSE_EQ("\\c~", "'\\c~'"); 210 CHECK_PARSE_EQ("\\c1", "'\\c1'"); 211 // Control character inside character class. 212 CHECK_PARSE_EQ("[\\c!]", "[\\ c !]"); 213 CHECK_PARSE_EQ("[\\c_]", "[\\x1f]"); 214 CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]"); 215 CHECK_PARSE_EQ("[\\ca]", "[\\x01]"); 216 CHECK_PARSE_EQ("[\\cz]", "[\\x1a]"); 217 CHECK_PARSE_EQ("[\\cA]", "[\\x01]"); 218 CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]"); 219 CHECK_PARSE_EQ("[\\c1]", "[\\x11]"); 220 221 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); 222 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); 223 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); 224 CHECK_PARSE_EQ("\\0", "'\\x00'"); 225 CHECK_PARSE_EQ("\\8", "'8'"); 226 CHECK_PARSE_EQ("\\9", "'9'"); 227 CHECK_PARSE_EQ("\\11", "'\\x09'"); 228 CHECK_PARSE_EQ("\\11a", "'\\x09a'"); 229 CHECK_PARSE_EQ("\\011", "'\\x09'"); 230 CHECK_PARSE_EQ("\\00011", "'\\x0011'"); 231 CHECK_PARSE_EQ("\\118", "'\\x098'"); 232 CHECK_PARSE_EQ("\\111", "'I'"); 233 CHECK_PARSE_EQ("\\1111", "'I1'"); 234 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))"); 235 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))"); 236 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))"); 237 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')"); 238 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')" 239 " (# 0 - g (<- 1)))"); 240 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')" 241 " (# 0 - g (<- 2)))"); 242 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')" 243 " (# 0 - g (<- 3)))"); 244 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')" 245 " (# 0 - g '\\x04'))"); 246 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", 247 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 248 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); 249 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", 250 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 251 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')"); 252 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))"); 253 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')"); 254 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')"); 255 CHECK_PARSE_EQ("(?=a)?a", "'a'"); 256 CHECK_PARSE_EQ("(?=a){0,10}a", "'a'"); 257 CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')"); 258 CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')"); 259 CHECK_PARSE_EQ("(?!a)?a", "'a'"); 260 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')"); 261 CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))"); 262 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))"); 263 CHECK_PARSE_EQ("[\\0]", "[\\x00]"); 264 CHECK_PARSE_EQ("[\\11]", "[\\x09]"); 265 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]"); 266 CHECK_PARSE_EQ("[\\011]", "[\\x09]"); 267 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]"); 268 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]"); 269 CHECK_PARSE_EQ("[\\111]", "[I]"); 270 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); 271 CHECK_PARSE_EQ("\\x34", "'\x34'"); 272 CHECK_PARSE_EQ("\\x60", "'\x60'"); 273 CHECK_PARSE_EQ("\\x3z", "'x3z'"); 274 CHECK_PARSE_EQ("\\c", "'\\c'"); 275 CHECK_PARSE_EQ("\\u0034", "'\x34'"); 276 CHECK_PARSE_EQ("\\u003z", "'u003z'"); 277 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))"); 278 279 CHECK_SIMPLE("", false); 280 CHECK_SIMPLE("a", true); 281 CHECK_SIMPLE("a|b", false); 282 CHECK_SIMPLE("a\\n", false); 283 CHECK_SIMPLE("^a", false); 284 CHECK_SIMPLE("a$", false); 285 CHECK_SIMPLE("a\\b!", false); 286 CHECK_SIMPLE("a\\Bb", false); 287 CHECK_SIMPLE("a*", false); 288 CHECK_SIMPLE("a*?", false); 289 CHECK_SIMPLE("a?", false); 290 CHECK_SIMPLE("a??", false); 291 CHECK_SIMPLE("a{0,1}?", false); 292 CHECK_SIMPLE("a{1,1}?", false); 293 CHECK_SIMPLE("a{1,2}?", false); 294 CHECK_SIMPLE("a+?", false); 295 CHECK_SIMPLE("(a)", false); 296 CHECK_SIMPLE("(a)\\1", false); 297 CHECK_SIMPLE("(\\1a)", false); 298 CHECK_SIMPLE("\\1(a)", false); 299 CHECK_SIMPLE("a\\s", false); 300 CHECK_SIMPLE("a\\S", false); 301 CHECK_SIMPLE("a\\d", false); 302 CHECK_SIMPLE("a\\D", false); 303 CHECK_SIMPLE("a\\w", false); 304 CHECK_SIMPLE("a\\W", false); 305 CHECK_SIMPLE("a.", false); 306 CHECK_SIMPLE("a\\q", false); 307 CHECK_SIMPLE("a[a]", false); 308 CHECK_SIMPLE("a[^a]", false); 309 CHECK_SIMPLE("a[a-z]", false); 310 CHECK_SIMPLE("a[\\q]", false); 311 CHECK_SIMPLE("a(?:b)", false); 312 CHECK_SIMPLE("a(?=b)", false); 313 CHECK_SIMPLE("a(?!b)", false); 314 CHECK_SIMPLE("\\x60", false); 315 CHECK_SIMPLE("\\u0060", false); 316 CHECK_SIMPLE("\\cA", false); 317 CHECK_SIMPLE("\\q", false); 318 CHECK_SIMPLE("\\1112", false); 319 CHECK_SIMPLE("\\0", false); 320 CHECK_SIMPLE("(a)\\1", false); 321 CHECK_SIMPLE("(?=a)?a", false); 322 CHECK_SIMPLE("(?!a)?a\\1", false); 323 CHECK_SIMPLE("(?:(?=a))a\\1", false); 324 325 CHECK_PARSE_EQ("a{}", "'a{}'"); 326 CHECK_PARSE_EQ("a{,}", "'a{,}'"); 327 CHECK_PARSE_EQ("a{", "'a{'"); 328 CHECK_PARSE_EQ("a{z}", "'a{z}'"); 329 CHECK_PARSE_EQ("a{1z}", "'a{1z}'"); 330 CHECK_PARSE_EQ("a{12z}", "'a{12z}'"); 331 CHECK_PARSE_EQ("a{12,", "'a{12,'"); 332 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'"); 333 CHECK_PARSE_EQ("{}", "'{}'"); 334 CHECK_PARSE_EQ("{,}", "'{,}'"); 335 CHECK_PARSE_EQ("{", "'{'"); 336 CHECK_PARSE_EQ("{z}", "'{z}'"); 337 CHECK_PARSE_EQ("{1z}", "'{1z}'"); 338 CHECK_PARSE_EQ("{12z}", "'{12z}'"); 339 CHECK_PARSE_EQ("{12,", "'{12,'"); 340 CHECK_PARSE_EQ("{12,3b", "'{12,3b'"); 341 342 CHECK_MIN_MAX("a", 1, 1); 343 CHECK_MIN_MAX("abc", 3, 3); 344 CHECK_MIN_MAX("a[bc]d", 3, 3); 345 CHECK_MIN_MAX("a|bc", 1, 2); 346 CHECK_MIN_MAX("ab|c", 1, 2); 347 CHECK_MIN_MAX("a||bc", 0, 2); 348 CHECK_MIN_MAX("|", 0, 0); 349 CHECK_MIN_MAX("(?:ab)", 2, 2); 350 CHECK_MIN_MAX("(?:ab|cde)", 2, 3); 351 CHECK_MIN_MAX("(?:ab)|cde", 2, 3); 352 CHECK_MIN_MAX("(ab)", 2, 2); 353 CHECK_MIN_MAX("(ab|cde)", 2, 3); 354 CHECK_MIN_MAX("(ab)\\1", 2, 4); 355 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6); 356 CHECK_MIN_MAX("(?:ab)?", 0, 2); 357 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity); 358 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity); 359 CHECK_MIN_MAX("a?", 0, 1); 360 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity); 361 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity); 362 CHECK_MIN_MAX("a??", 0, 1); 363 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity); 364 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity); 365 CHECK_MIN_MAX("(?:a?)?", 0, 1); 366 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity); 367 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity); 368 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity); 369 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity); 370 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity); 371 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity); 372 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity); 373 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity); 374 CHECK_MIN_MAX("a{0}", 0, 0); 375 CHECK_MIN_MAX("(?:a+){0}", 0, 0); 376 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0); 377 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity); 378 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity); 379 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity); 380 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity); 381 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity); 382 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14); 383 CHECK_MIN_MAX("a\\bc", 2, 2); 384 CHECK_MIN_MAX("a\\Bc", 2, 2); 385 CHECK_MIN_MAX("a\\sc", 3, 3); 386 CHECK_MIN_MAX("a\\Sc", 3, 3); 387 CHECK_MIN_MAX("a(?=b)c", 2, 2); 388 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2); 389 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2); 390 } 391 392 393 TEST(ParserRegression) { 394 CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])"); 395 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); 396 CHECK_PARSE_EQ("{", "'{'"); 397 CHECK_PARSE_EQ("a|", "(| 'a' %)"); 398 } 399 400 static void ExpectError(const char* input, 401 const char* expected) { 402 V8::Initialize(NULL); 403 v8::HandleScope scope(CcTest::isolate()); 404 Zone zone(CcTest::i_isolate()); 405 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 406 RegExpCompileData result; 407 CHECK(!v8::internal::RegExpParser::ParseRegExp( 408 &reader, false, &result, &zone)); 409 CHECK(result.tree == NULL); 410 CHECK(!result.error.is_null()); 411 SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS); 412 CHECK_EQ(expected, str.get()); 413 } 414 415 416 TEST(Errors) { 417 const char* kEndBackslash = "\\ at end of pattern"; 418 ExpectError("\\", kEndBackslash); 419 const char* kUnterminatedGroup = "Unterminated group"; 420 ExpectError("(foo", kUnterminatedGroup); 421 const char* kInvalidGroup = "Invalid group"; 422 ExpectError("(?", kInvalidGroup); 423 const char* kUnterminatedCharacterClass = "Unterminated character class"; 424 ExpectError("[", kUnterminatedCharacterClass); 425 ExpectError("[a-", kUnterminatedCharacterClass); 426 const char* kNothingToRepeat = "Nothing to repeat"; 427 ExpectError("*", kNothingToRepeat); 428 ExpectError("?", kNothingToRepeat); 429 ExpectError("+", kNothingToRepeat); 430 ExpectError("{1}", kNothingToRepeat); 431 ExpectError("{1,2}", kNothingToRepeat); 432 ExpectError("{1,}", kNothingToRepeat); 433 434 // Check that we don't allow more than kMaxCapture captures 435 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures. 436 const char* kTooManyCaptures = "Too many captures"; 437 HeapStringAllocator allocator; 438 StringStream accumulator(&allocator); 439 for (int i = 0; i <= kMaxCaptures; i++) { 440 accumulator.Add("()"); 441 } 442 SmartArrayPointer<const char> many_captures(accumulator.ToCString()); 443 ExpectError(many_captures.get(), kTooManyCaptures); 444 } 445 446 447 static bool IsDigit(uc16 c) { 448 return ('0' <= c && c <= '9'); 449 } 450 451 452 static bool NotDigit(uc16 c) { 453 return !IsDigit(c); 454 } 455 456 457 static bool IsWhiteSpaceOrLineTerminator(uc16 c) { 458 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes 459 // WhiteSpace (7.2) and LineTerminator (7.3) values. 460 return v8::internal::WhiteSpaceOrLineTerminator::Is(c); 461 } 462 463 464 static bool NotWhiteSpaceNorLineTermiantor(uc16 c) { 465 return !IsWhiteSpaceOrLineTerminator(c); 466 } 467 468 469 static bool NotWord(uc16 c) { 470 return !IsRegExpWord(c); 471 } 472 473 474 static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { 475 Zone zone(CcTest::i_isolate()); 476 ZoneList<CharacterRange>* ranges = 477 new(&zone) ZoneList<CharacterRange>(2, &zone); 478 CharacterRange::AddClassEscape(c, ranges, &zone); 479 for (unsigned i = 0; i < (1 << 16); i++) { 480 bool in_class = false; 481 for (int j = 0; !in_class && j < ranges->length(); j++) { 482 CharacterRange& range = ranges->at(j); 483 in_class = (range.from() <= i && i <= range.to()); 484 } 485 CHECK_EQ(pred(i), in_class); 486 } 487 } 488 489 490 TEST(CharacterClassEscapes) { 491 v8::internal::V8::Initialize(NULL); 492 TestCharacterClassEscapes('.', IsRegExpNewline); 493 TestCharacterClassEscapes('d', IsDigit); 494 TestCharacterClassEscapes('D', NotDigit); 495 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator); 496 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor); 497 TestCharacterClassEscapes('w', IsRegExpWord); 498 TestCharacterClassEscapes('W', NotWord); 499 } 500 501 502 static RegExpNode* Compile(const char* input, 503 bool multiline, 504 bool is_ascii, 505 Zone* zone) { 506 V8::Initialize(NULL); 507 Isolate* isolate = CcTest::i_isolate(); 508 FlatStringReader reader(isolate, CStrVector(input)); 509 RegExpCompileData compile_data; 510 if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, 511 &compile_data, zone)) 512 return NULL; 513 Handle<String> pattern = isolate->factory()-> 514 NewStringFromUtf8(CStrVector(input)).ToHandleChecked(); 515 Handle<String> sample_subject = 516 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked(); 517 RegExpEngine::Compile(&compile_data, 518 false, 519 false, 520 multiline, 521 pattern, 522 sample_subject, 523 is_ascii, 524 zone); 525 return compile_data.node; 526 } 527 528 529 static void Execute(const char* input, 530 bool multiline, 531 bool is_ascii, 532 bool dot_output = false) { 533 v8::HandleScope scope(CcTest::isolate()); 534 Zone zone(CcTest::i_isolate()); 535 RegExpNode* node = Compile(input, multiline, is_ascii, &zone); 536 USE(node); 537 #ifdef DEBUG 538 if (dot_output) { 539 RegExpEngine::DotPrint(input, node, false); 540 } 541 #endif // DEBUG 542 } 543 544 545 class TestConfig { 546 public: 547 typedef int Key; 548 typedef int Value; 549 static const int kNoKey; 550 static int NoValue() { return 0; } 551 static inline int Compare(int a, int b) { 552 if (a < b) 553 return -1; 554 else if (a > b) 555 return 1; 556 else 557 return 0; 558 } 559 }; 560 561 562 const int TestConfig::kNoKey = 0; 563 564 565 static unsigned PseudoRandom(int i, int j) { 566 return ~(~((i * 781) ^ (j * 329))); 567 } 568 569 570 TEST(SplayTreeSimple) { 571 v8::internal::V8::Initialize(NULL); 572 static const unsigned kLimit = 1000; 573 Zone zone(CcTest::i_isolate()); 574 ZoneSplayTree<TestConfig> tree(&zone); 575 bool seen[kLimit]; 576 for (unsigned i = 0; i < kLimit; i++) seen[i] = false; 577 #define CHECK_MAPS_EQUAL() do { \ 578 for (unsigned k = 0; k < kLimit; k++) \ 579 CHECK_EQ(seen[k], tree.Find(k, &loc)); \ 580 } while (false) 581 for (int i = 0; i < 50; i++) { 582 for (int j = 0; j < 50; j++) { 583 unsigned next = PseudoRandom(i, j) % kLimit; 584 if (seen[next]) { 585 // We've already seen this one. Check the value and remove 586 // it. 587 ZoneSplayTree<TestConfig>::Locator loc; 588 CHECK(tree.Find(next, &loc)); 589 CHECK_EQ(next, loc.key()); 590 CHECK_EQ(3 * next, loc.value()); 591 tree.Remove(next); 592 seen[next] = false; 593 CHECK_MAPS_EQUAL(); 594 } else { 595 // Check that it wasn't there already and then add it. 596 ZoneSplayTree<TestConfig>::Locator loc; 597 CHECK(!tree.Find(next, &loc)); 598 CHECK(tree.Insert(next, &loc)); 599 CHECK_EQ(next, loc.key()); 600 loc.set_value(3 * next); 601 seen[next] = true; 602 CHECK_MAPS_EQUAL(); 603 } 604 int val = PseudoRandom(j, i) % kLimit; 605 if (seen[val]) { 606 ZoneSplayTree<TestConfig>::Locator loc; 607 CHECK(tree.FindGreatestLessThan(val, &loc)); 608 CHECK_EQ(loc.key(), val); 609 break; 610 } 611 val = PseudoRandom(i + j, i - j) % kLimit; 612 if (seen[val]) { 613 ZoneSplayTree<TestConfig>::Locator loc; 614 CHECK(tree.FindLeastGreaterThan(val, &loc)); 615 CHECK_EQ(loc.key(), val); 616 break; 617 } 618 } 619 } 620 } 621 622 623 TEST(DispatchTableConstruction) { 624 v8::internal::V8::Initialize(NULL); 625 // Initialize test data. 626 static const int kLimit = 1000; 627 static const int kRangeCount = 8; 628 static const int kRangeSize = 16; 629 uc16 ranges[kRangeCount][2 * kRangeSize]; 630 for (int i = 0; i < kRangeCount; i++) { 631 Vector<uc16> range(ranges[i], 2 * kRangeSize); 632 for (int j = 0; j < 2 * kRangeSize; j++) { 633 range[j] = PseudoRandom(i + 25, j + 87) % kLimit; 634 } 635 range.Sort(); 636 for (int j = 1; j < 2 * kRangeSize; j++) { 637 CHECK(range[j-1] <= range[j]); 638 } 639 } 640 // Enter test data into dispatch table. 641 Zone zone(CcTest::i_isolate()); 642 DispatchTable table(&zone); 643 for (int i = 0; i < kRangeCount; i++) { 644 uc16* range = ranges[i]; 645 for (int j = 0; j < 2 * kRangeSize; j += 2) 646 table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone); 647 } 648 // Check that the table looks as we would expect 649 for (int p = 0; p < kLimit; p++) { 650 OutSet* outs = table.Get(p); 651 for (int j = 0; j < kRangeCount; j++) { 652 uc16* range = ranges[j]; 653 bool is_on = false; 654 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) 655 is_on = (range[k] <= p && p <= range[k + 1]); 656 CHECK_EQ(is_on, outs->Get(j)); 657 } 658 } 659 } 660 661 662 // Test of debug-only syntax. 663 #ifdef DEBUG 664 665 TEST(ParsePossessiveRepetition) { 666 bool old_flag_value = FLAG_regexp_possessive_quantifier; 667 668 // Enable possessive quantifier syntax. 669 FLAG_regexp_possessive_quantifier = true; 670 671 CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')"); 672 CHECK_PARSE_EQ("a++", "(# 1 - p 'a')"); 673 CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')"); 674 CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')"); 675 CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')"); 676 677 // Disable possessive quantifier syntax. 678 FLAG_regexp_possessive_quantifier = false; 679 680 CHECK_PARSE_ERROR("a*+"); 681 CHECK_PARSE_ERROR("a++"); 682 CHECK_PARSE_ERROR("a?+"); 683 CHECK_PARSE_ERROR("a{10,20}+"); 684 CHECK_PARSE_ERROR("a{10,20}+b"); 685 686 FLAG_regexp_possessive_quantifier = old_flag_value; 687 } 688 689 #endif 690 691 // Tests of interpreter. 692 693 694 #ifndef V8_INTERPRETED_REGEXP 695 696 #if V8_TARGET_ARCH_IA32 697 typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler; 698 #elif V8_TARGET_ARCH_X64 699 typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler; 700 #elif V8_TARGET_ARCH_ARM 701 typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler; 702 #elif V8_TARGET_ARCH_ARM64 703 typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler; 704 #elif V8_TARGET_ARCH_MIPS 705 typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler; 706 #elif V8_TARGET_ARCH_X87 707 typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler; 708 #endif 709 710 class ContextInitializer { 711 public: 712 ContextInitializer() 713 : scope_(CcTest::isolate()), 714 env_(v8::Context::New(CcTest::isolate())) { 715 env_->Enter(); 716 } 717 ~ContextInitializer() { 718 env_->Exit(); 719 } 720 private: 721 v8::HandleScope scope_; 722 v8::Handle<v8::Context> env_; 723 }; 724 725 726 static ArchRegExpMacroAssembler::Result Execute(Code* code, 727 String* input, 728 int start_offset, 729 const byte* input_start, 730 const byte* input_end, 731 int* captures) { 732 return NativeRegExpMacroAssembler::Execute( 733 code, 734 input, 735 start_offset, 736 input_start, 737 input_end, 738 captures, 739 0, 740 CcTest::i_isolate()); 741 } 742 743 744 TEST(MacroAssemblerNativeSuccess) { 745 v8::V8::Initialize(); 746 ContextInitializer initializer; 747 Isolate* isolate = CcTest::i_isolate(); 748 Factory* factory = isolate->factory(); 749 Zone zone(isolate); 750 751 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 752 753 m.Succeed(); 754 755 Handle<String> source = factory->NewStringFromStaticAscii(""); 756 Handle<Object> code_object = m.GetCode(source); 757 Handle<Code> code = Handle<Code>::cast(code_object); 758 759 int captures[4] = {42, 37, 87, 117}; 760 Handle<String> input = factory->NewStringFromStaticAscii("foofoo"); 761 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 762 const byte* start_adr = 763 reinterpret_cast<const byte*>(seq_input->GetCharsAddress()); 764 765 NativeRegExpMacroAssembler::Result result = 766 Execute(*code, 767 *input, 768 0, 769 start_adr, 770 start_adr + seq_input->length(), 771 captures); 772 773 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 774 CHECK_EQ(-1, captures[0]); 775 CHECK_EQ(-1, captures[1]); 776 CHECK_EQ(-1, captures[2]); 777 CHECK_EQ(-1, captures[3]); 778 } 779 780 781 TEST(MacroAssemblerNativeSimple) { 782 v8::V8::Initialize(); 783 ContextInitializer initializer; 784 Isolate* isolate = CcTest::i_isolate(); 785 Factory* factory = isolate->factory(); 786 Zone zone(isolate); 787 788 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 789 790 Label fail, backtrack; 791 m.PushBacktrack(&fail); 792 m.CheckNotAtStart(NULL); 793 m.LoadCurrentCharacter(2, NULL); 794 m.CheckNotCharacter('o', NULL); 795 m.LoadCurrentCharacter(1, NULL, false); 796 m.CheckNotCharacter('o', NULL); 797 m.LoadCurrentCharacter(0, NULL, false); 798 m.CheckNotCharacter('f', NULL); 799 m.WriteCurrentPositionToRegister(0, 0); 800 m.WriteCurrentPositionToRegister(1, 3); 801 m.AdvanceCurrentPosition(3); 802 m.PushBacktrack(&backtrack); 803 m.Succeed(); 804 m.Bind(&backtrack); 805 m.Backtrack(); 806 m.Bind(&fail); 807 m.Fail(); 808 809 Handle<String> source = factory->NewStringFromStaticAscii("^foo"); 810 Handle<Object> code_object = m.GetCode(source); 811 Handle<Code> code = Handle<Code>::cast(code_object); 812 813 int captures[4] = {42, 37, 87, 117}; 814 Handle<String> input = factory->NewStringFromStaticAscii("foofoo"); 815 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 816 Address start_adr = seq_input->GetCharsAddress(); 817 818 NativeRegExpMacroAssembler::Result result = 819 Execute(*code, 820 *input, 821 0, 822 start_adr, 823 start_adr + input->length(), 824 captures); 825 826 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 827 CHECK_EQ(0, captures[0]); 828 CHECK_EQ(3, captures[1]); 829 CHECK_EQ(-1, captures[2]); 830 CHECK_EQ(-1, captures[3]); 831 832 input = factory->NewStringFromStaticAscii("barbarbar"); 833 seq_input = Handle<SeqOneByteString>::cast(input); 834 start_adr = seq_input->GetCharsAddress(); 835 836 result = Execute(*code, 837 *input, 838 0, 839 start_adr, 840 start_adr + input->length(), 841 captures); 842 843 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 844 } 845 846 847 TEST(MacroAssemblerNativeSimpleUC16) { 848 v8::V8::Initialize(); 849 ContextInitializer initializer; 850 Isolate* isolate = CcTest::i_isolate(); 851 Factory* factory = isolate->factory(); 852 Zone zone(isolate); 853 854 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 855 856 Label fail, backtrack; 857 m.PushBacktrack(&fail); 858 m.CheckNotAtStart(NULL); 859 m.LoadCurrentCharacter(2, NULL); 860 m.CheckNotCharacter('o', NULL); 861 m.LoadCurrentCharacter(1, NULL, false); 862 m.CheckNotCharacter('o', NULL); 863 m.LoadCurrentCharacter(0, NULL, false); 864 m.CheckNotCharacter('f', NULL); 865 m.WriteCurrentPositionToRegister(0, 0); 866 m.WriteCurrentPositionToRegister(1, 3); 867 m.AdvanceCurrentPosition(3); 868 m.PushBacktrack(&backtrack); 869 m.Succeed(); 870 m.Bind(&backtrack); 871 m.Backtrack(); 872 m.Bind(&fail); 873 m.Fail(); 874 875 Handle<String> source = factory->NewStringFromStaticAscii("^foo"); 876 Handle<Object> code_object = m.GetCode(source); 877 Handle<Code> code = Handle<Code>::cast(code_object); 878 879 int captures[4] = {42, 37, 87, 117}; 880 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', 881 static_cast<uc16>(0x2603)}; 882 Handle<String> input = factory->NewStringFromTwoByte( 883 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 884 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 885 Address start_adr = seq_input->GetCharsAddress(); 886 887 NativeRegExpMacroAssembler::Result result = 888 Execute(*code, 889 *input, 890 0, 891 start_adr, 892 start_adr + input->length(), 893 captures); 894 895 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 896 CHECK_EQ(0, captures[0]); 897 CHECK_EQ(3, captures[1]); 898 CHECK_EQ(-1, captures[2]); 899 CHECK_EQ(-1, captures[3]); 900 901 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 902 static_cast<uc16>(0x2603)}; 903 input = factory->NewStringFromTwoByte( 904 Vector<const uc16>(input_data2, 9)).ToHandleChecked(); 905 seq_input = Handle<SeqTwoByteString>::cast(input); 906 start_adr = seq_input->GetCharsAddress(); 907 908 result = Execute(*code, 909 *input, 910 0, 911 start_adr, 912 start_adr + input->length() * 2, 913 captures); 914 915 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 916 } 917 918 919 TEST(MacroAssemblerNativeBacktrack) { 920 v8::V8::Initialize(); 921 ContextInitializer initializer; 922 Isolate* isolate = CcTest::i_isolate(); 923 Factory* factory = isolate->factory(); 924 Zone zone(isolate); 925 926 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 927 928 Label fail; 929 Label backtrack; 930 m.LoadCurrentCharacter(10, &fail); 931 m.Succeed(); 932 m.Bind(&fail); 933 m.PushBacktrack(&backtrack); 934 m.LoadCurrentCharacter(10, NULL); 935 m.Succeed(); 936 m.Bind(&backtrack); 937 m.Fail(); 938 939 Handle<String> source = factory->NewStringFromStaticAscii(".........."); 940 Handle<Object> code_object = m.GetCode(source); 941 Handle<Code> code = Handle<Code>::cast(code_object); 942 943 Handle<String> input = factory->NewStringFromStaticAscii("foofoo"); 944 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 945 Address start_adr = seq_input->GetCharsAddress(); 946 947 NativeRegExpMacroAssembler::Result result = 948 Execute(*code, 949 *input, 950 0, 951 start_adr, 952 start_adr + input->length(), 953 NULL); 954 955 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 956 } 957 958 959 TEST(MacroAssemblerNativeBackReferenceASCII) { 960 v8::V8::Initialize(); 961 ContextInitializer initializer; 962 Isolate* isolate = CcTest::i_isolate(); 963 Factory* factory = isolate->factory(); 964 Zone zone(isolate); 965 966 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 967 968 m.WriteCurrentPositionToRegister(0, 0); 969 m.AdvanceCurrentPosition(2); 970 m.WriteCurrentPositionToRegister(1, 0); 971 Label nomatch; 972 m.CheckNotBackReference(0, &nomatch); 973 m.Fail(); 974 m.Bind(&nomatch); 975 m.AdvanceCurrentPosition(2); 976 Label missing_match; 977 m.CheckNotBackReference(0, &missing_match); 978 m.WriteCurrentPositionToRegister(2, 0); 979 m.Succeed(); 980 m.Bind(&missing_match); 981 m.Fail(); 982 983 Handle<String> source = factory->NewStringFromStaticAscii("^(..)..\1"); 984 Handle<Object> code_object = m.GetCode(source); 985 Handle<Code> code = Handle<Code>::cast(code_object); 986 987 Handle<String> input = factory->NewStringFromStaticAscii("fooofo"); 988 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 989 Address start_adr = seq_input->GetCharsAddress(); 990 991 int output[4]; 992 NativeRegExpMacroAssembler::Result result = 993 Execute(*code, 994 *input, 995 0, 996 start_adr, 997 start_adr + input->length(), 998 output); 999 1000 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1001 CHECK_EQ(0, output[0]); 1002 CHECK_EQ(2, output[1]); 1003 CHECK_EQ(6, output[2]); 1004 CHECK_EQ(-1, output[3]); 1005 } 1006 1007 1008 TEST(MacroAssemblerNativeBackReferenceUC16) { 1009 v8::V8::Initialize(); 1010 ContextInitializer initializer; 1011 Isolate* isolate = CcTest::i_isolate(); 1012 Factory* factory = isolate->factory(); 1013 Zone zone(isolate); 1014 1015 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 1016 1017 m.WriteCurrentPositionToRegister(0, 0); 1018 m.AdvanceCurrentPosition(2); 1019 m.WriteCurrentPositionToRegister(1, 0); 1020 Label nomatch; 1021 m.CheckNotBackReference(0, &nomatch); 1022 m.Fail(); 1023 m.Bind(&nomatch); 1024 m.AdvanceCurrentPosition(2); 1025 Label missing_match; 1026 m.CheckNotBackReference(0, &missing_match); 1027 m.WriteCurrentPositionToRegister(2, 0); 1028 m.Succeed(); 1029 m.Bind(&missing_match); 1030 m.Fail(); 1031 1032 Handle<String> source = factory->NewStringFromStaticAscii("^(..)..\1"); 1033 Handle<Object> code_object = m.GetCode(source); 1034 Handle<Code> code = Handle<Code>::cast(code_object); 1035 1036 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028}; 1037 Handle<String> input = factory->NewStringFromTwoByte( 1038 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 1039 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 1040 Address start_adr = seq_input->GetCharsAddress(); 1041 1042 int output[4]; 1043 NativeRegExpMacroAssembler::Result result = 1044 Execute(*code, 1045 *input, 1046 0, 1047 start_adr, 1048 start_adr + input->length() * 2, 1049 output); 1050 1051 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1052 CHECK_EQ(0, output[0]); 1053 CHECK_EQ(2, output[1]); 1054 CHECK_EQ(6, output[2]); 1055 CHECK_EQ(-1, output[3]); 1056 } 1057 1058 1059 1060 TEST(MacroAssemblernativeAtStart) { 1061 v8::V8::Initialize(); 1062 ContextInitializer initializer; 1063 Isolate* isolate = CcTest::i_isolate(); 1064 Factory* factory = isolate->factory(); 1065 Zone zone(isolate); 1066 1067 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 1068 1069 Label not_at_start, newline, fail; 1070 m.CheckNotAtStart(¬_at_start); 1071 // Check that prevchar = '\n' and current = 'f'. 1072 m.CheckCharacter('\n', &newline); 1073 m.Bind(&fail); 1074 m.Fail(); 1075 m.Bind(&newline); 1076 m.LoadCurrentCharacter(0, &fail); 1077 m.CheckNotCharacter('f', &fail); 1078 m.Succeed(); 1079 1080 m.Bind(¬_at_start); 1081 // Check that prevchar = 'o' and current = 'b'. 1082 Label prevo; 1083 m.CheckCharacter('o', &prevo); 1084 m.Fail(); 1085 m.Bind(&prevo); 1086 m.LoadCurrentCharacter(0, &fail); 1087 m.CheckNotCharacter('b', &fail); 1088 m.Succeed(); 1089 1090 Handle<String> source = factory->NewStringFromStaticAscii("(^f|ob)"); 1091 Handle<Object> code_object = m.GetCode(source); 1092 Handle<Code> code = Handle<Code>::cast(code_object); 1093 1094 Handle<String> input = factory->NewStringFromStaticAscii("foobar"); 1095 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1096 Address start_adr = seq_input->GetCharsAddress(); 1097 1098 NativeRegExpMacroAssembler::Result result = 1099 Execute(*code, 1100 *input, 1101 0, 1102 start_adr, 1103 start_adr + input->length(), 1104 NULL); 1105 1106 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1107 1108 result = Execute(*code, 1109 *input, 1110 3, 1111 start_adr + 3, 1112 start_adr + input->length(), 1113 NULL); 1114 1115 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1116 } 1117 1118 1119 TEST(MacroAssemblerNativeBackRefNoCase) { 1120 v8::V8::Initialize(); 1121 ContextInitializer initializer; 1122 Isolate* isolate = CcTest::i_isolate(); 1123 Factory* factory = isolate->factory(); 1124 Zone zone(isolate); 1125 1126 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 1127 1128 Label fail, succ; 1129 1130 m.WriteCurrentPositionToRegister(0, 0); 1131 m.WriteCurrentPositionToRegister(2, 0); 1132 m.AdvanceCurrentPosition(3); 1133 m.WriteCurrentPositionToRegister(3, 0); 1134 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC". 1135 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC". 1136 Label expected_fail; 1137 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail); 1138 m.Bind(&fail); 1139 m.Fail(); 1140 1141 m.Bind(&expected_fail); 1142 m.AdvanceCurrentPosition(3); // Skip "xYz" 1143 m.CheckNotBackReferenceIgnoreCase(2, &succ); 1144 m.Fail(); 1145 1146 m.Bind(&succ); 1147 m.WriteCurrentPositionToRegister(1, 0); 1148 m.Succeed(); 1149 1150 Handle<String> source = 1151 factory->NewStringFromStaticAscii("^(abc)\1\1(?!\1)...(?!\1)"); 1152 Handle<Object> code_object = m.GetCode(source); 1153 Handle<Code> code = Handle<Code>::cast(code_object); 1154 1155 Handle<String> input = 1156 factory->NewStringFromStaticAscii("aBcAbCABCxYzab"); 1157 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1158 Address start_adr = seq_input->GetCharsAddress(); 1159 1160 int output[4]; 1161 NativeRegExpMacroAssembler::Result result = 1162 Execute(*code, 1163 *input, 1164 0, 1165 start_adr, 1166 start_adr + input->length(), 1167 output); 1168 1169 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1170 CHECK_EQ(0, output[0]); 1171 CHECK_EQ(12, output[1]); 1172 CHECK_EQ(0, output[2]); 1173 CHECK_EQ(3, output[3]); 1174 } 1175 1176 1177 1178 TEST(MacroAssemblerNativeRegisters) { 1179 v8::V8::Initialize(); 1180 ContextInitializer initializer; 1181 Isolate* isolate = CcTest::i_isolate(); 1182 Factory* factory = isolate->factory(); 1183 Zone zone(isolate); 1184 1185 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6, &zone); 1186 1187 uc16 foo_chars[3] = {'f', 'o', 'o'}; 1188 Vector<const uc16> foo(foo_chars, 3); 1189 1190 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt }; 1191 Label fail; 1192 Label backtrack; 1193 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0] 1194 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1195 m.PushBacktrack(&backtrack); 1196 m.WriteStackPointerToRegister(sp); 1197 // Fill stack and registers 1198 m.AdvanceCurrentPosition(2); 1199 m.WriteCurrentPositionToRegister(out1, 0); 1200 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1201 m.PushBacktrack(&fail); 1202 // Drop backtrack stack frames. 1203 m.ReadStackPointerFromRegister(sp); 1204 // And take the first backtrack (to &backtrack) 1205 m.Backtrack(); 1206 1207 m.PushCurrentPosition(); 1208 m.AdvanceCurrentPosition(2); 1209 m.PopCurrentPosition(); 1210 1211 m.Bind(&backtrack); 1212 m.PopRegister(out1); 1213 m.ReadCurrentPositionFromRegister(out1); 1214 m.AdvanceCurrentPosition(3); 1215 m.WriteCurrentPositionToRegister(out2, 0); // [0,3] 1216 1217 Label loop; 1218 m.SetRegister(loop_cnt, 0); // loop counter 1219 m.Bind(&loop); 1220 m.AdvanceRegister(loop_cnt, 1); 1221 m.AdvanceCurrentPosition(1); 1222 m.IfRegisterLT(loop_cnt, 3, &loop); 1223 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6] 1224 1225 Label loop2; 1226 m.SetRegister(loop_cnt, 2); // loop counter 1227 m.Bind(&loop2); 1228 m.AdvanceRegister(loop_cnt, -1); 1229 m.AdvanceCurrentPosition(1); 1230 m.IfRegisterGE(loop_cnt, 0, &loop2); 1231 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9] 1232 1233 Label loop3; 1234 Label exit_loop3; 1235 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1236 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1237 m.ReadCurrentPositionFromRegister(out3); 1238 m.Bind(&loop3); 1239 m.AdvanceCurrentPosition(1); 1240 m.CheckGreedyLoop(&exit_loop3); 1241 m.GoTo(&loop3); 1242 m.Bind(&exit_loop3); 1243 m.PopCurrentPosition(); 1244 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1] 1245 1246 m.Succeed(); 1247 1248 m.Bind(&fail); 1249 m.Fail(); 1250 1251 Handle<String> source = 1252 factory->NewStringFromStaticAscii("<loop test>"); 1253 Handle<Object> code_object = m.GetCode(source); 1254 Handle<Code> code = Handle<Code>::cast(code_object); 1255 1256 // String long enough for test (content doesn't matter). 1257 Handle<String> input = 1258 factory->NewStringFromStaticAscii("foofoofoofoofoo"); 1259 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1260 Address start_adr = seq_input->GetCharsAddress(); 1261 1262 int output[6]; 1263 NativeRegExpMacroAssembler::Result result = 1264 Execute(*code, 1265 *input, 1266 0, 1267 start_adr, 1268 start_adr + input->length(), 1269 output); 1270 1271 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1272 CHECK_EQ(0, output[0]); 1273 CHECK_EQ(3, output[1]); 1274 CHECK_EQ(6, output[2]); 1275 CHECK_EQ(9, output[3]); 1276 CHECK_EQ(9, output[4]); 1277 CHECK_EQ(-1, output[5]); 1278 } 1279 1280 1281 TEST(MacroAssemblerStackOverflow) { 1282 v8::V8::Initialize(); 1283 ContextInitializer initializer; 1284 Isolate* isolate = CcTest::i_isolate(); 1285 Factory* factory = isolate->factory(); 1286 Zone zone(isolate); 1287 1288 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 1289 1290 Label loop; 1291 m.Bind(&loop); 1292 m.PushBacktrack(&loop); 1293 m.GoTo(&loop); 1294 1295 Handle<String> source = 1296 factory->NewStringFromStaticAscii("<stack overflow test>"); 1297 Handle<Object> code_object = m.GetCode(source); 1298 Handle<Code> code = Handle<Code>::cast(code_object); 1299 1300 // String long enough for test (content doesn't matter). 1301 Handle<String> input = 1302 factory->NewStringFromStaticAscii("dummy"); 1303 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1304 Address start_adr = seq_input->GetCharsAddress(); 1305 1306 NativeRegExpMacroAssembler::Result result = 1307 Execute(*code, 1308 *input, 1309 0, 1310 start_adr, 1311 start_adr + input->length(), 1312 NULL); 1313 1314 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result); 1315 CHECK(isolate->has_pending_exception()); 1316 isolate->clear_pending_exception(); 1317 } 1318 1319 1320 TEST(MacroAssemblerNativeLotsOfRegisters) { 1321 v8::V8::Initialize(); 1322 ContextInitializer initializer; 1323 Isolate* isolate = CcTest::i_isolate(); 1324 Factory* factory = isolate->factory(); 1325 Zone zone(isolate); 1326 1327 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2, &zone); 1328 1329 // At least 2048, to ensure the allocated space for registers 1330 // span one full page. 1331 const int large_number = 8000; 1332 m.WriteCurrentPositionToRegister(large_number, 42); 1333 m.WriteCurrentPositionToRegister(0, 0); 1334 m.WriteCurrentPositionToRegister(1, 1); 1335 Label done; 1336 m.CheckNotBackReference(0, &done); // Performs a system-stack push. 1337 m.Bind(&done); 1338 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck); 1339 m.PopRegister(1); 1340 m.Succeed(); 1341 1342 Handle<String> source = 1343 factory->NewStringFromStaticAscii("<huge register space test>"); 1344 Handle<Object> code_object = m.GetCode(source); 1345 Handle<Code> code = Handle<Code>::cast(code_object); 1346 1347 // String long enough for test (content doesn't matter). 1348 Handle<String> input = 1349 factory->NewStringFromStaticAscii("sample text"); 1350 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1351 Address start_adr = seq_input->GetCharsAddress(); 1352 1353 int captures[2]; 1354 NativeRegExpMacroAssembler::Result result = 1355 Execute(*code, 1356 *input, 1357 0, 1358 start_adr, 1359 start_adr + input->length(), 1360 captures); 1361 1362 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1363 CHECK_EQ(0, captures[0]); 1364 CHECK_EQ(42, captures[1]); 1365 1366 isolate->clear_pending_exception(); 1367 } 1368 1369 #else // V8_INTERPRETED_REGEXP 1370 1371 TEST(MacroAssembler) { 1372 V8::Initialize(NULL); 1373 byte codes[1024]; 1374 Zone zone(CcTest::i_isolate()); 1375 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone); 1376 // ^f(o)o. 1377 Label start, fail, backtrack; 1378 1379 m.SetRegister(4, 42); 1380 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck); 1381 m.AdvanceRegister(4, 42); 1382 m.GoTo(&start); 1383 m.Fail(); 1384 m.Bind(&start); 1385 m.PushBacktrack(&fail); 1386 m.CheckNotAtStart(NULL); 1387 m.LoadCurrentCharacter(0, NULL); 1388 m.CheckNotCharacter('f', NULL); 1389 m.LoadCurrentCharacter(1, NULL); 1390 m.CheckNotCharacter('o', NULL); 1391 m.LoadCurrentCharacter(2, NULL); 1392 m.CheckNotCharacter('o', NULL); 1393 m.WriteCurrentPositionToRegister(0, 0); 1394 m.WriteCurrentPositionToRegister(1, 3); 1395 m.WriteCurrentPositionToRegister(2, 1); 1396 m.WriteCurrentPositionToRegister(3, 2); 1397 m.AdvanceCurrentPosition(3); 1398 m.PushBacktrack(&backtrack); 1399 m.Succeed(); 1400 m.Bind(&backtrack); 1401 m.ClearRegisters(2, 3); 1402 m.Backtrack(); 1403 m.Bind(&fail); 1404 m.PopRegister(0); 1405 m.Fail(); 1406 1407 Isolate* isolate = CcTest::i_isolate(); 1408 Factory* factory = isolate->factory(); 1409 HandleScope scope(isolate); 1410 1411 Handle<String> source = factory->NewStringFromStaticAscii("^f(o)o"); 1412 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source)); 1413 int captures[5]; 1414 1415 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'}; 1416 Handle<String> f1_16 = factory->NewStringFromTwoByte( 1417 Vector<const uc16>(str1, 6)).ToHandleChecked(); 1418 1419 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0)); 1420 CHECK_EQ(0, captures[0]); 1421 CHECK_EQ(3, captures[1]); 1422 CHECK_EQ(1, captures[2]); 1423 CHECK_EQ(2, captures[3]); 1424 CHECK_EQ(84, captures[4]); 1425 1426 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'}; 1427 Handle<String> f2_16 = factory->NewStringFromTwoByte( 1428 Vector<const uc16>(str2, 6)).ToHandleChecked(); 1429 1430 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0)); 1431 CHECK_EQ(42, captures[0]); 1432 } 1433 1434 #endif // V8_INTERPRETED_REGEXP 1435 1436 1437 TEST(AddInverseToTable) { 1438 v8::internal::V8::Initialize(NULL); 1439 static const int kLimit = 1000; 1440 static const int kRangeCount = 16; 1441 for (int t = 0; t < 10; t++) { 1442 Zone zone(CcTest::i_isolate()); 1443 ZoneList<CharacterRange>* ranges = 1444 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone); 1445 for (int i = 0; i < kRangeCount; i++) { 1446 int from = PseudoRandom(t + 87, i + 25) % kLimit; 1447 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); 1448 if (to > kLimit) to = kLimit; 1449 ranges->Add(CharacterRange(from, to), &zone); 1450 } 1451 DispatchTable table(&zone); 1452 DispatchTableConstructor cons(&table, false, &zone); 1453 cons.set_choice_index(0); 1454 cons.AddInverse(ranges); 1455 for (int i = 0; i < kLimit; i++) { 1456 bool is_on = false; 1457 for (int j = 0; !is_on && j < kRangeCount; j++) 1458 is_on = ranges->at(j).Contains(i); 1459 OutSet* set = table.Get(i); 1460 CHECK_EQ(is_on, set->Get(0) == false); 1461 } 1462 } 1463 Zone zone(CcTest::i_isolate()); 1464 ZoneList<CharacterRange>* ranges = 1465 new(&zone) ZoneList<CharacterRange>(1, &zone); 1466 ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone); 1467 DispatchTable table(&zone); 1468 DispatchTableConstructor cons(&table, false, &zone); 1469 cons.set_choice_index(0); 1470 cons.AddInverse(ranges); 1471 CHECK(!table.Get(0xFFFE)->Get(0)); 1472 CHECK(table.Get(0xFFFF)->Get(0)); 1473 } 1474 1475 1476 static uc32 canonicalize(uc32 c) { 1477 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; 1478 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); 1479 if (count == 0) { 1480 return c; 1481 } else { 1482 CHECK_EQ(1, count); 1483 return canon[0]; 1484 } 1485 } 1486 1487 1488 TEST(LatinCanonicalize) { 1489 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1490 for (char lower = 'a'; lower <= 'z'; lower++) { 1491 char upper = lower + ('A' - 'a'); 1492 CHECK_EQ(canonicalize(lower), canonicalize(upper)); 1493 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1494 int length = un_canonicalize.get(lower, '\0', uncanon); 1495 CHECK_EQ(2, length); 1496 CHECK_EQ(upper, uncanon[0]); 1497 CHECK_EQ(lower, uncanon[1]); 1498 } 1499 for (uc32 c = 128; c < (1 << 21); c++) 1500 CHECK_GE(canonicalize(c), 128); 1501 unibrow::Mapping<unibrow::ToUppercase> to_upper; 1502 // Canonicalization is only defined for the Basic Multilingual Plane. 1503 for (uc32 c = 0; c < (1 << 16); c++) { 1504 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; 1505 int length = to_upper.get(c, '\0', upper); 1506 if (length == 0) { 1507 length = 1; 1508 upper[0] = c; 1509 } 1510 uc32 u = upper[0]; 1511 if (length > 1 || (c >= 128 && u < 128)) 1512 u = c; 1513 CHECK_EQ(u, canonicalize(c)); 1514 } 1515 } 1516 1517 1518 static uc32 CanonRangeEnd(uc32 c) { 1519 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; 1520 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); 1521 if (count == 0) { 1522 return c; 1523 } else { 1524 CHECK_EQ(1, count); 1525 return canon[0]; 1526 } 1527 } 1528 1529 1530 TEST(RangeCanonicalization) { 1531 // Check that we arrive at the same result when using the basic 1532 // range canonicalization primitives as when using immediate 1533 // canonicalization. 1534 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1535 int block_start = 0; 1536 while (block_start <= 0xFFFF) { 1537 uc32 block_end = CanonRangeEnd(block_start); 1538 unsigned block_length = block_end - block_start + 1; 1539 if (block_length > 1) { 1540 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1541 int first_length = un_canonicalize.get(block_start, '\0', first); 1542 for (unsigned i = 1; i < block_length; i++) { 1543 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1544 int succ_length = un_canonicalize.get(block_start + i, '\0', succ); 1545 CHECK_EQ(first_length, succ_length); 1546 for (int j = 0; j < succ_length; j++) { 1547 int calc = first[j] + i; 1548 int found = succ[j]; 1549 CHECK_EQ(calc, found); 1550 } 1551 } 1552 } 1553 block_start = block_start + block_length; 1554 } 1555 } 1556 1557 1558 TEST(UncanonicalizeEquivalence) { 1559 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1560 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1561 for (int i = 0; i < (1 << 16); i++) { 1562 int length = un_canonicalize.get(i, '\0', chars); 1563 for (int j = 0; j < length; j++) { 1564 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1565 int length2 = un_canonicalize.get(chars[j], '\0', chars2); 1566 CHECK_EQ(length, length2); 1567 for (int k = 0; k < length; k++) 1568 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k])); 1569 } 1570 } 1571 } 1572 1573 1574 static void TestRangeCaseIndependence(CharacterRange input, 1575 Vector<CharacterRange> expected) { 1576 Zone zone(CcTest::i_isolate()); 1577 int count = expected.length(); 1578 ZoneList<CharacterRange>* list = 1579 new(&zone) ZoneList<CharacterRange>(count, &zone); 1580 input.AddCaseEquivalents(list, false, &zone); 1581 CHECK_EQ(count, list->length()); 1582 for (int i = 0; i < list->length(); i++) { 1583 CHECK_EQ(expected[i].from(), list->at(i).from()); 1584 CHECK_EQ(expected[i].to(), list->at(i).to()); 1585 } 1586 } 1587 1588 1589 static void TestSimpleRangeCaseIndependence(CharacterRange input, 1590 CharacterRange expected) { 1591 EmbeddedVector<CharacterRange, 1> vector; 1592 vector[0] = expected; 1593 TestRangeCaseIndependence(input, vector); 1594 } 1595 1596 1597 TEST(CharacterRangeCaseIndependence) { 1598 v8::internal::V8::Initialize(NULL); 1599 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'), 1600 CharacterRange::Singleton('A')); 1601 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'), 1602 CharacterRange::Singleton('Z')); 1603 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'), 1604 CharacterRange('A', 'Z')); 1605 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'), 1606 CharacterRange('C', 'F')); 1607 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'), 1608 CharacterRange('A', 'B')); 1609 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'), 1610 CharacterRange('Y', 'Z')); 1611 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1), 1612 CharacterRange('A', 'Z')); 1613 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'), 1614 CharacterRange('a', 'z')); 1615 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'), 1616 CharacterRange('c', 'f')); 1617 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), 1618 CharacterRange('a', 'z')); 1619 // Here we need to add [l-z] to complete the case independence of 1620 // [A-Za-z] but we expect [a-z] to be added since we always add a 1621 // whole block at a time. 1622 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), 1623 CharacterRange('a', 'z')); 1624 } 1625 1626 1627 static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) { 1628 if (ranges == NULL) 1629 return false; 1630 for (int i = 0; i < ranges->length(); i++) { 1631 CharacterRange range = ranges->at(i); 1632 if (range.from() <= c && c <= range.to()) 1633 return true; 1634 } 1635 return false; 1636 } 1637 1638 1639 TEST(CharClassDifference) { 1640 v8::internal::V8::Initialize(NULL); 1641 Zone zone(CcTest::i_isolate()); 1642 ZoneList<CharacterRange>* base = 1643 new(&zone) ZoneList<CharacterRange>(1, &zone); 1644 base->Add(CharacterRange::Everything(), &zone); 1645 Vector<const int> overlay = CharacterRange::GetWordBounds(); 1646 ZoneList<CharacterRange>* included = NULL; 1647 ZoneList<CharacterRange>* excluded = NULL; 1648 CharacterRange::Split(base, overlay, &included, &excluded, &zone); 1649 for (int i = 0; i < (1 << 16); i++) { 1650 bool in_base = InClass(i, base); 1651 if (in_base) { 1652 bool in_overlay = false; 1653 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) { 1654 if (overlay[j] <= i && i < overlay[j+1]) 1655 in_overlay = true; 1656 } 1657 CHECK_EQ(in_overlay, InClass(i, included)); 1658 CHECK_EQ(!in_overlay, InClass(i, excluded)); 1659 } else { 1660 CHECK(!InClass(i, included)); 1661 CHECK(!InClass(i, excluded)); 1662 } 1663 } 1664 } 1665 1666 1667 TEST(CanonicalizeCharacterSets) { 1668 v8::internal::V8::Initialize(NULL); 1669 Zone zone(CcTest::i_isolate()); 1670 ZoneList<CharacterRange>* list = 1671 new(&zone) ZoneList<CharacterRange>(4, &zone); 1672 CharacterSet set(list); 1673 1674 list->Add(CharacterRange(10, 20), &zone); 1675 list->Add(CharacterRange(30, 40), &zone); 1676 list->Add(CharacterRange(50, 60), &zone); 1677 set.Canonicalize(); 1678 ASSERT_EQ(3, list->length()); 1679 ASSERT_EQ(10, list->at(0).from()); 1680 ASSERT_EQ(20, list->at(0).to()); 1681 ASSERT_EQ(30, list->at(1).from()); 1682 ASSERT_EQ(40, list->at(1).to()); 1683 ASSERT_EQ(50, list->at(2).from()); 1684 ASSERT_EQ(60, list->at(2).to()); 1685 1686 list->Rewind(0); 1687 list->Add(CharacterRange(10, 20), &zone); 1688 list->Add(CharacterRange(50, 60), &zone); 1689 list->Add(CharacterRange(30, 40), &zone); 1690 set.Canonicalize(); 1691 ASSERT_EQ(3, list->length()); 1692 ASSERT_EQ(10, list->at(0).from()); 1693 ASSERT_EQ(20, list->at(0).to()); 1694 ASSERT_EQ(30, list->at(1).from()); 1695 ASSERT_EQ(40, list->at(1).to()); 1696 ASSERT_EQ(50, list->at(2).from()); 1697 ASSERT_EQ(60, list->at(2).to()); 1698 1699 list->Rewind(0); 1700 list->Add(CharacterRange(30, 40), &zone); 1701 list->Add(CharacterRange(10, 20), &zone); 1702 list->Add(CharacterRange(25, 25), &zone); 1703 list->Add(CharacterRange(100, 100), &zone); 1704 list->Add(CharacterRange(1, 1), &zone); 1705 set.Canonicalize(); 1706 ASSERT_EQ(5, list->length()); 1707 ASSERT_EQ(1, list->at(0).from()); 1708 ASSERT_EQ(1, list->at(0).to()); 1709 ASSERT_EQ(10, list->at(1).from()); 1710 ASSERT_EQ(20, list->at(1).to()); 1711 ASSERT_EQ(25, list->at(2).from()); 1712 ASSERT_EQ(25, list->at(2).to()); 1713 ASSERT_EQ(30, list->at(3).from()); 1714 ASSERT_EQ(40, list->at(3).to()); 1715 ASSERT_EQ(100, list->at(4).from()); 1716 ASSERT_EQ(100, list->at(4).to()); 1717 1718 list->Rewind(0); 1719 list->Add(CharacterRange(10, 19), &zone); 1720 list->Add(CharacterRange(21, 30), &zone); 1721 list->Add(CharacterRange(20, 20), &zone); 1722 set.Canonicalize(); 1723 ASSERT_EQ(1, list->length()); 1724 ASSERT_EQ(10, list->at(0).from()); 1725 ASSERT_EQ(30, list->at(0).to()); 1726 } 1727 1728 1729 TEST(CharacterRangeMerge) { 1730 v8::internal::V8::Initialize(NULL); 1731 Zone zone(CcTest::i_isolate()); 1732 ZoneList<CharacterRange> l1(4, &zone); 1733 ZoneList<CharacterRange> l2(4, &zone); 1734 // Create all combinations of intersections of ranges, both singletons and 1735 // longer. 1736 1737 int offset = 0; 1738 1739 // The five kinds of singleton intersections: 1740 // X 1741 // Y - outside before 1742 // Y - outside touching start 1743 // Y - overlap 1744 // Y - outside touching end 1745 // Y - outside after 1746 1747 for (int i = 0; i < 5; i++) { 1748 l1.Add(CharacterRange::Singleton(offset + 2), &zone); 1749 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1750 offset += 6; 1751 } 1752 1753 // The seven kinds of singleton/non-singleton intersections: 1754 // XXX 1755 // Y - outside before 1756 // Y - outside touching start 1757 // Y - inside touching start 1758 // Y - entirely inside 1759 // Y - inside touching end 1760 // Y - outside touching end 1761 // Y - disjoint after 1762 1763 for (int i = 0; i < 7; i++) { 1764 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone); 1765 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1766 offset += 8; 1767 } 1768 1769 // The eleven kinds of non-singleton intersections: 1770 // 1771 // XXXXXXXX 1772 // YYYY - outside before. 1773 // YYYY - outside touching start. 1774 // YYYY - overlapping start 1775 // YYYY - inside touching start 1776 // YYYY - entirely inside 1777 // YYYY - inside touching end 1778 // YYYY - overlapping end 1779 // YYYY - outside touching end 1780 // YYYY - outside after 1781 // YYYYYYYY - identical 1782 // YYYYYYYYYYYY - containing entirely. 1783 1784 for (int i = 0; i < 9; i++) { 1785 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8. 1786 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone); 1787 offset += 22; 1788 } 1789 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1790 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1791 offset += 22; 1792 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1793 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone); 1794 offset += 22; 1795 1796 // Different kinds of multi-range overlap: 1797 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX 1798 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y 1799 1800 l1.Add(CharacterRange::Range(offset, offset + 21), &zone); 1801 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone); 1802 for (int i = 0; i < 6; i++) { 1803 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone); 1804 l2.Add(CharacterRange::Singleton(offset + 8), &zone); 1805 offset += 9; 1806 } 1807 1808 ASSERT(CharacterRange::IsCanonical(&l1)); 1809 ASSERT(CharacterRange::IsCanonical(&l2)); 1810 1811 ZoneList<CharacterRange> first_only(4, &zone); 1812 ZoneList<CharacterRange> second_only(4, &zone); 1813 ZoneList<CharacterRange> both(4, &zone); 1814 } 1815 1816 1817 TEST(Graph) { 1818 V8::Initialize(NULL); 1819 Execute("\\b\\w+\\b", false, true, true); 1820 } 1821