1 // -*- coding: utf-8 -*- 2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved. 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 // TODO: Test extractions for PartialMatch/Consume 7 8 #include <sys/types.h> 9 #ifndef WIN32 10 #include <sys/mman.h> 11 #endif 12 #include <sys/stat.h> 13 #include <errno.h> 14 #include <vector> 15 #include "util/test.h" 16 #include "re2/re2.h" 17 #include "re2/regexp.h" 18 19 #ifdef WIN32 20 #include <stdio.h> 21 #define snprintf _snprintf 22 #endif 23 24 DECLARE_bool(logtostderr); 25 26 namespace re2 { 27 28 TEST(RE2, HexTests) { 29 30 VLOG(1) << "hex tests"; 31 32 #define CHECK_HEX(type, value) \ 33 do { \ 34 type v; \ 35 CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \ 36 CHECK_EQ(v, 0x ## value); \ 37 CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \ 38 CHECK_EQ(v, 0x ## value); \ 39 } while(0) 40 41 CHECK_HEX(short, 2bad); 42 CHECK_HEX(unsigned short, 2badU); 43 CHECK_HEX(int, dead); 44 CHECK_HEX(unsigned int, deadU); 45 CHECK_HEX(long, 7eadbeefL); 46 CHECK_HEX(unsigned long, deadbeefUL); 47 CHECK_HEX(long long, 12345678deadbeefLL); 48 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 49 50 #undef CHECK_HEX 51 } 52 53 TEST(RE2, OctalTests) { 54 VLOG(1) << "octal tests"; 55 56 #define CHECK_OCTAL(type, value) \ 57 do { \ 58 type v; \ 59 CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \ 60 CHECK_EQ(v, 0 ## value); \ 61 CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \ 62 CHECK_EQ(v, 0 ## value); \ 63 } while(0) 64 65 CHECK_OCTAL(short, 77777); 66 CHECK_OCTAL(unsigned short, 177777U); 67 CHECK_OCTAL(int, 17777777777); 68 CHECK_OCTAL(unsigned int, 37777777777U); 69 CHECK_OCTAL(long, 17777777777L); 70 CHECK_OCTAL(unsigned long, 37777777777UL); 71 CHECK_OCTAL(long long, 777777777777777777777LL); 72 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); 73 74 #undef CHECK_OCTAL 75 } 76 77 TEST(RE2, DecimalTests) { 78 VLOG(1) << "decimal tests"; 79 80 #define CHECK_DECIMAL(type, value) \ 81 do { \ 82 type v; \ 83 CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \ 84 CHECK_EQ(v, value); \ 85 CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \ 86 CHECK_EQ(v, value); \ 87 } while(0) 88 89 CHECK_DECIMAL(short, -1); 90 CHECK_DECIMAL(unsigned short, 9999); 91 CHECK_DECIMAL(int, -1000); 92 CHECK_DECIMAL(unsigned int, 12345U); 93 CHECK_DECIMAL(long, -10000000L); 94 CHECK_DECIMAL(unsigned long, 3083324652U); 95 CHECK_DECIMAL(long long, -100000000000000LL); 96 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 97 98 #undef CHECK_DECIMAL 99 } 100 101 TEST(RE2, Replace) { 102 VLOG(1) << "TestReplace"; 103 104 struct ReplaceTest { 105 const char *regexp; 106 const char *rewrite; 107 const char *original; 108 const char *single; 109 const char *global; 110 int greplace_count; 111 }; 112 static const ReplaceTest tests[] = { 113 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 114 "\\2\\1ay", 115 "the quick brown fox jumps over the lazy dogs.", 116 "ethay quick brown fox jumps over the lazy dogs.", 117 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 118 9 }, 119 { "\\w+", 120 "\\0-NOSPAM", 121 "abcd.efghi (at) google.com", 122 "abcd-NOSPAM.efghi (at) google.com", 123 "abcd-NOSPAM.efghi-NOSPAM (at) google-NOSPAM.com-NOSPAM", 124 4 }, 125 { "^", 126 "(START)", 127 "foo", 128 "(START)foo", 129 "(START)foo", 130 1 }, 131 { "^", 132 "(START)", 133 "", 134 "(START)", 135 "(START)", 136 1 }, 137 { "$", 138 "(END)", 139 "", 140 "(END)", 141 "(END)", 142 1 }, 143 { "b", 144 "bb", 145 "ababababab", 146 "abbabababab", 147 "abbabbabbabbabb", 148 5 }, 149 { "b", 150 "bb", 151 "bbbbbb", 152 "bbbbbbb", 153 "bbbbbbbbbbbb", 154 6 }, 155 { "b+", 156 "bb", 157 "bbbbbb", 158 "bb", 159 "bb", 160 1 }, 161 { "b*", 162 "bb", 163 "bbbbbb", 164 "bb", 165 "bb", 166 1 }, 167 { "b*", 168 "bb", 169 "aaaaa", 170 "bbaaaaa", 171 "bbabbabbabbabbabb", 172 6 }, 173 // Check newline handling 174 { "a.*a", 175 "(\\0)", 176 "aba\naba", 177 "(aba)\naba", 178 "(aba)\n(aba)", 179 2 }, 180 { "", NULL, NULL, NULL, NULL, 0 } 181 }; 182 183 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 184 VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite); 185 string one(t->original); 186 CHECK(RE2::Replace(&one, t->regexp, t->rewrite)); 187 CHECK_EQ(one, t->single); 188 string all(t->original); 189 CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count) 190 << "Got: " << all; 191 CHECK_EQ(all, t->global); 192 } 193 } 194 195 static void TestCheckRewriteString(const char* regexp, const char* rewrite, 196 bool expect_ok) { 197 string error; 198 RE2 exp(regexp); 199 bool actual_ok = exp.CheckRewriteString(rewrite, &error); 200 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error; 201 } 202 203 TEST(CheckRewriteString, all) { 204 TestCheckRewriteString("abc", "foo", true); 205 TestCheckRewriteString("abc", "foo\\", false); 206 TestCheckRewriteString("abc", "foo\\0bar", true); 207 208 TestCheckRewriteString("a(b)c", "foo", true); 209 TestCheckRewriteString("a(b)c", "foo\\0bar", true); 210 TestCheckRewriteString("a(b)c", "foo\\1bar", true); 211 TestCheckRewriteString("a(b)c", "foo\\2bar", false); 212 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true); 213 214 TestCheckRewriteString("a(b)(c)", "foo\\12", true); 215 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true); 216 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false); 217 } 218 219 TEST(RE2, Extract) { 220 VLOG(1) << "TestExtract"; 221 222 string s; 223 224 CHECK(RE2::Extract("boris (at) kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s)); 225 CHECK_EQ(s, "kremvax!boris"); 226 227 CHECK(RE2::Extract("foo", ".*", "'\\0'", &s)); 228 CHECK_EQ(s, "'foo'"); 229 // check that false match doesn't overwrite 230 CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s)); 231 CHECK_EQ(s, "'foo'"); 232 } 233 234 TEST(RE2, Consume) { 235 VLOG(1) << "TestConsume"; 236 237 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 238 string word; 239 240 string s(" aaa b!@#$@#$cccc"); 241 StringPiece input(s); 242 243 CHECK(RE2::Consume(&input, r, &word)); 244 CHECK_EQ(word, "aaa") << " input: " << input; 245 CHECK(RE2::Consume(&input, r, &word)); 246 CHECK_EQ(word, "b") << " input: " << input; 247 CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input; 248 } 249 250 TEST(RE2, ConsumeN) { 251 const string s(" one two three 4"); 252 StringPiece input(s); 253 254 RE2::Arg argv[2]; 255 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; 256 257 // 0 arg 258 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one". 259 260 // 1 arg 261 string word; 262 argv[0] = &word; 263 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1)); 264 EXPECT_EQ("two", word); 265 266 // Multi-args 267 int n; 268 argv[1] = &n; 269 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2)); 270 EXPECT_EQ("three", word); 271 EXPECT_EQ(4, n); 272 } 273 274 TEST(RE2, FindAndConsume) { 275 VLOG(1) << "TestFindAndConsume"; 276 277 RE2 r("(\\w+)"); // matches a word 278 string word; 279 280 string s(" aaa b!@#$@#$cccc"); 281 StringPiece input(s); 282 283 CHECK(RE2::FindAndConsume(&input, r, &word)); 284 CHECK_EQ(word, "aaa"); 285 CHECK(RE2::FindAndConsume(&input, r, &word)); 286 CHECK_EQ(word, "b"); 287 CHECK(RE2::FindAndConsume(&input, r, &word)); 288 CHECK_EQ(word, "cccc"); 289 CHECK(! RE2::FindAndConsume(&input, r, &word)); 290 291 // Check that FindAndConsume works without any submatches. 292 // Earlier version used uninitialized data for 293 // length to consume. 294 input = "aaa"; 295 CHECK(RE2::FindAndConsume(&input, "aaa")); 296 CHECK_EQ(input, ""); 297 } 298 299 TEST(RE2, FindAndConsumeN) { 300 const string s(" one two three 4"); 301 StringPiece input(s); 302 303 RE2::Arg argv[2]; 304 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; 305 306 // 0 arg 307 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one". 308 309 // 1 arg 310 string word; 311 argv[0] = &word; 312 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1)); 313 EXPECT_EQ("two", word); 314 315 // Multi-args 316 int n; 317 argv[1] = &n; 318 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2)); 319 EXPECT_EQ("three", word); 320 EXPECT_EQ(4, n); 321 } 322 323 TEST(RE2, MatchNumberPeculiarity) { 324 VLOG(1) << "TestMatchNumberPeculiarity"; 325 326 RE2 r("(foo)|(bar)|(baz)"); 327 string word1; 328 string word2; 329 string word3; 330 331 CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3)); 332 CHECK_EQ(word1, "foo"); 333 CHECK_EQ(word2, ""); 334 CHECK_EQ(word3, ""); 335 CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3)); 336 CHECK_EQ(word1, ""); 337 CHECK_EQ(word2, "bar"); 338 CHECK_EQ(word3, ""); 339 CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3)); 340 CHECK_EQ(word1, ""); 341 CHECK_EQ(word2, ""); 342 CHECK_EQ(word3, "baz"); 343 CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3)); 344 345 string a; 346 CHECK(RE2::FullMatch("hello", "(foo)|hello", &a)); 347 CHECK_EQ(a, ""); 348 } 349 350 TEST(RE2, Match) { 351 RE2 re("((\\w+):([0-9]+))"); // extracts host and port 352 StringPiece group[4]; 353 354 // No match. 355 StringPiece s = "zyzzyva"; 356 CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED, 357 group, arraysize(group))); 358 359 // Matches and extracts. 360 s = "a chrisr:9000 here"; 361 CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED, 362 group, arraysize(group))); 363 CHECK_EQ(group[0], "chrisr:9000"); 364 CHECK_EQ(group[1], "chrisr:9000"); 365 CHECK_EQ(group[2], "chrisr"); 366 CHECK_EQ(group[3], "9000"); 367 368 string all, host; 369 int port; 370 CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port)); 371 CHECK_EQ(all, "chrisr:9000"); 372 CHECK_EQ(host, "chrisr"); 373 CHECK_EQ(port, 9000); 374 } 375 376 static void TestRecursion(int size, const char *pattern) { 377 // Fill up a string repeating the pattern given 378 string domain; 379 domain.resize(size); 380 int patlen = strlen(pattern); 381 for (int i = 0; i < size; ++i) { 382 domain[i] = pattern[i % patlen]; 383 } 384 // Just make sure it doesn't crash due to too much recursion. 385 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet); 386 RE2::FullMatch(domain, re); 387 } 388 389 // A meta-quoted string, interpreted as a pattern, should always match 390 // the original unquoted string. 391 static void TestQuoteMeta(string unquoted, 392 const RE2::Options& options = RE2::DefaultOptions) { 393 string quoted = RE2::QuoteMeta(unquoted); 394 RE2 re(quoted, options); 395 EXPECT_TRUE_M(RE2::FullMatch(unquoted, re), 396 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); 397 } 398 399 // A meta-quoted string, interpreted as a pattern, should always match 400 // the original unquoted string. 401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 402 const RE2::Options& options = RE2::DefaultOptions) { 403 string quoted = RE2::QuoteMeta(unquoted); 404 RE2 re(quoted, options); 405 EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re), 406 "Unquoted='" + unquoted + "', quoted='" + quoted + "'."); 407 } 408 409 // Tests that quoted meta characters match their original strings, 410 // and that a few things that shouldn't match indeed do not. 411 TEST(QuoteMeta, Simple) { 412 TestQuoteMeta("foo"); 413 TestQuoteMeta("foo.bar"); 414 TestQuoteMeta("foo\\.bar"); 415 TestQuoteMeta("[1-9]"); 416 TestQuoteMeta("1.5-2.0?"); 417 TestQuoteMeta("\\d"); 418 TestQuoteMeta("Who doesn't like ice cream?"); 419 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 420 TestQuoteMeta("((?!)xxx).*yyy"); 421 TestQuoteMeta("(["); 422 } 423 TEST(QuoteMeta, SimpleNegative) { 424 NegativeTestQuoteMeta("foo", "bar"); 425 NegativeTestQuoteMeta("...", "bar"); 426 NegativeTestQuoteMeta("\\.", "."); 427 NegativeTestQuoteMeta("\\.", ".."); 428 NegativeTestQuoteMeta("(a)", "a"); 429 NegativeTestQuoteMeta("(a|b)", "a"); 430 NegativeTestQuoteMeta("(a|b)", "(a)"); 431 NegativeTestQuoteMeta("(a|b)", "a|b"); 432 NegativeTestQuoteMeta("[0-9]", "0"); 433 NegativeTestQuoteMeta("[0-9]", "0-9"); 434 NegativeTestQuoteMeta("[0-9]", "[9]"); 435 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 436 } 437 438 TEST(QuoteMeta, Latin1) { 439 TestQuoteMeta("3\xb2 = 9", RE2::Latin1); 440 } 441 442 TEST(QuoteMeta, UTF8) { 443 TestQuoteMeta("Plcido Domingo"); 444 TestQuoteMeta("xyz"); // No fancy utf8. 445 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol. 446 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character. 447 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime. 448 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note. 449 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should 450 // still work. 451 NegativeTestQuoteMeta("27\xc2\xb0", 452 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol. 453 } 454 455 TEST(QuoteMeta, HasNull) { 456 string has_null; 457 458 // string with one null character 459 has_null += '\0'; 460 TestQuoteMeta(has_null); 461 NegativeTestQuoteMeta(has_null, ""); 462 463 // Don't want null-followed-by-'1' to be interpreted as '\01'. 464 has_null += '1'; 465 TestQuoteMeta(has_null); 466 NegativeTestQuoteMeta(has_null, "\1"); 467 } 468 469 TEST(ProgramSize, BigProgram) { 470 RE2 re_simple("simple regexp"); 471 RE2 re_medium("medium.*regexp"); 472 RE2 re_complex("hard.{1,128}regexp"); 473 474 CHECK_GT(re_simple.ProgramSize(), 0); 475 CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize()); 476 CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize()); 477 } 478 479 // Issue 956519: handling empty character sets was 480 // causing NULL dereference. This tests a few empty character sets. 481 // (The way to get an empty character set is to negate a full one.) 482 TEST(EmptyCharset, Fuzz) { 483 static const char *empties[] = { 484 "[^\\S\\s]", 485 "[^\\S[:space:]]", 486 "[^\\D\\d]", 487 "[^\\D[:digit:]]" 488 }; 489 for (int i = 0; i < arraysize(empties); i++) 490 CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0)); 491 } 492 493 // Test that named groups work correctly. 494 TEST(Capture, NamedGroups) { 495 { 496 RE2 re("(hello world)"); 497 CHECK_EQ(re.NumberOfCapturingGroups(), 1); 498 const map<string, int>& m = re.NamedCapturingGroups(); 499 CHECK_EQ(m.size(), 0); 500 } 501 502 { 503 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))"); 504 CHECK_EQ(re.NumberOfCapturingGroups(), 6); 505 const map<string, int>& m = re.NamedCapturingGroups(); 506 CHECK_EQ(m.size(), 4); 507 CHECK_EQ(m.find("A")->second, 1); 508 CHECK_EQ(m.find("B")->second, 2); 509 CHECK_EQ(m.find("C")->second, 3); 510 CHECK_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous 511 } 512 } 513 514 TEST(RE2, FullMatchWithNoArgs) { 515 CHECK(RE2::FullMatch("h", "h")); 516 CHECK(RE2::FullMatch("hello", "hello")); 517 CHECK(RE2::FullMatch("hello", "h.*o")); 518 CHECK(!RE2::FullMatch("othello", "h.*o")); // Must be anchored at front 519 CHECK(!RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end 520 } 521 522 TEST(RE2, PartialMatch) { 523 CHECK(RE2::PartialMatch("x", "x")); 524 CHECK(RE2::PartialMatch("hello", "h.*o")); 525 CHECK(RE2::PartialMatch("othello", "h.*o")); 526 CHECK(RE2::PartialMatch("hello!", "h.*o")); 527 CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))")); 528 } 529 530 TEST(RE2, PartialMatchN) { 531 RE2::Arg argv[2]; 532 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; 533 534 // 0 arg 535 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0)); 536 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0)); 537 538 // 1 arg 539 int i; 540 argv[0] = &i; 541 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1)); 542 EXPECT_EQ(1001, i); 543 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1)); 544 545 // Multi-arg 546 string s; 547 argv[1] = &s; 548 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2)); 549 EXPECT_EQ(42, i); 550 EXPECT_EQ("life", s); 551 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2)); 552 } 553 554 TEST(RE2, FullMatchZeroArg) { 555 // Zero-arg 556 CHECK(RE2::FullMatch("1001", "\\d+")); 557 } 558 559 TEST(RE2, FullMatchOneArg) { 560 int i; 561 562 // Single-arg 563 CHECK(RE2::FullMatch("1001", "(\\d+)", &i)); 564 CHECK_EQ(i, 1001); 565 CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i)); 566 CHECK_EQ(i, -123); 567 CHECK(!RE2::FullMatch("10", "()\\d+", &i)); 568 CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890", 569 "(\\d+)", &i)); 570 } 571 572 TEST(RE2, FullMatchIntegerArg) { 573 int i; 574 575 // Digits surrounding integer-arg 576 CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i)); 577 CHECK_EQ(i, 23); 578 CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i)); 579 CHECK_EQ(i, 1); 580 CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i)); 581 CHECK_EQ(i, -1); 582 CHECK(RE2::PartialMatch("1234", "(\\d)", &i)); 583 CHECK_EQ(i, 1); 584 CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i)); 585 CHECK_EQ(i, -1); 586 } 587 588 TEST(RE2, FullMatchStringArg) { 589 string s; 590 // String-arg 591 CHECK(RE2::FullMatch("hello", "h(.*)o", &s)); 592 CHECK_EQ(s, string("ell")); 593 } 594 595 TEST(RE2, FullMatchStringPieceArg) { 596 int i; 597 // StringPiece-arg 598 StringPiece sp; 599 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i)); 600 CHECK_EQ(sp.size(), 4); 601 CHECK(memcmp(sp.data(), "ruby", 4) == 0); 602 CHECK_EQ(i, 1234); 603 } 604 605 TEST(RE2, FullMatchMultiArg) { 606 int i; 607 string s; 608 // Multi-arg 609 CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); 610 CHECK_EQ(s, string("ruby")); 611 CHECK_EQ(i, 1234); 612 } 613 614 TEST(RE2, FullMatchN) { 615 RE2::Arg argv[2]; 616 const RE2::Arg* const args[2] = { &argv[0], &argv[1] }; 617 618 // 0 arg 619 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0)); 620 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0)); 621 622 // 1 arg 623 int i; 624 argv[0] = &i; 625 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1)); 626 EXPECT_EQ(1001, i); 627 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1)); 628 629 // Multi-arg 630 string s; 631 argv[1] = &s; 632 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2)); 633 EXPECT_EQ(42, i); 634 EXPECT_EQ("life", s); 635 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2)); 636 } 637 638 TEST(RE2, FullMatchIgnoredArg) { 639 int i; 640 string s; 641 // Ignored arg 642 CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i)); 643 CHECK_EQ(s, string("ruby")); 644 CHECK_EQ(i, 1234); 645 } 646 647 TEST(RE2, FullMatchTypedNullArg) { 648 string s; 649 650 // Ignore non-void* NULL arg 651 CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL)); 652 CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL)); 653 CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL)); 654 CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL)); 655 CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL)); 656 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL)); 657 CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL)); 658 659 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 660 CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL)); 661 CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL)); 662 CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL)); 663 CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL)); 664 CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL)); 665 } 666 667 #ifndef WIN32 668 // Check that numeric parsing code does not read past the end of 669 // the number being parsed. 670 TEST(RE2, NULTerminated) { 671 char *v; 672 int x; 673 long pagesize = sysconf(_SC_PAGE_SIZE); 674 675 #ifndef MAP_ANONYMOUS 676 #define MAP_ANONYMOUS MAP_ANON 677 #endif 678 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE, 679 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0)); 680 CHECK(v != reinterpret_cast<char*>(-1)); 681 LOG(INFO) << "Memory at " << (void*)v; 682 CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno; 683 v[pagesize - 1] = '1'; 684 685 x = 0; 686 CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x)); 687 CHECK_EQ(x, 1); 688 } 689 #endif 690 691 TEST(RE2, FullMatchTypeTests) { 692 // Type tests 693 string zeros(100, '0'); 694 { 695 char c; 696 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); 697 CHECK_EQ(c, 'H'); 698 } 699 { 700 unsigned char c; 701 CHECK(RE2::FullMatch("Hello", "(H)ello", &c)); 702 CHECK_EQ(c, static_cast<unsigned char>('H')); 703 } 704 { 705 int16 v; 706 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); 707 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); 708 CHECK(RE2::FullMatch("32767", "(-?\\d+)", &v)); CHECK_EQ(v, 32767); 709 CHECK(RE2::FullMatch("-32768", "(-?\\d+)", &v)); CHECK_EQ(v, -32768); 710 CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v)); 711 CHECK(!RE2::FullMatch("32768", "(-?\\d+)", &v)); 712 } 713 { 714 uint16 v; 715 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); 716 CHECK(RE2::FullMatch("32767", "(\\d+)", &v)); CHECK_EQ(v, 32767); 717 CHECK(RE2::FullMatch("65535", "(\\d+)", &v)); CHECK_EQ(v, 65535); 718 CHECK(!RE2::FullMatch("65536", "(\\d+)", &v)); 719 } 720 { 721 int32 v; 722 static const int32 max = 0x7fffffff; 723 static const int32 min = -max - 1; 724 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); 725 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); 726 CHECK(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); CHECK_EQ(v, max); 727 CHECK(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); CHECK_EQ(v, min); 728 CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v)); 729 CHECK(!RE2::FullMatch("2147483648", "(-?\\d+)", &v)); 730 731 CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v)); 732 CHECK_EQ(v, max); 733 CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v)); 734 CHECK_EQ(v, min); 735 736 CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v)); 737 CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v))); 738 CHECK_EQ(v, max); 739 CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v))); 740 } 741 { 742 uint32 v; 743 static const uint32 max = 0xfffffffful; 744 CHECK(RE2::FullMatch("100", "(\\d+)", &v)); CHECK_EQ(v, 100); 745 CHECK(RE2::FullMatch("4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); 746 CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v)); 747 CHECK(!RE2::FullMatch("-1", "(\\d+)", &v)); 748 749 CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max); 750 } 751 { 752 int64 v; 753 static const int64 max = 0x7fffffffffffffffull; 754 static const int64 min = -max - 1; 755 char buf[32]; 756 757 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); 758 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100); 759 760 snprintf(buf, sizeof(buf), "%lld", (long long int)max); 761 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); 762 763 snprintf(buf, sizeof(buf), "%lld", (long long int)min); 764 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, min); 765 766 snprintf(buf, sizeof(buf), "%lld", (long long int)max); 767 assert(buf[strlen(buf)-1] != '9'); 768 buf[strlen(buf)-1]++; 769 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); 770 771 snprintf(buf, sizeof(buf), "%lld", (long long int)min); 772 assert(buf[strlen(buf)-1] != '9'); 773 buf[strlen(buf)-1]++; 774 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); 775 } 776 { 777 uint64 v; 778 int64 v2; 779 static const uint64 max = 0xffffffffffffffffull; 780 char buf[32]; 781 782 CHECK(RE2::FullMatch("100", "(-?\\d+)", &v)); CHECK_EQ(v, 100); 783 CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100); 784 785 snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max); 786 CHECK(RE2::FullMatch(buf, "(-?\\d+)", &v)); CHECK_EQ(v, max); 787 788 assert(buf[strlen(buf)-1] != '9'); 789 buf[strlen(buf)-1]++; 790 CHECK(!RE2::FullMatch(buf, "(-?\\d+)", &v)); 791 } 792 } 793 794 TEST(RE2, FloatingPointFullMatchTypes) { 795 string zeros(100, '0'); 796 { 797 float v; 798 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); 799 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); 800 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, float(1e23)); 801 802 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); 803 CHECK_EQ(v, float(1e23)); 804 805 // 6700000000081920.1 is an edge case. 806 // 6700000000081920 is exactly halfway between 807 // two float32s, so the .1 should make it round up. 808 // However, the .1 is outside the precision possible with 809 // a float64: the nearest float64 is 6700000000081920. 810 // So if the code uses strtod and then converts to float32, 811 // round-to-even will make it round down instead of up. 812 // To pass the test, the parser must call strtof directly. 813 // This test case is carefully chosen to use only a 17-digit 814 // number, since C does not guarantee to get the correctly 815 // rounded answer for strtod and strtof unless the input is 816 // short. 817 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); 818 CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f); 819 CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v)); 820 CHECK_EQ(v, 6700000000081920.1f) 821 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f); 822 } 823 { 824 double v; 825 CHECK(RE2::FullMatch("100", "(.*)", &v)); CHECK_EQ(v, 100); 826 CHECK(RE2::FullMatch("-100.", "(.*)", &v)); CHECK_EQ(v, -100); 827 CHECK(RE2::FullMatch("1e23", "(.*)", &v)); CHECK_EQ(v, 1e23); 828 CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v)); 829 CHECK_EQ(v, double(1e23)); 830 831 CHECK(RE2::FullMatch("0.1", "(.*)", &v)); 832 CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1); 833 CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v)); 834 CHECK_EQ(v, 1.0000000596046448) 835 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448); 836 } 837 } 838 839 TEST(RE2, FullMatchAnchored) { 840 int i; 841 // Check that matching is fully anchored 842 CHECK(!RE2::FullMatch("x1001", "(\\d+)", &i)); 843 CHECK(!RE2::FullMatch("1001x", "(\\d+)", &i)); 844 CHECK(RE2::FullMatch("x1001", "x(\\d+)", &i)); CHECK_EQ(i, 1001); 845 CHECK(RE2::FullMatch("1001x", "(\\d+)x", &i)); CHECK_EQ(i, 1001); 846 } 847 848 TEST(RE2, FullMatchBraces) { 849 // Braces 850 CHECK(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}")); 851 CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}")); 852 CHECK(!RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}")); 853 } 854 855 TEST(RE2, Complicated) { 856 // Complicated RE2 857 CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]")); 858 CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]")); 859 CHECK(RE2::FullMatch("X", "foo|bar|[A-Z]")); 860 CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]")); 861 } 862 863 TEST(RE2, FullMatchEnd) { 864 // Check full-match handling (needs '$' tacked on internally) 865 CHECK(RE2::FullMatch("fo", "fo|foo")); 866 CHECK(RE2::FullMatch("foo", "fo|foo")); 867 CHECK(RE2::FullMatch("fo", "fo|foo$")); 868 CHECK(RE2::FullMatch("foo", "fo|foo$")); 869 CHECK(RE2::FullMatch("foo", "foo$")); 870 CHECK(!RE2::FullMatch("foo$bar", "foo\\$")); 871 CHECK(!RE2::FullMatch("fox", "fo|bar")); 872 873 // Uncomment the following if we change the handling of '$' to 874 // prevent it from matching a trailing newline 875 if (false) { 876 // Check that we don't get bitten by pcre's special handling of a 877 // '\n' at the end of the string matching '$' 878 CHECK(!RE2::PartialMatch("foo\n", "foo$")); 879 } 880 } 881 882 TEST(RE2, FullMatchArgCount) { 883 // Number of args 884 int a[16]; 885 CHECK(RE2::FullMatch("", "")); 886 887 memset(a, 0, sizeof(0)); 888 CHECK(RE2::FullMatch("1", 889 "(\\d){1}", 890 &a[0])); 891 CHECK_EQ(a[0], 1); 892 893 memset(a, 0, sizeof(0)); 894 CHECK(RE2::FullMatch("12", 895 "(\\d)(\\d)", 896 &a[0], &a[1])); 897 CHECK_EQ(a[0], 1); 898 CHECK_EQ(a[1], 2); 899 900 memset(a, 0, sizeof(0)); 901 CHECK(RE2::FullMatch("123", 902 "(\\d)(\\d)(\\d)", 903 &a[0], &a[1], &a[2])); 904 CHECK_EQ(a[0], 1); 905 CHECK_EQ(a[1], 2); 906 CHECK_EQ(a[2], 3); 907 908 memset(a, 0, sizeof(0)); 909 CHECK(RE2::FullMatch("1234", 910 "(\\d)(\\d)(\\d)(\\d)", 911 &a[0], &a[1], &a[2], &a[3])); 912 CHECK_EQ(a[0], 1); 913 CHECK_EQ(a[1], 2); 914 CHECK_EQ(a[2], 3); 915 CHECK_EQ(a[3], 4); 916 917 memset(a, 0, sizeof(0)); 918 CHECK(RE2::FullMatch("12345", 919 "(\\d)(\\d)(\\d)(\\d)(\\d)", 920 &a[0], &a[1], &a[2], &a[3], 921 &a[4])); 922 CHECK_EQ(a[0], 1); 923 CHECK_EQ(a[1], 2); 924 CHECK_EQ(a[2], 3); 925 CHECK_EQ(a[3], 4); 926 CHECK_EQ(a[4], 5); 927 928 memset(a, 0, sizeof(0)); 929 CHECK(RE2::FullMatch("123456", 930 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", 931 &a[0], &a[1], &a[2], &a[3], 932 &a[4], &a[5])); 933 CHECK_EQ(a[0], 1); 934 CHECK_EQ(a[1], 2); 935 CHECK_EQ(a[2], 3); 936 CHECK_EQ(a[3], 4); 937 CHECK_EQ(a[4], 5); 938 CHECK_EQ(a[5], 6); 939 940 memset(a, 0, sizeof(0)); 941 CHECK(RE2::FullMatch("1234567", 942 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", 943 &a[0], &a[1], &a[2], &a[3], 944 &a[4], &a[5], &a[6])); 945 CHECK_EQ(a[0], 1); 946 CHECK_EQ(a[1], 2); 947 CHECK_EQ(a[2], 3); 948 CHECK_EQ(a[3], 4); 949 CHECK_EQ(a[4], 5); 950 CHECK_EQ(a[5], 6); 951 CHECK_EQ(a[6], 7); 952 953 memset(a, 0, sizeof(0)); 954 CHECK(RE2::FullMatch("1234567890123456", 955 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" 956 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", 957 &a[0], &a[1], &a[2], &a[3], 958 &a[4], &a[5], &a[6], &a[7], 959 &a[8], &a[9], &a[10], &a[11], 960 &a[12], &a[13], &a[14], &a[15])); 961 CHECK_EQ(a[0], 1); 962 CHECK_EQ(a[1], 2); 963 CHECK_EQ(a[2], 3); 964 CHECK_EQ(a[3], 4); 965 CHECK_EQ(a[4], 5); 966 CHECK_EQ(a[5], 6); 967 CHECK_EQ(a[6], 7); 968 CHECK_EQ(a[7], 8); 969 CHECK_EQ(a[8], 9); 970 CHECK_EQ(a[9], 0); 971 CHECK_EQ(a[10], 1); 972 CHECK_EQ(a[11], 2); 973 CHECK_EQ(a[12], 3); 974 CHECK_EQ(a[13], 4); 975 CHECK_EQ(a[14], 5); 976 CHECK_EQ(a[15], 6); 977 } 978 979 TEST(RE2, Accessors) { 980 // Check the pattern() accessor 981 { 982 const string kPattern = "http://([^/]+)/.*"; 983 const RE2 re(kPattern); 984 CHECK_EQ(kPattern, re.pattern()); 985 } 986 987 // Check RE2 error field. 988 { 989 RE2 re("foo"); 990 CHECK(re.error().empty()); // Must have no error 991 CHECK(re.ok()); 992 CHECK(re.error_code() == RE2::NoError); 993 } 994 } 995 996 TEST(RE2, UTF8) { 997 // Check UTF-8 handling 998 // Three Japanese characters (nihongo) 999 const char utf8_string[] = { 1000 0xe6, 0x97, 0xa5, // 65e5 1001 0xe6, 0x9c, 0xac, // 627c 1002 0xe8, 0xaa, 0x9e, // 8a9e 1003 0 1004 }; 1005 const char utf8_pattern[] = { 1006 '.', 1007 0xe6, 0x9c, 0xac, // 627c 1008 '.', 1009 0 1010 }; 1011 1012 // Both should match in either mode, bytes or UTF-8 1013 RE2 re_test1(".........", RE2::Latin1); 1014 CHECK(RE2::FullMatch(utf8_string, re_test1)); 1015 RE2 re_test2("..."); 1016 CHECK(RE2::FullMatch(utf8_string, re_test2)); 1017 1018 // Check that '.' matches one byte or UTF-8 character 1019 // according to the mode. 1020 string s; 1021 RE2 re_test3("(.)", RE2::Latin1); 1022 CHECK(RE2::PartialMatch(utf8_string, re_test3, &s)); 1023 CHECK_EQ(s, string("\xe6")); 1024 RE2 re_test4("(.)"); 1025 CHECK(RE2::PartialMatch(utf8_string, re_test4, &s)); 1026 CHECK_EQ(s, string("\xe6\x97\xa5")); 1027 1028 // Check that string matches itself in either mode 1029 RE2 re_test5(utf8_string, RE2::Latin1); 1030 CHECK(RE2::FullMatch(utf8_string, re_test5)); 1031 RE2 re_test6(utf8_string); 1032 CHECK(RE2::FullMatch(utf8_string, re_test6)); 1033 1034 // Check that pattern matches string only in UTF8 mode 1035 RE2 re_test7(utf8_pattern, RE2::Latin1); 1036 CHECK(!RE2::FullMatch(utf8_string, re_test7)); 1037 RE2 re_test8(utf8_pattern); 1038 CHECK(RE2::FullMatch(utf8_string, re_test8)); 1039 } 1040 1041 TEST(RE2, UngreedyUTF8) { 1042 // Check that ungreedy, UTF8 regular expressions don't match when they 1043 // oughtn't -- see bug 82246. 1044 { 1045 // This code always worked. 1046 const char* pattern = "\\w+X"; 1047 const string target = "a aX"; 1048 RE2 match_sentence(pattern, RE2::Latin1); 1049 RE2 match_sentence_re(pattern); 1050 1051 CHECK(!RE2::FullMatch(target, match_sentence)); 1052 CHECK(!RE2::FullMatch(target, match_sentence_re)); 1053 } 1054 { 1055 const char* pattern = "(?U)\\w+X"; 1056 const string target = "a aX"; 1057 RE2 match_sentence(pattern, RE2::Latin1); 1058 CHECK_EQ(match_sentence.error(), ""); 1059 RE2 match_sentence_re(pattern); 1060 1061 CHECK(!RE2::FullMatch(target, match_sentence)); 1062 CHECK(!RE2::FullMatch(target, match_sentence_re)); 1063 } 1064 } 1065 1066 TEST(RE2, Rejects) { 1067 { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); } 1068 { 1069 RE2 re("a[x", RE2::Quiet); 1070 CHECK(!re.ok()); 1071 } 1072 { 1073 RE2 re("a[z-a]", RE2::Quiet); 1074 CHECK(!re.ok()); 1075 } 1076 { 1077 RE2 re("a[[:foobar:]]", RE2::Quiet); 1078 CHECK(!re.ok()); 1079 } 1080 { 1081 RE2 re("a(b", RE2::Quiet); 1082 CHECK(!re.ok()); 1083 } 1084 { 1085 RE2 re("a\\", RE2::Quiet); 1086 CHECK(!re.ok()); 1087 } 1088 } 1089 1090 TEST(RE2, NoCrash) { 1091 // Test that using a bad regexp doesn't crash. 1092 { 1093 RE2 re("a\\", RE2::Quiet); 1094 CHECK(!re.ok()); 1095 CHECK(!RE2::PartialMatch("a\\b", re)); 1096 } 1097 1098 // Test that using an enormous regexp doesn't crash 1099 { 1100 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet); 1101 CHECK(!re.ok()); 1102 CHECK(!RE2::PartialMatch("aaa", re)); 1103 } 1104 1105 // Test that a crazy regexp still compiles and runs. 1106 { 1107 RE2 re(".{512}x", RE2::Quiet); 1108 CHECK(re.ok()); 1109 string s; 1110 s.append(515, 'c'); 1111 s.append("x"); 1112 CHECK(RE2::PartialMatch(s, re)); 1113 } 1114 } 1115 1116 TEST(RE2, Recursion) { 1117 // Test that recursion is stopped. 1118 // This test is PCRE-legacy -- there's no recursion in RE2. 1119 int bytes = 15 * 1024; // enough to crash PCRE 1120 TestRecursion(bytes, "."); 1121 TestRecursion(bytes, "a"); 1122 TestRecursion(bytes, "a."); 1123 TestRecursion(bytes, "ab."); 1124 TestRecursion(bytes, "abc."); 1125 } 1126 1127 TEST(RE2, BigCountedRepetition) { 1128 // Test that counted repetition works, given tons of memory. 1129 RE2::Options opt; 1130 opt.set_max_mem(256<<20); 1131 1132 RE2 re(".{512}x", opt); 1133 CHECK(re.ok()); 1134 string s; 1135 s.append(515, 'c'); 1136 s.append("x"); 1137 CHECK(RE2::PartialMatch(s, re)); 1138 } 1139 1140 TEST(RE2, DeepRecursion) { 1141 // Test for deep stack recursion. This would fail with a 1142 // segmentation violation due to stack overflow before pcre was 1143 // patched. 1144 // Again, a PCRE legacy test. RE2 doesn't recurse. 1145 string comment("x*"); 1146 string a(131072, 'a'); 1147 comment += a; 1148 comment += "*x"; 1149 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)"); 1150 CHECK(RE2::FullMatch(comment, re)); 1151 } 1152 1153 // Suggested by Josh Hyman. Failed when SearchOnePass was 1154 // not implementing case-folding. 1155 TEST(CaseInsensitive, MatchAndConsume) { 1156 string result; 1157 string text = "A fish named *Wanda*"; 1158 StringPiece sp(text); 1159 1160 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result)); 1161 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result)); 1162 } 1163 1164 // RE2 should permit implicit conversions from string, StringPiece, const char*, 1165 // and C string literals. 1166 TEST(RE2, ImplicitConversions) { 1167 string re_string("."); 1168 StringPiece re_stringpiece("."); 1169 const char* re_cstring = "."; 1170 EXPECT_TRUE(RE2::PartialMatch("e", re_string)); 1171 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece)); 1172 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring)); 1173 EXPECT_TRUE(RE2::PartialMatch("e", ".")); 1174 } 1175 1176 // Bugs introduced by 8622304 1177 TEST(RE2, CL8622304) { 1178 // reported by ingow 1179 string dir; 1180 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok 1181 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails 1182 1183 // reported by jacobsa 1184 string key, val; 1185 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true", 1186 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?", 1187 &key, 1188 &val)); 1189 EXPECT_EQ(key, "bar"); 1190 EXPECT_EQ(val, "1,0x2F,030,4,5"); 1191 } 1192 1193 1194 // Check that RE2 returns correct regexp pieces on error. 1195 // In particular, make sure it returns whole runes 1196 // and that it always reports invalid UTF-8. 1197 // Also check that Perl error flag piece is big enough. 1198 static struct ErrorTest { 1199 const char *regexp; 1200 const char *error; 1201 } error_tests[] = { 1202 { "ab\\cd", "\\" }, 1203 { "ef\\x01", "\\x0" }, 1204 { "gh\\x101", "\\x1" }, 1205 { "ij\\x1", "\\x1" }, 1206 { "kl\\x", "\\x" }, 1207 { "uv\\x{0000}", "\\x{0000" }, 1208 { "wx\\p{ABC", "\\p{ABC" }, 1209 { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X 1210 { "aa(?smi", "(?sm" }, 1211 { "bb[abc", "[abc" }, 1212 1213 { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8 1214 { "op\377qr", "" }, 1215 { "st\\x{00000\377", "" }, 1216 { "zz\\p{\377}", "" }, 1217 { "zz\\x{00\377}", "" }, 1218 { "zz(?P<name\377>abc)", "" }, 1219 }; 1220 TEST(RE2, ErrorArgs) { 1221 for (int i = 0; i < arraysize(error_tests); i++) { 1222 RE2 re(error_tests[i].regexp, RE2::Quiet); 1223 EXPECT_FALSE(re.ok()); 1224 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error(); 1225 } 1226 } 1227 1228 // Check that "never match \n" mode never matches \n. 1229 static struct NeverTest { 1230 const char* regexp; 1231 const char* text; 1232 const char* match; 1233 } never_tests[] = { 1234 { "(.*)", "abc\ndef\nghi\n", "abc" }, 1235 { "(?s)(abc.*def)", "abc\ndef\n", NULL }, 1236 { "(abc(.|\n)*def)", "abc\ndef\n", NULL }, 1237 { "(abc[^x]*def)", "abc\ndef\n", NULL }, 1238 { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" }, 1239 }; 1240 TEST(RE2, NeverNewline) { 1241 RE2::Options opt; 1242 opt.set_never_nl(true); 1243 for (int i = 0; i < arraysize(never_tests); i++) { 1244 const NeverTest& t = never_tests[i]; 1245 RE2 re(t.regexp, opt); 1246 if (t.match == NULL) { 1247 EXPECT_FALSE(re.PartialMatch(t.text, re)); 1248 } else { 1249 StringPiece m; 1250 EXPECT_TRUE(re.PartialMatch(t.text, re, &m)); 1251 EXPECT_EQ(m, t.match); 1252 } 1253 } 1254 } 1255 1256 // Check that there are no capturing groups in "never capture" mode. 1257 TEST(RE2, NeverCapture) { 1258 RE2::Options opt; 1259 opt.set_never_capture(true); 1260 RE2 re("(r)(e)", opt); 1261 EXPECT_EQ(0, re.NumberOfCapturingGroups()); 1262 } 1263 1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0. 1265 // Triggered by a failed DFA search falling back to Bitstate when 1266 // using Match with a NULL submatch set. Bitstate tried to read 1267 // the submatch[0] entry even if nsubmatch was 0. 1268 TEST(RE2, BitstateCaptureBug) { 1269 RE2::Options opt; 1270 opt.set_max_mem(20000); 1271 RE2 re("(_________$)", opt); 1272 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x"; 1273 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0)); 1274 } 1275 1276 // C++ version of bug 609710. 1277 TEST(RE2, UnicodeClasses) { 1278 const string str = "ABCDEFGHI"; 1279 string a, b, c; 1280 1281 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}")); 1282 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}")); 1283 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}")); 1284 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}")); 1285 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}")); 1286 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}")); 1287 1288 EXPECT_TRUE(RE2::FullMatch("", "\\p{L}")); 1289 EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}")); 1290 EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}")); 1291 EXPECT_FALSE(RE2::FullMatch("", "\\P{L}")); 1292 EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}")); 1293 EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}")); 1294 1295 EXPECT_TRUE(RE2::FullMatch("", "\\p{L}")); 1296 EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}")); 1297 EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}")); 1298 EXPECT_FALSE(RE2::FullMatch("", "\\P{L}")); 1299 EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}")); 1300 EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}")); 1301 1302 EXPECT_TRUE(RE2::FullMatch("", "\\p{L}")); 1303 EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}")); 1304 EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}")); 1305 EXPECT_FALSE(RE2::FullMatch("", "\\P{L}")); 1306 EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}")); 1307 EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}")); 1308 1309 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c)); 1310 EXPECT_EQ("A", a); 1311 EXPECT_EQ("B", b); 1312 EXPECT_EQ("C", c); 1313 1314 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c)); 1315 EXPECT_EQ("A", a); 1316 EXPECT_EQ("B", b); 1317 EXPECT_EQ("C", c); 1318 1319 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}")); 1320 1321 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c)); 1322 EXPECT_EQ("A", a); 1323 EXPECT_EQ("B", b); 1324 EXPECT_EQ("C", c); 1325 1326 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]")); 1327 1328 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c)); 1329 EXPECT_EQ("", a); 1330 EXPECT_EQ("", b); 1331 EXPECT_EQ("", c); 1332 } 1333 1334 // Bug reported by saito. 2009/02/17 1335 TEST(RE2, NullVsEmptyString) { 1336 RE2 re2(".*"); 1337 StringPiece v1(""); 1338 EXPECT_TRUE(RE2::FullMatch(v1, re2)); 1339 1340 StringPiece v2; 1341 EXPECT_TRUE(RE2::FullMatch(v2, re2)); 1342 } 1343 1344 // Issue 1816809 1345 TEST(RE2, Bug1816809) { 1346 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))"); 1347 StringPiece piece("llx-3;llx4"); 1348 string x; 1349 EXPECT_TRUE(RE2::Consume(&piece, re, &x)); 1350 } 1351 1352 // Issue 3061120 1353 TEST(RE2, Bug3061120) { 1354 RE2 re("(?i)\\W"); 1355 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked 1356 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin 1357 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s 1358 } 1359 1360 TEST(RE2, CapturingGroupNames) { 1361 // Opening parentheses annotated with group IDs: 1362 // 12 3 45 6 7 1363 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))"); 1364 EXPECT_TRUE(re.ok()); 1365 const map<int, string>& have = re.CapturingGroupNames(); 1366 map<int, string> want; 1367 want[3] = "G2"; 1368 want[6] = "G2"; 1369 want[7] = "G1"; 1370 EXPECT_EQ(want, have); 1371 } 1372 1373 TEST(RE2, RegexpToStringLossOfAnchor) { 1374 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at"); 1375 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at"); 1376 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$"); 1377 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)"); 1378 } 1379 1380 } // namespace re2 1381