// -*- coding: utf-8 -*-
//
// Copyright (c) 2005 - 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // 32 // Author: Sanjay Ghemawat 33 // 34 // TODO: Test extractions for PartialMatch/Consume 35 36 #ifdef HAVE_CONFIG_H 37 #include "config.h" 38 #endif 39 40 #include <stdio.h> 41 #include <string.h> /* for memset and strcmp */ 42 #include <cassert> 43 #include <vector> 44 #include "pcrecpp.h" 45 46 using pcrecpp::StringPiece; 47 using pcrecpp::RE; 48 using pcrecpp::RE_Options; 49 using pcrecpp::Hex; 50 using pcrecpp::Octal; 51 using pcrecpp::CRadix; 52 53 static bool VERBOSE_TEST = false; 54 55 // CHECK dies with a fatal error if condition is not true. It is *not* 56 // controlled by NDEBUG, so the check will be executed regardless of 57 // compilation mode. Therefore, it is safe to do things like: 58 // CHECK_EQ(fp->Write(x), 4) 59 #define CHECK(condition) do { \ 60 if (!(condition)) { \ 61 fprintf(stderr, "%s:%d: Check failed: %s\n", \ 62 __FILE__, __LINE__, #condition); \ 63 exit(1); \ 64 } \ 65 } while (0) 66 67 #define CHECK_EQ(a, b) CHECK(a == b) 68 69 static void Timing1(int num_iters) { 70 // Same pattern lots of times 71 RE pattern("ruby:\\d+"); 72 StringPiece p("ruby:1234"); 73 for (int j = num_iters; j > 0; j--) { 74 CHECK(pattern.FullMatch(p)); 75 } 76 } 77 78 static void Timing2(int num_iters) { 79 // Same pattern lots of times 80 RE pattern("ruby:(\\d+)"); 81 int i; 82 for (int j = num_iters; j > 0; j--) { 83 CHECK(pattern.FullMatch("ruby:1234", &i)); 84 CHECK_EQ(i, 1234); 85 } 86 } 87 88 static void Timing3(int num_iters) { 89 string text_string; 90 for (int j = num_iters; j > 0; j--) { 91 text_string += "this is another line\n"; 92 } 93 94 RE line_matcher(".*\n"); 95 string line; 96 StringPiece text(text_string); 97 int counter = 0; 98 while (line_matcher.Consume(&text)) { 99 counter++; 100 } 101 printf("Matched %d lines\n", counter); 102 } 103 104 #if 0 // uncomment this if you have a way of defining VirtualProcessSize() 105 106 static void LeakTest() { 107 // Check for memory leaks 108 unsigned long long initial_size = 0; 109 for (int i = 0; 
i < 100000; i++) { 110 if (i == 50000) { 111 initial_size = VirtualProcessSize(); 112 printf("Size after 50000: %llu\n", initial_size); 113 } 114 char buf[100]; // definitely big enough 115 sprintf(buf, "pat%09d", i); 116 RE newre(buf); 117 } 118 uint64 final_size = VirtualProcessSize(); 119 printf("Size after 100000: %llu\n", final_size); 120 const double growth = double(final_size - initial_size) / final_size; 121 printf("Growth: %0.2f%%", growth * 100); 122 CHECK(growth < 0.02); // Allow < 2% growth 123 } 124 125 #endif 126 127 static void RadixTests() { 128 printf("Testing hex\n"); 129 130 #define CHECK_HEX(type, value) \ 131 do { \ 132 type v; \ 133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ 134 CHECK_EQ(v, 0x ## value); \ 135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ 136 CHECK_EQ(v, 0x ## value); \ 137 } while(0) 138 139 CHECK_HEX(short, 2bad); 140 CHECK_HEX(unsigned short, 2badU); 141 CHECK_HEX(int, dead); 142 CHECK_HEX(unsigned int, deadU); 143 CHECK_HEX(long, 7eadbeefL); 144 CHECK_HEX(unsigned long, deadbeefUL); 145 #ifdef HAVE_LONG_LONG 146 CHECK_HEX(long long, 12345678deadbeefLL); 147 #endif 148 #ifdef HAVE_UNSIGNED_LONG_LONG 149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 150 #endif 151 152 #undef CHECK_HEX 153 154 printf("Testing octal\n"); 155 156 #define CHECK_OCTAL(type, value) \ 157 do { \ 158 type v; \ 159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ 160 CHECK_EQ(v, 0 ## value); \ 161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ 162 CHECK_EQ(v, 0 ## value); \ 163 } while(0) 164 165 CHECK_OCTAL(short, 77777); 166 CHECK_OCTAL(unsigned short, 177777U); 167 CHECK_OCTAL(int, 17777777777); 168 CHECK_OCTAL(unsigned int, 37777777777U); 169 CHECK_OCTAL(long, 17777777777L); 170 CHECK_OCTAL(unsigned long, 37777777777UL); 171 #ifdef HAVE_LONG_LONG 172 CHECK_OCTAL(long long, 777777777777777777777LL); 173 #endif 174 #ifdef HAVE_UNSIGNED_LONG_LONG 175 
CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); 176 #endif 177 178 #undef CHECK_OCTAL 179 180 printf("Testing decimal\n"); 181 182 #define CHECK_DECIMAL(type, value) \ 183 do { \ 184 type v; \ 185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ 186 CHECK_EQ(v, value); \ 187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ 188 CHECK_EQ(v, value); \ 189 } while(0) 190 191 CHECK_DECIMAL(short, -1); 192 CHECK_DECIMAL(unsigned short, 9999); 193 CHECK_DECIMAL(int, -1000); 194 CHECK_DECIMAL(unsigned int, 12345U); 195 CHECK_DECIMAL(long, -10000000L); 196 CHECK_DECIMAL(unsigned long, 3083324652U); 197 #ifdef HAVE_LONG_LONG 198 CHECK_DECIMAL(long long, -100000000000000LL); 199 #endif 200 #ifdef HAVE_UNSIGNED_LONG_LONG 201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 202 #endif 203 204 #undef CHECK_DECIMAL 205 206 } 207 208 static void TestReplace() { 209 printf("Testing Replace\n"); 210 211 struct ReplaceTest { 212 const char *regexp; 213 const char *rewrite; 214 const char *original; 215 const char *single; 216 const char *global; 217 int global_count; // the expected return value from ReplaceAll 218 }; 219 static const ReplaceTest tests[] = { 220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 221 "\\2\\1ay", 222 "the quick brown fox jumps over the lazy dogs.", 223 "ethay quick brown fox jumps over the lazy dogs.", 224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 225 9 }, 226 { "\\w+", 227 "\\0-NOSPAM", 228 "paul.haahr (at) google.com", 229 "paul-NOSPAM.haahr (at) google.com", 230 "paul-NOSPAM.haahr-NOSPAM (at) google-NOSPAM.com-NOSPAM", 231 4 }, 232 { "^", 233 "(START)", 234 "foo", 235 "(START)foo", 236 "(START)foo", 237 1 }, 238 { "^", 239 "(START)", 240 "", 241 "(START)", 242 "(START)", 243 1 }, 244 { "$", 245 "(END)", 246 "", 247 "(END)", 248 "(END)", 249 1 }, 250 { "b", 251 "bb", 252 "ababababab", 253 "abbabababab", 254 "abbabbabbabbabb", 255 5 }, 256 { "b", 257 "bb", 258 "bbbbbb", 259 "bbbbbbb", 260 
"bbbbbbbbbbbb", 261 6 }, 262 { "b+", 263 "bb", 264 "bbbbbb", 265 "bb", 266 "bb", 267 1 }, 268 { "b*", 269 "bb", 270 "bbbbbb", 271 "bb", 272 "bbbb", 273 2 }, 274 { "b*", 275 "bb", 276 "aaaaa", 277 "bbaaaaa", 278 "bbabbabbabbabbabb", 279 6 }, 280 { "b*", 281 "bb", 282 "aa\naa\n", 283 "bbaa\naa\n", 284 "bbabbabb\nbbabbabb\nbb", 285 7 }, 286 { "b*", 287 "bb", 288 "aa\raa\r", 289 "bbaa\raa\r", 290 "bbabbabb\rbbabbabb\rbb", 291 7 }, 292 { "b*", 293 "bb", 294 "aa\r\naa\r\n", 295 "bbaa\r\naa\r\n", 296 "bbabbabb\r\nbbabbabb\r\nbb", 297 7 }, 298 // Check empty-string matching (it's tricky!) 299 { "aa|b*", 300 "@", 301 "aa", 302 "@", 303 "@@", 304 2 }, 305 { "b*|aa", 306 "@", 307 "aa", 308 "@aa", 309 "@@@", 310 3 }, 311 #ifdef SUPPORT_UTF8 312 { "b*", 313 "bb", 314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", 316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", 317 5 }, 318 { "b*", 319 "bb", 320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", 322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" 323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), 324 9 }, 325 #endif 326 { "", NULL, NULL, NULL, NULL, 0 } 327 }; 328 329 #ifdef SUPPORT_UTF8 330 const bool support_utf8 = true; 331 #else 332 const bool support_utf8 = false; 333 #endif 334 335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 336 RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF) 337 .set_utf(support_utf8)); 338 assert(re.error().empty()); 339 string one(t->original); 340 CHECK(re.Replace(t->rewrite, &one)); 341 CHECK_EQ(one, t->single); 342 string all(t->original); 343 const int replace_count = re.GlobalReplace(t->rewrite, &all); 344 CHECK_EQ(all, t->global); 345 CHECK_EQ(replace_count, t->global_count); 346 } 347 348 // One final test: test \r\n replacement when we're not in CRLF mode 349 { 
350 RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR) 351 .set_utf(support_utf8)); 352 assert(re.error().empty()); 353 string all("aa\r\naa\r\n"); 354 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 355 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 356 } 357 { 358 RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF) 359 .set_utf(support_utf8)); 360 assert(re.error().empty()); 361 string all("aa\r\naa\r\n"); 362 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 363 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 364 } 365 // TODO: test what happens when no PCRE_NEWLINE_* flag is set. 366 // Alas, the answer depends on how pcre was compiled. 367 } 368 369 static void TestExtract() { 370 printf("Testing Extract\n"); 371 372 string s; 373 374 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris (at) kremvax.ru", &s)); 375 CHECK_EQ(s, "kremvax!boris"); 376 377 // check the RE interface as well 378 CHECK(RE(".*").Extract("'\\0'", "foo", &s)); 379 CHECK_EQ(s, "'foo'"); 380 CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); 381 CHECK_EQ(s, "'foo'"); 382 } 383 384 static void TestConsume() { 385 printf("Testing Consume\n"); 386 387 string word; 388 389 string s(" aaa b!@#$@#$cccc"); 390 StringPiece input(s); 391 392 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 393 CHECK(r.Consume(&input, &word)); 394 CHECK_EQ(word, "aaa"); 395 CHECK(r.Consume(&input, &word)); 396 CHECK_EQ(word, "b"); 397 CHECK(! r.Consume(&input, &word)); 398 } 399 400 static void TestFindAndConsume() { 401 printf("Testing FindAndConsume\n"); 402 403 string word; 404 405 string s(" aaa b!@#$@#$cccc"); 406 StringPiece input(s); 407 408 RE r("(\\w+)"); // matches a word 409 CHECK(r.FindAndConsume(&input, &word)); 410 CHECK_EQ(word, "aaa"); 411 CHECK(r.FindAndConsume(&input, &word)); 412 CHECK_EQ(word, "b"); 413 CHECK(r.FindAndConsume(&input, &word)); 414 CHECK_EQ(word, "cccc"); 415 CHECK(! 
r.FindAndConsume(&input, &word)); 416 } 417 418 static void TestMatchNumberPeculiarity() { 419 printf("Testing match-number peculiarity\n"); 420 421 string word1; 422 string word2; 423 string word3; 424 425 RE r("(foo)|(bar)|(baz)"); 426 CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); 427 CHECK_EQ(word1, "foo"); 428 CHECK_EQ(word2, ""); 429 CHECK_EQ(word3, ""); 430 CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); 431 CHECK_EQ(word1, ""); 432 CHECK_EQ(word2, "bar"); 433 CHECK_EQ(word3, ""); 434 CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); 435 CHECK_EQ(word1, ""); 436 CHECK_EQ(word2, ""); 437 CHECK_EQ(word3, "baz"); 438 CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); 439 440 string a; 441 CHECK(RE("(foo)|hello").FullMatch("hello", &a)); 442 CHECK_EQ(a, ""); 443 } 444 445 static void TestRecursion() { 446 printf("Testing recursion\n"); 447 448 // Get one string that passes (sometimes), one that never does. 449 string text_good("abcdefghijk"); 450 string text_bad("acdefghijkl"); 451 452 // According to pcretest, matching text_good against (\w+)*b 453 // requires match_limit of at least 8192, and match_recursion_limit 454 // of at least 37. 
455 456 RE_Options options_ml; 457 options_ml.set_match_limit(8192); 458 RE re("(\\w+)*b", options_ml); 459 CHECK(re.PartialMatch(text_good) == true); 460 CHECK(re.PartialMatch(text_bad) == false); 461 CHECK(re.FullMatch(text_good) == false); 462 CHECK(re.FullMatch(text_bad) == false); 463 464 options_ml.set_match_limit(1024); 465 RE re2("(\\w+)*b", options_ml); 466 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit 467 CHECK(re2.PartialMatch(text_bad) == false); 468 CHECK(re2.FullMatch(text_good) == false); 469 CHECK(re2.FullMatch(text_bad) == false); 470 471 RE_Options options_mlr; 472 options_mlr.set_match_limit_recursion(50); 473 RE re3("(\\w+)*b", options_mlr); 474 CHECK(re3.PartialMatch(text_good) == true); 475 CHECK(re3.PartialMatch(text_bad) == false); 476 CHECK(re3.FullMatch(text_good) == false); 477 CHECK(re3.FullMatch(text_bad) == false); 478 479 options_mlr.set_match_limit_recursion(10); 480 RE re4("(\\w+)*b", options_mlr); 481 CHECK(re4.PartialMatch(text_good) == false); 482 CHECK(re4.PartialMatch(text_bad) == false); 483 CHECK(re4.FullMatch(text_good) == false); 484 CHECK(re4.FullMatch(text_bad) == false); 485 } 486 487 // A meta-quoted string, interpreted as a pattern, should always match 488 // the original unquoted string. 489 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { 490 string quoted = RE::QuoteMeta(unquoted); 491 RE re(quoted, options); 492 CHECK(re.FullMatch(unquoted)); 493 } 494 495 // A string containing meaningful regexp characters, which is then meta- 496 // quoted, should not generally match a string the unquoted string does. 
497 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 498 RE_Options options = RE_Options()) { 499 string quoted = RE::QuoteMeta(unquoted); 500 RE re(quoted, options); 501 CHECK(!re.FullMatch(should_not_match)); 502 } 503 504 // Tests that quoted meta characters match their original strings, 505 // and that a few things that shouldn't match indeed do not. 506 static void TestQuotaMetaSimple() { 507 TestQuoteMeta("foo"); 508 TestQuoteMeta("foo.bar"); 509 TestQuoteMeta("foo\\.bar"); 510 TestQuoteMeta("[1-9]"); 511 TestQuoteMeta("1.5-2.0?"); 512 TestQuoteMeta("\\d"); 513 TestQuoteMeta("Who doesn't like ice cream?"); 514 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 515 TestQuoteMeta("((?!)xxx).*yyy"); 516 TestQuoteMeta("(["); 517 TestQuoteMeta(string("foo\0bar", 7)); 518 } 519 520 static void TestQuoteMetaSimpleNegative() { 521 NegativeTestQuoteMeta("foo", "bar"); 522 NegativeTestQuoteMeta("...", "bar"); 523 NegativeTestQuoteMeta("\\.", "."); 524 NegativeTestQuoteMeta("\\.", ".."); 525 NegativeTestQuoteMeta("(a)", "a"); 526 NegativeTestQuoteMeta("(a|b)", "a"); 527 NegativeTestQuoteMeta("(a|b)", "(a)"); 528 NegativeTestQuoteMeta("(a|b)", "a|b"); 529 NegativeTestQuoteMeta("[0-9]", "0"); 530 NegativeTestQuoteMeta("[0-9]", "0-9"); 531 NegativeTestQuoteMeta("[0-9]", "[9]"); 532 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 533 } 534 535 static void TestQuoteMetaLatin1() { 536 TestQuoteMeta("3\xb2 = 9"); 537 } 538 539 static void TestQuoteMetaUtf8() { 540 #ifdef SUPPORT_UTF8 541 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); 542 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 543 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) 544 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character 545 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) 546 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) 547 TestQuoteMeta("27\xc2\xb0"); // 
Interpreted as Latin-1, but should still work 548 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) 549 "27\\\xc2\\\xb0", 550 pcrecpp::UTF8()); 551 #endif 552 } 553 554 static void TestQuoteMetaAll() { 555 printf("Testing QuoteMeta\n"); 556 TestQuotaMetaSimple(); 557 TestQuoteMetaSimpleNegative(); 558 TestQuoteMetaLatin1(); 559 TestQuoteMetaUtf8(); 560 } 561 562 // 563 // Options tests contributed by 564 // Giuseppe Maxia, CTO, Stardata s.r.l. 565 // July 2005 566 // 567 static void GetOneOptionResult( 568 const char *option_name, 569 const char *regex, 570 const char *str, 571 RE_Options options, 572 bool full, 573 string expected) { 574 575 printf("Testing Option <%s>\n", option_name); 576 if(VERBOSE_TEST) 577 printf("/%s/ finds \"%s\" within \"%s\" \n", 578 regex, 579 expected.c_str(), 580 str); 581 string captured(""); 582 if (full) 583 RE(regex,options).FullMatch(str, &captured); 584 else 585 RE(regex,options).PartialMatch(str, &captured); 586 CHECK_EQ(captured, expected); 587 } 588 589 static void TestOneOption( 590 const char *option_name, 591 const char *regex, 592 const char *str, 593 RE_Options options, 594 bool full, 595 bool assertive = true) { 596 597 printf("Testing Option <%s>\n", option_name); 598 if (VERBOSE_TEST) 599 printf("'%s' %s /%s/ \n", 600 str, 601 (assertive? 
"matches" : "doesn't match"), 602 regex); 603 if (assertive) { 604 if (full) 605 CHECK(RE(regex,options).FullMatch(str)); 606 else 607 CHECK(RE(regex,options).PartialMatch(str)); 608 } else { 609 if (full) 610 CHECK(!RE(regex,options).FullMatch(str)); 611 else 612 CHECK(!RE(regex,options).PartialMatch(str)); 613 } 614 } 615 616 static void Test_CASELESS() { 617 RE_Options options; 618 RE_Options options2; 619 620 options.set_caseless(true); 621 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); 622 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); 623 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); 624 625 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); 626 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); 627 options.set_caseless(false); 628 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); 629 } 630 631 static void Test_MULTILINE() { 632 RE_Options options; 633 RE_Options options2; 634 const char *str = "HELLO\n" "cruel\n" "world\n"; 635 636 options.set_multiline(true); 637 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); 638 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); 639 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); 640 options.set_multiline(false); 641 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); 642 } 643 644 static void Test_DOTALL() { 645 RE_Options options; 646 RE_Options options2; 647 const char *str = "HELLO\n" "cruel\n" "world"; 648 649 options.set_dotall(true); 650 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); 651 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); 652 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); 653 options.set_dotall(false); 654 
TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); 655 } 656 657 static void Test_DOLLAR_ENDONLY() { 658 RE_Options options; 659 RE_Options options2; 660 const char *str = "HELLO world\n"; 661 662 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); 663 options.set_dollar_endonly(true); 664 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); 665 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); 666 } 667 668 static void Test_EXTENDED() { 669 RE_Options options; 670 RE_Options options2; 671 const char *str = "HELLO world"; 672 673 options.set_extended(true); 674 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); 675 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); 676 TestOneOption("EXTENDED (class)", 677 "^ HE L{2} O " 678 "\\s+ " 679 "\\w+ $ ", 680 str, 681 options, 682 false); 683 684 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); 685 TestOneOption("EXTENDED (function)", 686 "^ HE L{2} O " 687 "\\s+ " 688 "\\w+ $ ", 689 str, 690 pcrecpp::EXTENDED(), 691 false); 692 693 options.set_extended(false); 694 TestOneOption("no EXTENDED", "HELLO world", str, options, false); 695 } 696 697 static void Test_NO_AUTO_CAPTURE() { 698 RE_Options options; 699 const char *str = "HELLO world"; 700 string captured; 701 702 printf("Testing Option <no NO_AUTO_CAPTURE>\n"); 703 if (VERBOSE_TEST) 704 printf("parentheses capture text\n"); 705 RE re("(world|universe)$", options); 706 CHECK(re.Extract("\\1", str , &captured)); 707 CHECK_EQ(captured, "world"); 708 options.set_no_auto_capture(true); 709 printf("testing Option <NO_AUTO_CAPTURE>\n"); 710 if (VERBOSE_TEST) 711 printf("parentheses do not capture text\n"); 712 re.Extract("\\1",str, &captured ); 713 CHECK_EQ(captured, "world"); 714 } 715 716 static void Test_UNGREEDY() { 717 RE_Options options; 718 const 
char *str = "HELLO, 'this' is the 'world'"; 719 720 options.set_ungreedy(true); 721 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); 722 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); 723 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); 724 725 options.set_ungreedy(false); 726 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); 727 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); 728 } 729 730 static void Test_all_options() { 731 const char *str = "HELLO\n" "cruel\n" "world"; 732 RE_Options options; 733 options.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL); 734 735 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); 736 options.set_all_options(0); 737 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); 738 options.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED); 739 740 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); 741 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", 742 " ^ c r u e l $ ", 743 str, 744 RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED), 745 false); 746 747 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", 748 " ^ c r u e l $ ", 749 str, 750 RE_Options() 751 .set_multiline(true) 752 .set_extended(true), 753 false); 754 755 options.set_all_options(0); 756 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); 757 758 } 759 760 static void TestOptions() { 761 printf("Testing Options\n"); 762 Test_CASELESS(); 763 Test_MULTILINE(); 764 Test_DOTALL(); 765 Test_DOLLAR_ENDONLY(); 766 Test_EXTENDED(); 767 Test_NO_AUTO_CAPTURE(); 768 Test_UNGREEDY(); 769 Test_all_options(); 770 } 771 772 static void TestConstructors() { 773 printf("Testing constructors\n"); 774 775 RE_Options options; 776 
options.set_dotall(true); 777 const char *str = "HELLO\n" "cruel\n" "world"; 778 779 RE orig("HELLO.*world", options); 780 CHECK(orig.FullMatch(str)); 781 782 RE copy1(orig); 783 CHECK(copy1.FullMatch(str)); 784 785 RE copy2("not a match"); 786 CHECK(!copy2.FullMatch(str)); 787 copy2 = copy1; 788 CHECK(copy2.FullMatch(str)); 789 copy2 = orig; 790 CHECK(copy2.FullMatch(str)); 791 792 // Make sure when we assign to ourselves, nothing bad happens 793 orig = orig; 794 copy1 = copy1; 795 copy2 = copy2; 796 CHECK(orig.FullMatch(str)); 797 CHECK(copy1.FullMatch(str)); 798 CHECK(copy2.FullMatch(str)); 799 } 800 801 int main(int argc, char** argv) { 802 // Treat any flag as --help 803 if (argc > 1 && argv[1][0] == '-') { 804 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" 805 " If 'timingX ###' is specified, run the given timing test\n" 806 " with the given number of iterations, rather than running\n" 807 " the default corectness test.\n", argv[0]); 808 return 0; 809 } 810 811 if (argc > 1) { 812 if ( argc == 2 || atoi(argv[2]) == 0) { 813 printf("timing mode needs a num-iters argument\n"); 814 return 1; 815 } 816 if (!strcmp(argv[1], "timing1")) 817 Timing1(atoi(argv[2])); 818 else if (!strcmp(argv[1], "timing2")) 819 Timing2(atoi(argv[2])); 820 else if (!strcmp(argv[1], "timing3")) 821 Timing3(atoi(argv[2])); 822 else 823 printf("Unknown argument '%s'\n", argv[1]); 824 return 0; 825 } 826 827 printf("PCRE C++ wrapper tests\n"); 828 printf("Testing FullMatch\n"); 829 830 int i; 831 string s; 832 833 /***** FullMatch with no args *****/ 834 835 CHECK(RE("h.*o").FullMatch("hello")); 836 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front 837 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end 838 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op 839 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op 840 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops 841 842 /***** FullMatch with args *****/ 843 
844 // Zero-arg 845 CHECK(RE("\\d+").FullMatch("1001")); 846 847 // Single-arg 848 CHECK(RE("(\\d+)").FullMatch("1001", &i)); 849 CHECK_EQ(i, 1001); 850 CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); 851 CHECK_EQ(i, -123); 852 CHECK(!RE("()\\d+").FullMatch("10", &i)); 853 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", 854 &i)); 855 856 // Digits surrounding integer-arg 857 CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); 858 CHECK_EQ(i, 23); 859 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); 860 CHECK_EQ(i, 1); 861 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); 862 CHECK_EQ(i, -1); 863 CHECK(RE("(\\d)").PartialMatch("1234", &i)); 864 CHECK_EQ(i, 1); 865 CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); 866 CHECK_EQ(i, -1); 867 868 // String-arg 869 CHECK(RE("h(.*)o").FullMatch("hello", &s)); 870 CHECK_EQ(s, string("ell")); 871 872 // StringPiece-arg 873 StringPiece sp; 874 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); 875 CHECK_EQ(sp.size(), 4); 876 CHECK(memcmp(sp.data(), "ruby", 4) == 0); 877 CHECK_EQ(i, 1234); 878 879 // Multi-arg 880 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); 881 CHECK_EQ(s, string("ruby")); 882 CHECK_EQ(i, 1234); 883 884 // Ignore non-void* NULL arg 885 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); 886 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); 887 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); 888 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); 889 #ifdef HAVE_LONG_LONG 890 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); 891 #endif 892 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); 893 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); 894 895 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 
896 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); 897 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); 898 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); 899 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); 900 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); 901 902 // Ignored arg 903 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); 904 CHECK_EQ(s, string("ruby")); 905 CHECK_EQ(i, 1234); 906 907 // Type tests 908 { 909 char c; 910 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 911 CHECK_EQ(c, 'H'); 912 } 913 { 914 unsigned char c; 915 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 916 CHECK_EQ(c, static_cast<unsigned char>('H')); 917 } 918 { 919 short v; 920 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 921 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 922 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 923 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); 924 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); 925 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); 926 } 927 { 928 unsigned short v; 929 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 930 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 931 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); 932 CHECK(!RE("(\\d+)").FullMatch("65536", &v)); 933 } 934 { 935 int v; 936 static const int max_value = 0x7fffffff; 937 static const int min_value = -max_value - 1; 938 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 939 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 940 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); 941 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); 942 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); 943 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); 944 } 945 { 946 unsigned int v; 947 static const unsigned int max_value = 0xfffffffful; 
948 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 949 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); 950 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); 951 } 952 #ifdef HAVE_LONG_LONG 953 # if defined(__MINGW__) || defined(__MINGW32__) 954 # define LLD "%I64d" 955 # define LLU "%I64u" 956 # else 957 # define LLD "%lld" 958 # define LLU "%llu" 959 # endif 960 { 961 long long v; 962 static const long long max_value = 0x7fffffffffffffffLL; 963 static const long long min_value = -max_value - 1; 964 char buf[32]; // definitely big enough for a long long 965 966 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 967 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); 968 969 sprintf(buf, LLD, max_value); 970 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 971 972 sprintf(buf, LLD, min_value); 973 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); 974 975 sprintf(buf, LLD, max_value); 976 assert(buf[strlen(buf)-1] != '9'); 977 buf[strlen(buf)-1]++; 978 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 979 980 sprintf(buf, LLD, min_value); 981 assert(buf[strlen(buf)-1] != '9'); 982 buf[strlen(buf)-1]++; 983 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 984 } 985 #endif 986 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG 987 { 988 unsigned long long v; 989 long long v2; 990 static const unsigned long long max_value = 0xffffffffffffffffULL; 991 char buf[32]; // definitely big enough for a unsigned long long 992 993 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); 994 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); 995 996 sprintf(buf, LLU, max_value); 997 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 998 999 assert(buf[strlen(buf)-1] != '9'); 1000 buf[strlen(buf)-1]++; 1001 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 1002 } 1003 #endif 1004 { 1005 float v; 1006 CHECK(RE("(.*)").FullMatch("100", &v)); 1007 
CHECK(RE("(.*)").FullMatch("-100.", &v)); 1008 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1009 } 1010 { 1011 double v; 1012 CHECK(RE("(.*)").FullMatch("100", &v)); 1013 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1014 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1015 } 1016 1017 // Check that matching is fully anchored 1018 CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); 1019 CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); 1020 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); 1021 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); 1022 1023 // Braces 1024 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); 1025 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); 1026 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); 1027 1028 // Complicated RE 1029 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); 1030 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); 1031 CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); 1032 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); 1033 1034 // Check full-match handling (needs '$' tacked on internally) 1035 CHECK(RE("fo|foo").FullMatch("fo")); 1036 CHECK(RE("fo|foo").FullMatch("foo")); 1037 CHECK(RE("fo|foo$").FullMatch("fo")); 1038 CHECK(RE("fo|foo$").FullMatch("foo")); 1039 CHECK(RE("foo$").FullMatch("foo")); 1040 CHECK(!RE("foo\\$").FullMatch("foo$bar")); 1041 CHECK(!RE("fo|bar").FullMatch("fox")); 1042 1043 // Uncomment the following if we change the handling of '$' to 1044 // prevent it from matching a trailing newline 1045 if (false) { 1046 // Check that we don't get bitten by pcre's special handling of a 1047 // '\n' at the end of the string matching '$' 1048 CHECK(!RE("foo$").PartialMatch("foo\n")); 1049 } 1050 1051 // Number of args 1052 int a[16]; 1053 CHECK(RE("").FullMatch("")); 1054 1055 memset(a, 0, sizeof(0)); 1056 CHECK(RE("(\\d){1}").FullMatch("1", 1057 &a[0])); 1058 CHECK_EQ(a[0], 1); 1059 1060 memset(a, 0, sizeof(0)); 1061 CHECK(RE("(\\d)(\\d)").FullMatch("12", 1062 &a[0], &a[1])); 1063 CHECK_EQ(a[0], 1); 1064 CHECK_EQ(a[1], 2); 
1065 1066 memset(a, 0, sizeof(0)); 1067 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", 1068 &a[0], &a[1], &a[2])); 1069 CHECK_EQ(a[0], 1); 1070 CHECK_EQ(a[1], 2); 1071 CHECK_EQ(a[2], 3); 1072 1073 memset(a, 0, sizeof(0)); 1074 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", 1075 &a[0], &a[1], &a[2], &a[3])); 1076 CHECK_EQ(a[0], 1); 1077 CHECK_EQ(a[1], 2); 1078 CHECK_EQ(a[2], 3); 1079 CHECK_EQ(a[3], 4); 1080 1081 memset(a, 0, sizeof(0)); 1082 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", 1083 &a[0], &a[1], &a[2], 1084 &a[3], &a[4])); 1085 CHECK_EQ(a[0], 1); 1086 CHECK_EQ(a[1], 2); 1087 CHECK_EQ(a[2], 3); 1088 CHECK_EQ(a[3], 4); 1089 CHECK_EQ(a[4], 5); 1090 1091 memset(a, 0, sizeof(0)); 1092 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", 1093 &a[0], &a[1], &a[2], 1094 &a[3], &a[4], &a[5])); 1095 CHECK_EQ(a[0], 1); 1096 CHECK_EQ(a[1], 2); 1097 CHECK_EQ(a[2], 3); 1098 CHECK_EQ(a[3], 4); 1099 CHECK_EQ(a[4], 5); 1100 CHECK_EQ(a[5], 6); 1101 1102 memset(a, 0, sizeof(0)); 1103 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", 1104 &a[0], &a[1], &a[2], &a[3], 1105 &a[4], &a[5], &a[6])); 1106 CHECK_EQ(a[0], 1); 1107 CHECK_EQ(a[1], 2); 1108 CHECK_EQ(a[2], 3); 1109 CHECK_EQ(a[3], 4); 1110 CHECK_EQ(a[4], 5); 1111 CHECK_EQ(a[5], 6); 1112 CHECK_EQ(a[6], 7); 1113 1114 memset(a, 0, sizeof(0)); 1115 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" 1116 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( 1117 "1234567890123456", 1118 &a[0], &a[1], &a[2], &a[3], 1119 &a[4], &a[5], &a[6], &a[7], 1120 &a[8], &a[9], &a[10], &a[11], 1121 &a[12], &a[13], &a[14], &a[15])); 1122 CHECK_EQ(a[0], 1); 1123 CHECK_EQ(a[1], 2); 1124 CHECK_EQ(a[2], 3); 1125 CHECK_EQ(a[3], 4); 1126 CHECK_EQ(a[4], 5); 1127 CHECK_EQ(a[5], 6); 1128 CHECK_EQ(a[6], 7); 1129 CHECK_EQ(a[7], 8); 1130 CHECK_EQ(a[8], 9); 1131 CHECK_EQ(a[9], 0); 1132 CHECK_EQ(a[10], 1); 1133 CHECK_EQ(a[11], 2); 1134 CHECK_EQ(a[12], 3); 1135 CHECK_EQ(a[13], 4); 1136 CHECK_EQ(a[14], 5); 1137 
CHECK_EQ(a[15], 6); 1138 1139 /***** PartialMatch *****/ 1140 1141 printf("Testing PartialMatch\n"); 1142 1143 CHECK(RE("h.*o").PartialMatch("hello")); 1144 CHECK(RE("h.*o").PartialMatch("othello")); 1145 CHECK(RE("h.*o").PartialMatch("hello!")); 1146 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); 1147 1148 /***** other tests *****/ 1149 1150 RadixTests(); 1151 TestReplace(); 1152 TestExtract(); 1153 TestConsume(); 1154 TestFindAndConsume(); 1155 TestQuoteMetaAll(); 1156 TestMatchNumberPeculiarity(); 1157 1158 // Check the pattern() accessor 1159 { 1160 const string kPattern = "http://([^/]+)/.*"; 1161 const RE re(kPattern); 1162 CHECK_EQ(kPattern, re.pattern()); 1163 } 1164 1165 // Check RE error field. 1166 { 1167 RE re("foo"); 1168 CHECK(re.error().empty()); // Must have no error 1169 } 1170 1171 #ifdef SUPPORT_UTF8 1172 // Check UTF-8 handling 1173 { 1174 printf("Testing UTF-8 handling\n"); 1175 1176 // Three Japanese characters (nihongo) 1177 const unsigned char utf8_string[] = { 1178 0xe6, 0x97, 0xa5, // 65e5 1179 0xe6, 0x9c, 0xac, // 627c 1180 0xe8, 0xaa, 0x9e, // 8a9e 1181 0 1182 }; 1183 const unsigned char utf8_pattern[] = { 1184 '.', 1185 0xe6, 0x9c, 0xac, // 627c 1186 '.', 1187 0 1188 }; 1189 1190 // Both should match in either mode, bytes or UTF-8 1191 RE re_test1("........."); 1192 CHECK(re_test1.FullMatch(utf8_string)); 1193 RE re_test2("...", pcrecpp::UTF8()); 1194 CHECK(re_test2.FullMatch(utf8_string)); 1195 1196 // Check that '.' matches one byte or UTF-8 character 1197 // according to the mode. 
1198 string ss; 1199 RE re_test3("(.)"); 1200 CHECK(re_test3.PartialMatch(utf8_string, &ss)); 1201 CHECK_EQ(ss, string("\xe6")); 1202 RE re_test4("(.)", pcrecpp::UTF8()); 1203 CHECK(re_test4.PartialMatch(utf8_string, &ss)); 1204 CHECK_EQ(ss, string("\xe6\x97\xa5")); 1205 1206 // Check that string matches itself in either mode 1207 RE re_test5(utf8_string); 1208 CHECK(re_test5.FullMatch(utf8_string)); 1209 RE re_test6(utf8_string, pcrecpp::UTF8()); 1210 CHECK(re_test6.FullMatch(utf8_string)); 1211 1212 // Check that pattern matches string only in UTF8 mode 1213 RE re_test7(utf8_pattern); 1214 CHECK(!re_test7.FullMatch(utf8_string)); 1215 RE re_test8(utf8_pattern, pcrecpp::UTF8()); 1216 CHECK(re_test8.FullMatch(utf8_string)); 1217 } 1218 1219 // Check that ungreedy, UTF8 regular expressions don't match when they 1220 // oughtn't -- see bug 82246. 1221 { 1222 // This code always worked. 1223 const char* pattern = "\\w+X"; 1224 const string target = "a aX"; 1225 RE match_sentence(pattern); 1226 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1227 1228 CHECK(!match_sentence.FullMatch(target)); 1229 CHECK(!match_sentence_re.FullMatch(target)); 1230 } 1231 1232 { 1233 const char* pattern = "(?U)\\w+X"; 1234 const string target = "a aX"; 1235 RE match_sentence(pattern); 1236 RE match_sentence_re(pattern, pcrecpp::UTF8()); 1237 1238 CHECK(!match_sentence.FullMatch(target)); 1239 CHECK(!match_sentence_re.FullMatch(target)); 1240 } 1241 #endif /* def SUPPORT_UTF8 */ 1242 1243 printf("Testing error reporting\n"); 1244 1245 { RE re("a\\1"); CHECK(!re.error().empty()); } 1246 { 1247 RE re("a[x"); 1248 CHECK(!re.error().empty()); 1249 } 1250 { 1251 RE re("a[z-a]"); 1252 CHECK(!re.error().empty()); 1253 } 1254 { 1255 RE re("a[[:foobar:]]"); 1256 CHECK(!re.error().empty()); 1257 } 1258 { 1259 RE re("a(b"); 1260 CHECK(!re.error().empty()); 1261 } 1262 { 1263 RE re("a\\"); 1264 CHECK(!re.error().empty()); 1265 } 1266 1267 // Test that recursion is stopped 1268 TestRecursion(); 1269 
1270 // Test Options 1271 if (getenv("VERBOSE_TEST") != NULL) 1272 VERBOSE_TEST = true; 1273 TestOptions(); 1274 1275 // Test the constructors 1276 TestConstructors(); 1277 1278 // Done 1279 printf("OK\n"); 1280 1281 return 0; 1282 } 1283