1 // -*- coding: utf-8 -*- 2 // 3 // Copyright (c) 2005 - 2010, Google Inc. 4 // All rights reserved. 5 // 6 // Redistribution and use in source and binary forms, with or without 7 // modification, are permitted provided that the following conditions are 8 // met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above 13 // copyright notice, this list of conditions and the following disclaimer 14 // in the documentation and/or other materials provided with the 15 // distribution. 16 // * Neither the name of Google Inc. nor the names of its 17 // contributors may be used to endorse or promote products derived from 18 // this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 // 32 // Author: Sanjay Ghemawat 33 // 34 // TODO: Test extractions for PartialMatch/Consume 35 36 #ifdef HAVE_CONFIG_H 37 #include "config.h" 38 #endif 39 40 #include <stdio.h> 41 #include <cassert> 42 #include <vector> 43 #include "pcrecpp.h" 44 45 using pcrecpp::StringPiece; 46 using pcrecpp::RE; 47 using pcrecpp::RE_Options; 48 using pcrecpp::Hex; 49 using pcrecpp::Octal; 50 using pcrecpp::CRadix; 51 52 static bool VERBOSE_TEST = false; 53 54 // CHECK dies with a fatal error if condition is not true. It is *not* 55 // controlled by NDEBUG, so the check will be executed regardless of 56 // compilation mode. Therefore, it is safe to do things like: 57 // CHECK_EQ(fp->Write(x), 4) 58 #define CHECK(condition) do { \ 59 if (!(condition)) { \ 60 fprintf(stderr, "%s:%d: Check failed: %s\n", \ 61 __FILE__, __LINE__, #condition); \ 62 exit(1); \ 63 } \ 64 } while (0) 65 66 #define CHECK_EQ(a, b) CHECK(a == b) 67 68 static void Timing1(int num_iters) { 69 // Same pattern lots of times 70 RE pattern("ruby:\\d+"); 71 StringPiece p("ruby:1234"); 72 for (int j = num_iters; j > 0; j--) { 73 CHECK(pattern.FullMatch(p)); 74 } 75 } 76 77 static void Timing2(int num_iters) { 78 // Same pattern lots of times 79 RE pattern("ruby:(\\d+)"); 80 int i; 81 for (int j = num_iters; j > 0; j--) { 82 CHECK(pattern.FullMatch("ruby:1234", &i)); 83 CHECK_EQ(i, 1234); 84 } 85 } 86 87 static void Timing3(int num_iters) { 88 string text_string; 89 for (int j = num_iters; j > 0; j--) { 90 text_string += "this is another line\n"; 91 } 92 93 RE line_matcher(".*\n"); 94 string line; 95 StringPiece text(text_string); 96 int counter = 0; 97 while (line_matcher.Consume(&text)) { 98 counter++; 99 } 100 printf("Matched %d lines\n", counter); 101 } 102 103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize() 104 105 static void LeakTest() { 106 // Check for memory leaks 107 unsigned long long initial_size = 0; 108 for (int i = 0; i < 100000; i++) { 109 if (i == 50000) { 110 
initial_size = VirtualProcessSize(); 111 printf("Size after 50000: %llu\n", initial_size); 112 } 113 char buf[100]; // definitely big enough 114 sprintf(buf, "pat%09d", i); 115 RE newre(buf); 116 } 117 uint64 final_size = VirtualProcessSize(); 118 printf("Size after 100000: %llu\n", final_size); 119 const double growth = double(final_size - initial_size) / final_size; 120 printf("Growth: %0.2f%%", growth * 100); 121 CHECK(growth < 0.02); // Allow < 2% growth 122 } 123 124 #endif 125 126 static void RadixTests() { 127 printf("Testing hex\n"); 128 129 #define CHECK_HEX(type, value) \ 130 do { \ 131 type v; \ 132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ 133 CHECK_EQ(v, 0x ## value); \ 134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ 135 CHECK_EQ(v, 0x ## value); \ 136 } while(0) 137 138 CHECK_HEX(short, 2bad); 139 CHECK_HEX(unsigned short, 2badU); 140 CHECK_HEX(int, dead); 141 CHECK_HEX(unsigned int, deadU); 142 CHECK_HEX(long, 7eadbeefL); 143 CHECK_HEX(unsigned long, deadbeefUL); 144 #ifdef HAVE_LONG_LONG 145 CHECK_HEX(long long, 12345678deadbeefLL); 146 #endif 147 #ifdef HAVE_UNSIGNED_LONG_LONG 148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL); 149 #endif 150 151 #undef CHECK_HEX 152 153 printf("Testing octal\n"); 154 155 #define CHECK_OCTAL(type, value) \ 156 do { \ 157 type v; \ 158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ 159 CHECK_EQ(v, 0 ## value); \ 160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ 161 CHECK_EQ(v, 0 ## value); \ 162 } while(0) 163 164 CHECK_OCTAL(short, 77777); 165 CHECK_OCTAL(unsigned short, 177777U); 166 CHECK_OCTAL(int, 17777777777); 167 CHECK_OCTAL(unsigned int, 37777777777U); 168 CHECK_OCTAL(long, 17777777777L); 169 CHECK_OCTAL(unsigned long, 37777777777UL); 170 #ifdef HAVE_LONG_LONG 171 CHECK_OCTAL(long long, 777777777777777777777LL); 172 #endif 173 #ifdef HAVE_UNSIGNED_LONG_LONG 174 CHECK_OCTAL(unsigned long long, 
1777777777777777777777ULL); 175 #endif 176 177 #undef CHECK_OCTAL 178 179 printf("Testing decimal\n"); 180 181 #define CHECK_DECIMAL(type, value) \ 182 do { \ 183 type v; \ 184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ 185 CHECK_EQ(v, value); \ 186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ 187 CHECK_EQ(v, value); \ 188 } while(0) 189 190 CHECK_DECIMAL(short, -1); 191 CHECK_DECIMAL(unsigned short, 9999); 192 CHECK_DECIMAL(int, -1000); 193 CHECK_DECIMAL(unsigned int, 12345U); 194 CHECK_DECIMAL(long, -10000000L); 195 CHECK_DECIMAL(unsigned long, 3083324652U); 196 #ifdef HAVE_LONG_LONG 197 CHECK_DECIMAL(long long, -100000000000000LL); 198 #endif 199 #ifdef HAVE_UNSIGNED_LONG_LONG 200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); 201 #endif 202 203 #undef CHECK_DECIMAL 204 205 } 206 207 static void TestReplace() { 208 printf("Testing Replace\n"); 209 210 struct ReplaceTest { 211 const char *regexp; 212 const char *rewrite; 213 const char *original; 214 const char *single; 215 const char *global; 216 int global_count; // the expected return value from ReplaceAll 217 }; 218 static const ReplaceTest tests[] = { 219 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", 220 "\\2\\1ay", 221 "the quick brown fox jumps over the lazy dogs.", 222 "ethay quick brown fox jumps over the lazy dogs.", 223 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", 224 9 }, 225 { "\\w+", 226 "\\0-NOSPAM", 227 "paul.haahr (at) google.com", 228 "paul-NOSPAM.haahr (at) google.com", 229 "paul-NOSPAM.haahr-NOSPAM (at) google-NOSPAM.com-NOSPAM", 230 4 }, 231 { "^", 232 "(START)", 233 "foo", 234 "(START)foo", 235 "(START)foo", 236 1 }, 237 { "^", 238 "(START)", 239 "", 240 "(START)", 241 "(START)", 242 1 }, 243 { "$", 244 "(END)", 245 "", 246 "(END)", 247 "(END)", 248 1 }, 249 { "b", 250 "bb", 251 "ababababab", 252 "abbabababab", 253 "abbabbabbabbabb", 254 5 }, 255 { "b", 256 "bb", 257 "bbbbbb", 258 "bbbbbbb", 259 "bbbbbbbbbbbb", 260 6 }, 261 { 
"b+", 262 "bb", 263 "bbbbbb", 264 "bb", 265 "bb", 266 1 }, 267 { "b*", 268 "bb", 269 "bbbbbb", 270 "bb", 271 "bbbb", 272 2 }, 273 { "b*", 274 "bb", 275 "aaaaa", 276 "bbaaaaa", 277 "bbabbabbabbabbabb", 278 6 }, 279 { "b*", 280 "bb", 281 "aa\naa\n", 282 "bbaa\naa\n", 283 "bbabbabb\nbbabbabb\nbb", 284 7 }, 285 { "b*", 286 "bb", 287 "aa\raa\r", 288 "bbaa\raa\r", 289 "bbabbabb\rbbabbabb\rbb", 290 7 }, 291 { "b*", 292 "bb", 293 "aa\r\naa\r\n", 294 "bbaa\r\naa\r\n", 295 "bbabbabb\r\nbbabbabb\r\nbb", 296 7 }, 297 // Check empty-string matching (it's tricky!) 298 { "aa|b*", 299 "@", 300 "aa", 301 "@", 302 "@@", 303 2 }, 304 { "b*|aa", 305 "@", 306 "aa", 307 "@aa", 308 "@@@", 309 3 }, 310 #ifdef SUPPORT_UTF8 311 { "b*", 312 "bb", 313 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 314 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", 315 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", 316 5 }, 317 { "b*", 318 "bb", 319 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 320 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", 321 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" 322 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), 323 9 }, 324 #endif 325 { "", NULL, NULL, NULL, NULL, 0 } 326 }; 327 328 #ifdef SUPPORT_UTF8 329 const bool support_utf8 = true; 330 #else 331 const bool support_utf8 = false; 332 #endif 333 334 for (const ReplaceTest *t = tests; t->original != NULL; ++t) { 335 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); 336 assert(re.error().empty()); 337 string one(t->original); 338 CHECK(re.Replace(t->rewrite, &one)); 339 CHECK_EQ(one, t->single); 340 string all(t->original); 341 const int replace_count = re.GlobalReplace(t->rewrite, &all); 342 CHECK_EQ(all, t->global); 343 CHECK_EQ(replace_count, t->global_count); 344 } 345 346 // One final test: test \r\n replacement when we're not in CRLF mode 347 { 348 RE re("b*", 
RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); 349 assert(re.error().empty()); 350 string all("aa\r\naa\r\n"); 351 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 352 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 353 } 354 { 355 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); 356 assert(re.error().empty()); 357 string all("aa\r\naa\r\n"); 358 CHECK_EQ(re.GlobalReplace("bb", &all), 9); 359 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); 360 } 361 // TODO: test what happens when no PCRE_NEWLINE_* flag is set. 362 // Alas, the answer depends on how pcre was compiled. 363 } 364 365 static void TestExtract() { 366 printf("Testing Extract\n"); 367 368 string s; 369 370 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris (at) kremvax.ru", &s)); 371 CHECK_EQ(s, "kremvax!boris"); 372 373 // check the RE interface as well 374 CHECK(RE(".*").Extract("'\\0'", "foo", &s)); 375 CHECK_EQ(s, "'foo'"); 376 CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); 377 CHECK_EQ(s, "'foo'"); 378 } 379 380 static void TestConsume() { 381 printf("Testing Consume\n"); 382 383 string word; 384 385 string s(" aaa b!@#$@#$cccc"); 386 StringPiece input(s); 387 388 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace 389 CHECK(r.Consume(&input, &word)); 390 CHECK_EQ(word, "aaa"); 391 CHECK(r.Consume(&input, &word)); 392 CHECK_EQ(word, "b"); 393 CHECK(! r.Consume(&input, &word)); 394 } 395 396 static void TestFindAndConsume() { 397 printf("Testing FindAndConsume\n"); 398 399 string word; 400 401 string s(" aaa b!@#$@#$cccc"); 402 StringPiece input(s); 403 404 RE r("(\\w+)"); // matches a word 405 CHECK(r.FindAndConsume(&input, &word)); 406 CHECK_EQ(word, "aaa"); 407 CHECK(r.FindAndConsume(&input, &word)); 408 CHECK_EQ(word, "b"); 409 CHECK(r.FindAndConsume(&input, &word)); 410 CHECK_EQ(word, "cccc"); 411 CHECK(! 
r.FindAndConsume(&input, &word)); 412 } 413 414 static void TestMatchNumberPeculiarity() { 415 printf("Testing match-number peculiaraity\n"); 416 417 string word1; 418 string word2; 419 string word3; 420 421 RE r("(foo)|(bar)|(baz)"); 422 CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); 423 CHECK_EQ(word1, "foo"); 424 CHECK_EQ(word2, ""); 425 CHECK_EQ(word3, ""); 426 CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); 427 CHECK_EQ(word1, ""); 428 CHECK_EQ(word2, "bar"); 429 CHECK_EQ(word3, ""); 430 CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); 431 CHECK_EQ(word1, ""); 432 CHECK_EQ(word2, ""); 433 CHECK_EQ(word3, "baz"); 434 CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); 435 436 string a; 437 CHECK(RE("(foo)|hello").FullMatch("hello", &a)); 438 CHECK_EQ(a, ""); 439 } 440 441 static void TestRecursion() { 442 printf("Testing recursion\n"); 443 444 // Get one string that passes (sometimes), one that never does. 445 string text_good("abcdefghijk"); 446 string text_bad("acdefghijkl"); 447 448 // According to pcretest, matching text_good against (\w+)*b 449 // requires match_limit of at least 8192, and match_recursion_limit 450 // of at least 37. 
451 452 RE_Options options_ml; 453 options_ml.set_match_limit(8192); 454 RE re("(\\w+)*b", options_ml); 455 CHECK(re.PartialMatch(text_good) == true); 456 CHECK(re.PartialMatch(text_bad) == false); 457 CHECK(re.FullMatch(text_good) == false); 458 CHECK(re.FullMatch(text_bad) == false); 459 460 options_ml.set_match_limit(1024); 461 RE re2("(\\w+)*b", options_ml); 462 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit 463 CHECK(re2.PartialMatch(text_bad) == false); 464 CHECK(re2.FullMatch(text_good) == false); 465 CHECK(re2.FullMatch(text_bad) == false); 466 467 RE_Options options_mlr; 468 options_mlr.set_match_limit_recursion(50); 469 RE re3("(\\w+)*b", options_mlr); 470 CHECK(re3.PartialMatch(text_good) == true); 471 CHECK(re3.PartialMatch(text_bad) == false); 472 CHECK(re3.FullMatch(text_good) == false); 473 CHECK(re3.FullMatch(text_bad) == false); 474 475 options_mlr.set_match_limit_recursion(10); 476 RE re4("(\\w+)*b", options_mlr); 477 CHECK(re4.PartialMatch(text_good) == false); 478 CHECK(re4.PartialMatch(text_bad) == false); 479 CHECK(re4.FullMatch(text_good) == false); 480 CHECK(re4.FullMatch(text_bad) == false); 481 } 482 483 // A meta-quoted string, interpreted as a pattern, should always match 484 // the original unquoted string. 485 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { 486 string quoted = RE::QuoteMeta(unquoted); 487 RE re(quoted, options); 488 CHECK(re.FullMatch(unquoted)); 489 } 490 491 // A string containing meaningful regexp characters, which is then meta- 492 // quoted, should not generally match a string the unquoted string does. 
493 static void NegativeTestQuoteMeta(string unquoted, string should_not_match, 494 RE_Options options = RE_Options()) { 495 string quoted = RE::QuoteMeta(unquoted); 496 RE re(quoted, options); 497 CHECK(!re.FullMatch(should_not_match)); 498 } 499 500 // Tests that quoted meta characters match their original strings, 501 // and that a few things that shouldn't match indeed do not. 502 static void TestQuotaMetaSimple() { 503 TestQuoteMeta("foo"); 504 TestQuoteMeta("foo.bar"); 505 TestQuoteMeta("foo\\.bar"); 506 TestQuoteMeta("[1-9]"); 507 TestQuoteMeta("1.5-2.0?"); 508 TestQuoteMeta("\\d"); 509 TestQuoteMeta("Who doesn't like ice cream?"); 510 TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); 511 TestQuoteMeta("((?!)xxx).*yyy"); 512 TestQuoteMeta("(["); 513 TestQuoteMeta(string("foo\0bar", 7)); 514 } 515 516 static void TestQuoteMetaSimpleNegative() { 517 NegativeTestQuoteMeta("foo", "bar"); 518 NegativeTestQuoteMeta("...", "bar"); 519 NegativeTestQuoteMeta("\\.", "."); 520 NegativeTestQuoteMeta("\\.", ".."); 521 NegativeTestQuoteMeta("(a)", "a"); 522 NegativeTestQuoteMeta("(a|b)", "a"); 523 NegativeTestQuoteMeta("(a|b)", "(a)"); 524 NegativeTestQuoteMeta("(a|b)", "a|b"); 525 NegativeTestQuoteMeta("[0-9]", "0"); 526 NegativeTestQuoteMeta("[0-9]", "0-9"); 527 NegativeTestQuoteMeta("[0-9]", "[9]"); 528 NegativeTestQuoteMeta("((?!)xxx)", "xxx"); 529 } 530 531 static void TestQuoteMetaLatin1() { 532 TestQuoteMeta("3\xb2 = 9"); 533 } 534 535 static void TestQuoteMetaUtf8() { 536 #ifdef SUPPORT_UTF8 537 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); 538 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 539 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) 540 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character 541 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime) 542 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note) 543 TestQuoteMeta("27\xc2\xb0"); // 
Interpreted as Latin-1, but should still work 544 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol) 545 "27\\\xc2\\\xb0", 546 pcrecpp::UTF8()); 547 #endif 548 } 549 550 static void TestQuoteMetaAll() { 551 printf("Testing QuoteMeta\n"); 552 TestQuotaMetaSimple(); 553 TestQuoteMetaSimpleNegative(); 554 TestQuoteMetaLatin1(); 555 TestQuoteMetaUtf8(); 556 } 557 558 // 559 // Options tests contributed by 560 // Giuseppe Maxia, CTO, Stardata s.r.l. 561 // July 2005 562 // 563 static void GetOneOptionResult( 564 const char *option_name, 565 const char *regex, 566 const char *str, 567 RE_Options options, 568 bool full, 569 string expected) { 570 571 printf("Testing Option <%s>\n", option_name); 572 if(VERBOSE_TEST) 573 printf("/%s/ finds \"%s\" within \"%s\" \n", 574 regex, 575 expected.c_str(), 576 str); 577 string captured(""); 578 if (full) 579 RE(regex,options).FullMatch(str, &captured); 580 else 581 RE(regex,options).PartialMatch(str, &captured); 582 CHECK_EQ(captured, expected); 583 } 584 585 static void TestOneOption( 586 const char *option_name, 587 const char *regex, 588 const char *str, 589 RE_Options options, 590 bool full, 591 bool assertive = true) { 592 593 printf("Testing Option <%s>\n", option_name); 594 if (VERBOSE_TEST) 595 printf("'%s' %s /%s/ \n", 596 str, 597 (assertive? 
"matches" : "doesn't match"), 598 regex); 599 if (assertive) { 600 if (full) 601 CHECK(RE(regex,options).FullMatch(str)); 602 else 603 CHECK(RE(regex,options).PartialMatch(str)); 604 } else { 605 if (full) 606 CHECK(!RE(regex,options).FullMatch(str)); 607 else 608 CHECK(!RE(regex,options).PartialMatch(str)); 609 } 610 } 611 612 static void Test_CASELESS() { 613 RE_Options options; 614 RE_Options options2; 615 616 options.set_caseless(true); 617 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); 618 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); 619 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); 620 621 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); 622 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); 623 options.set_caseless(false); 624 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); 625 } 626 627 static void Test_MULTILINE() { 628 RE_Options options; 629 RE_Options options2; 630 const char *str = "HELLO\n" "cruel\n" "world\n"; 631 632 options.set_multiline(true); 633 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); 634 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); 635 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); 636 options.set_multiline(false); 637 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); 638 } 639 640 static void Test_DOTALL() { 641 RE_Options options; 642 RE_Options options2; 643 const char *str = "HELLO\n" "cruel\n" "world"; 644 645 options.set_dotall(true); 646 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); 647 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); 648 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); 649 options.set_dotall(false); 650 
TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); 651 } 652 653 static void Test_DOLLAR_ENDONLY() { 654 RE_Options options; 655 RE_Options options2; 656 const char *str = "HELLO world\n"; 657 658 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); 659 options.set_dollar_endonly(true); 660 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); 661 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); 662 } 663 664 static void Test_EXTRA() { 665 RE_Options options; 666 const char *str = "HELLO"; 667 668 options.set_extra(true); 669 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); 670 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); 671 options.set_extra(false); 672 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); 673 } 674 675 static void Test_EXTENDED() { 676 RE_Options options; 677 RE_Options options2; 678 const char *str = "HELLO world"; 679 680 options.set_extended(true); 681 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); 682 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); 683 TestOneOption("EXTENDED (class)", 684 "^ HE L{2} O " 685 "\\s+ " 686 "\\w+ $ ", 687 str, 688 options, 689 false); 690 691 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); 692 TestOneOption("EXTENDED (function)", 693 "^ HE L{2} O " 694 "\\s+ " 695 "\\w+ $ ", 696 str, 697 pcrecpp::EXTENDED(), 698 false); 699 700 options.set_extended(false); 701 TestOneOption("no EXTENDED", "HELLO world", str, options, false); 702 } 703 704 static void Test_NO_AUTO_CAPTURE() { 705 RE_Options options; 706 const char *str = "HELLO world"; 707 string captured; 708 709 printf("Testing Option <no NO_AUTO_CAPTURE>\n"); 710 if (VERBOSE_TEST) 711 printf("parentheses capture text\n"); 712 RE re("(world|universe)$", options); 713 
CHECK(re.Extract("\\1", str , &captured)); 714 CHECK_EQ(captured, "world"); 715 options.set_no_auto_capture(true); 716 printf("testing Option <NO_AUTO_CAPTURE>\n"); 717 if (VERBOSE_TEST) 718 printf("parentheses do not capture text\n"); 719 re.Extract("\\1",str, &captured ); 720 CHECK_EQ(captured, "world"); 721 } 722 723 static void Test_UNGREEDY() { 724 RE_Options options; 725 const char *str = "HELLO, 'this' is the 'world'"; 726 727 options.set_ungreedy(true); 728 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); 729 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); 730 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); 731 732 options.set_ungreedy(false); 733 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); 734 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); 735 } 736 737 static void Test_all_options() { 738 const char *str = "HELLO\n" "cruel\n" "world"; 739 RE_Options options; 740 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); 741 742 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); 743 options.set_all_options(0); 744 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); 745 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); 746 747 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); 748 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", 749 " ^ c r u e l $ ", 750 str, 751 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), 752 false); 753 754 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", 755 " ^ c r u e l $ ", 756 str, 757 RE_Options() 758 .set_multiline(true) 759 .set_extended(true), 760 false); 761 762 options.set_all_options(0); 763 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); 764 765 } 766 767 
static void TestOptions() { 768 printf("Testing Options\n"); 769 Test_CASELESS(); 770 Test_MULTILINE(); 771 Test_DOTALL(); 772 Test_DOLLAR_ENDONLY(); 773 Test_EXTENDED(); 774 Test_NO_AUTO_CAPTURE(); 775 Test_UNGREEDY(); 776 Test_EXTRA(); 777 Test_all_options(); 778 } 779 780 static void TestConstructors() { 781 printf("Testing constructors\n"); 782 783 RE_Options options; 784 options.set_dotall(true); 785 const char *str = "HELLO\n" "cruel\n" "world"; 786 787 RE orig("HELLO.*world", options); 788 CHECK(orig.FullMatch(str)); 789 790 RE copy1(orig); 791 CHECK(copy1.FullMatch(str)); 792 793 RE copy2("not a match"); 794 CHECK(!copy2.FullMatch(str)); 795 copy2 = copy1; 796 CHECK(copy2.FullMatch(str)); 797 copy2 = orig; 798 CHECK(copy2.FullMatch(str)); 799 800 // Make sure when we assign to ourselves, nothing bad happens 801 orig = orig; 802 copy1 = copy1; 803 copy2 = copy2; 804 CHECK(orig.FullMatch(str)); 805 CHECK(copy1.FullMatch(str)); 806 CHECK(copy2.FullMatch(str)); 807 } 808 809 int main(int argc, char** argv) { 810 // Treat any flag as --help 811 if (argc > 1 && argv[1][0] == '-') { 812 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" 813 " If 'timingX ###' is specified, run the given timing test\n" 814 " with the given number of iterations, rather than running\n" 815 " the default corectness test.\n", argv[0]); 816 return 0; 817 } 818 819 if (argc > 1) { 820 if ( argc == 2 || atoi(argv[2]) == 0) { 821 printf("timing mode needs a num-iters argument\n"); 822 return 1; 823 } 824 if (!strcmp(argv[1], "timing1")) 825 Timing1(atoi(argv[2])); 826 else if (!strcmp(argv[1], "timing2")) 827 Timing2(atoi(argv[2])); 828 else if (!strcmp(argv[1], "timing3")) 829 Timing3(atoi(argv[2])); 830 else 831 printf("Unknown argument '%s'\n", argv[1]); 832 return 0; 833 } 834 835 printf("Testing FullMatch\n"); 836 837 int i; 838 string s; 839 840 /***** FullMatch with no args *****/ 841 842 CHECK(RE("h.*o").FullMatch("hello")); 843 CHECK(!RE("h.*o").FullMatch("othello")); // 
Must be anchored at front 844 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end 845 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op 846 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op 847 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops 848 849 /***** FullMatch with args *****/ 850 851 // Zero-arg 852 CHECK(RE("\\d+").FullMatch("1001")); 853 854 // Single-arg 855 CHECK(RE("(\\d+)").FullMatch("1001", &i)); 856 CHECK_EQ(i, 1001); 857 CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); 858 CHECK_EQ(i, -123); 859 CHECK(!RE("()\\d+").FullMatch("10", &i)); 860 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", 861 &i)); 862 863 // Digits surrounding integer-arg 864 CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); 865 CHECK_EQ(i, 23); 866 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); 867 CHECK_EQ(i, 1); 868 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); 869 CHECK_EQ(i, -1); 870 CHECK(RE("(\\d)").PartialMatch("1234", &i)); 871 CHECK_EQ(i, 1); 872 CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); 873 CHECK_EQ(i, -1); 874 875 // String-arg 876 CHECK(RE("h(.*)o").FullMatch("hello", &s)); 877 CHECK_EQ(s, string("ell")); 878 879 // StringPiece-arg 880 StringPiece sp; 881 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); 882 CHECK_EQ(sp.size(), 4); 883 CHECK(memcmp(sp.data(), "ruby", 4) == 0); 884 CHECK_EQ(i, 1234); 885 886 // Multi-arg 887 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); 888 CHECK_EQ(s, string("ruby")); 889 CHECK_EQ(i, 1234); 890 891 // Ignore non-void* NULL arg 892 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); 893 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); 894 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); 895 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); 896 #ifdef HAVE_LONG_LONG 897 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); 898 #endif 899 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); 900 
CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); 901 902 // Fail on non-void* NULL arg if the match doesn't parse for the given type. 903 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); 904 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); 905 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); 906 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); 907 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); 908 909 // Ignored arg 910 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); 911 CHECK_EQ(s, string("ruby")); 912 CHECK_EQ(i, 1234); 913 914 // Type tests 915 { 916 char c; 917 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 918 CHECK_EQ(c, 'H'); 919 } 920 { 921 unsigned char c; 922 CHECK(RE("(H)ello").FullMatch("Hello", &c)); 923 CHECK_EQ(c, static_cast<unsigned char>('H')); 924 } 925 { 926 short v; 927 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 928 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 929 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 930 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); 931 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); 932 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); 933 } 934 { 935 unsigned short v; 936 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 937 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); 938 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); 939 CHECK(!RE("(\\d+)").FullMatch("65536", &v)); 940 } 941 { 942 int v; 943 static const int max_value = 0x7fffffff; 944 static const int min_value = -max_value - 1; 945 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 946 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); 947 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); 948 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); 949 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", 
&v)); 950 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); 951 } 952 { 953 unsigned int v; 954 static const unsigned int max_value = 0xfffffffful; 955 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 956 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); 957 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); 958 } 959 #ifdef HAVE_LONG_LONG 960 # if defined(__MINGW__) || defined(__MINGW32__) 961 # define LLD "%I64d" 962 # define LLU "%I64u" 963 # else 964 # define LLD "%lld" 965 # define LLU "%llu" 966 # endif 967 { 968 long long v; 969 static const long long max_value = 0x7fffffffffffffffLL; 970 static const long long min_value = -max_value - 1; 971 char buf[32]; // definitely big enough for a long long 972 973 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); 974 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); 975 976 sprintf(buf, LLD, max_value); 977 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 978 979 sprintf(buf, LLD, min_value); 980 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); 981 982 sprintf(buf, LLD, max_value); 983 assert(buf[strlen(buf)-1] != '9'); 984 buf[strlen(buf)-1]++; 985 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 986 987 sprintf(buf, LLD, min_value); 988 assert(buf[strlen(buf)-1] != '9'); 989 buf[strlen(buf)-1]++; 990 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 991 } 992 #endif 993 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG 994 { 995 unsigned long long v; 996 long long v2; 997 static const unsigned long long max_value = 0xffffffffffffffffULL; 998 char buf[32]; // definitely big enough for a unsigned long long 999 1000 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); 1001 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); 1002 1003 sprintf(buf, LLU, max_value); 1004 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); 1005 1006 assert(buf[strlen(buf)-1] != '9'); 1007 buf[strlen(buf)-1]++; 1008 
CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); 1009 } 1010 #endif 1011 { 1012 float v; 1013 CHECK(RE("(.*)").FullMatch("100", &v)); 1014 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1015 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1016 } 1017 { 1018 double v; 1019 CHECK(RE("(.*)").FullMatch("100", &v)); 1020 CHECK(RE("(.*)").FullMatch("-100.", &v)); 1021 CHECK(RE("(.*)").FullMatch("1e23", &v)); 1022 } 1023 1024 // Check that matching is fully anchored 1025 CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); 1026 CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); 1027 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); 1028 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); 1029 1030 // Braces 1031 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); 1032 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); 1033 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); 1034 1035 // Complicated RE 1036 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); 1037 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); 1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); 1039 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); 1040 1041 // Check full-match handling (needs '$' tacked on internally) 1042 CHECK(RE("fo|foo").FullMatch("fo")); 1043 CHECK(RE("fo|foo").FullMatch("foo")); 1044 CHECK(RE("fo|foo$").FullMatch("fo")); 1045 CHECK(RE("fo|foo$").FullMatch("foo")); 1046 CHECK(RE("foo$").FullMatch("foo")); 1047 CHECK(!RE("foo\\$").FullMatch("foo$bar")); 1048 CHECK(!RE("fo|bar").FullMatch("fox")); 1049 1050 // Uncomment the following if we change the handling of '$' to 1051 // prevent it from matching a trailing newline 1052 if (false) { 1053 // Check that we don't get bitten by pcre's special handling of a 1054 // '\n' at the end of the string matching '$' 1055 CHECK(!RE("foo$").PartialMatch("foo\n")); 1056 } 1057 1058 // Number of args 1059 int a[16]; 1060 CHECK(RE("").FullMatch("")); 1061 1062 memset(a, 0, sizeof(0)); 1063 CHECK(RE("(\\d){1}").FullMatch("1", 1064 &a[0])); 1065 CHECK_EQ(a[0], 1); 1066 1067 
// Argument-count cases for 2 to 7 captures and for 16 captures.
// The whole of `a` (declared above as int a[16]) is zeroed before every
// case, so each CHECK_EQ can only be satisfied by a value that the FullMatch
// call of that same case stored. Clearing just sizeof(0) bytes would reset
// a[0] alone and leave the digits of the previous case in place.
memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)").FullMatch("12",
                                 &a[0], &a[1]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);

memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
                                      &a[0], &a[1], &a[2]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);

memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
                                           &a[0], &a[1], &a[2], &a[3]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);
CHECK_EQ(a[3], 4);

memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
                                                &a[0], &a[1], &a[2],
                                                &a[3], &a[4]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);
CHECK_EQ(a[3], 4);
CHECK_EQ(a[4], 5);

memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
                                                     &a[0], &a[1], &a[2],
                                                     &a[3], &a[4], &a[5]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);
CHECK_EQ(a[3], 4);
CHECK_EQ(a[4], 5);
CHECK_EQ(a[5], 6);

memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
                                                          &a[0], &a[1], &a[2], &a[3],
                                                          &a[4], &a[5], &a[6]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);
CHECK_EQ(a[3], 4);
CHECK_EQ(a[4], 5);
CHECK_EQ(a[5], 6);
CHECK_EQ(a[6], 7);

// Sixteen captures: one output argument for every slot of `a`.
memset(a, 0, sizeof(a));
CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
         "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
             "1234567890123456",
             &a[0],  &a[1],  &a[2],  &a[3],
             &a[4],  &a[5],  &a[6],  &a[7],
             &a[8],  &a[9],  &a[10], &a[11],
             &a[12], &a[13], &a[14], &a[15]));
CHECK_EQ(a[0], 1);
CHECK_EQ(a[1], 2);
CHECK_EQ(a[2], 3);
CHECK_EQ(a[3], 4);
CHECK_EQ(a[4], 5);
CHECK_EQ(a[5], 6);
CHECK_EQ(a[6], 7);
CHECK_EQ(a[7], 8);
CHECK_EQ(a[8], 9);
// Remaining captures of the 16-argument case: the input digits wrap around
// after 9, hence the expected values 0, 1, ..., 6.
CHECK_EQ(a[9], 0);
CHECK_EQ(a[10], 1);
CHECK_EQ(a[11], 2);
CHECK_EQ(a[12], 3);
CHECK_EQ(a[13], 4);
CHECK_EQ(a[14], 5);
CHECK_EQ(a[15], 6);

/***** PartialMatch *****/

printf("Testing PartialMatch\n");

// The pattern has to be found even when the text carries extra characters
// before ("othello") or after ("hello!") the matching part.
CHECK(RE("h.*o").PartialMatch("hello"));
CHECK(RE("h.*o").PartialMatch("othello"));
CHECK(RE("h.*o").PartialMatch("hello!"));
// Deeply nested capture groups, matched without any output arguments.
CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));

/***** other tests *****/

// Test routines defined outside this section; they are run in this order.
RadixTests();
TestReplace();
TestExtract();
TestConsume();
TestFindAndConsume();
TestQuoteMetaAll();
TestMatchNumberPeculiarity();

// Check the pattern() accessor: it must return the text the RE was built from.
{
  const string kPattern = "http://([^/]+)/.*";
  const RE re(kPattern);
  CHECK_EQ(kPattern, re.pattern());
}

// Check RE error field.
{
  RE re("foo");
  CHECK(re.error().empty());  // Must have no error
}

#ifdef SUPPORT_UTF8
// Check UTF-8 handling
{
  printf("Testing UTF-8 handling\n");

  // Three Japanese characters (nihongo).  Each row is one 3-byte UTF-8
  // sequence; the trailing comment gives the code point it encodes (hex).
  const unsigned char utf8_string[] = {
    0xe6, 0x97, 0xa5,  // 65e5
    0xe6, 0x9c, 0xac,  // 672c
    0xe8, 0xaa, 0x9e,  // 8a9e
    0
  };
  // '.', the middle character of utf8_string, '.'
  const unsigned char utf8_pattern[] = {
    '.',
    0xe6, 0x9c, 0xac,  // 672c
    '.',
    0
  };

  // Both should match in either mode, bytes or UTF-8:
  // nine dots for the nine bytes, three dots for the three characters.
  RE re_test1(".........");
  CHECK(re_test1.FullMatch(utf8_string));
  RE re_test2("...", pcrecpp::UTF8());
  CHECK(re_test2.FullMatch(utf8_string));

  // Check that '.' matches one byte or UTF-8 character
  // according to the mode.
// Without the UTF8 option "(.)" captures only the first byte of utf8_string;
// with it, the capture is the whole first 3-byte character.
string ss;
RE re_test3("(.)");
CHECK(re_test3.PartialMatch(utf8_string, &ss));
CHECK_EQ(ss, string("\xe6"));
RE re_test4("(.)", pcrecpp::UTF8());
CHECK(re_test4.PartialMatch(utf8_string, &ss));
CHECK_EQ(ss, string("\xe6\x97\xa5"));

// Check that string matches itself in either mode
RE re_test5(utf8_string);
CHECK(re_test5.FullMatch(utf8_string));
RE re_test6(utf8_string, pcrecpp::UTF8());
CHECK(re_test6.FullMatch(utf8_string));

// Check that pattern matches string only in UTF8 mode
// (utf8_pattern is '.', one 3-byte character, '.').
RE re_test7(utf8_pattern);
CHECK(!re_test7.FullMatch(utf8_string));
RE re_test8(utf8_pattern, pcrecpp::UTF8());
CHECK(re_test8.FullMatch(utf8_string));
}

// Check that ungreedy, UTF8 regular expressions don't match when they
// oughtn't -- see bug 82246.
{
  // This code always worked.
  // Plain pattern, compiled once without and once with the UTF8 option;
  // neither may fully match the target.
  const char* pattern = "\\w+X";
  const string target = "a aX";
  RE match_sentence(pattern);
  RE match_sentence_re(pattern, pcrecpp::UTF8());

  CHECK(!match_sentence.FullMatch(target));
  CHECK(!match_sentence_re.FullMatch(target));
}

{
  // Same pattern and target as above, but with the (?U) prefix; both
  // compilations must again refuse to fully match.
  const char* pattern = "(?U)\\w+X";
  const string target = "a aX";
  RE match_sentence(pattern);
  RE match_sentence_re(pattern, pcrecpp::UTF8());

  CHECK(!match_sentence.FullMatch(target));
  CHECK(!match_sentence_re.FullMatch(target));
}
#endif  /* def SUPPORT_UTF8 */

printf("Testing error reporting\n");

// Every pattern below is expected to leave a non-empty error() string.
{ RE re("a\\1"); CHECK(!re.error().empty()); }  // \1 with no group 1
{
  RE re("a[x");  // character class never closed
  CHECK(!re.error().empty());
}
{
  RE re("a[z-a]");  // range written high-to-low
  CHECK(!re.error().empty());
}
{
  RE re("a[[:foobar:]]");  // "foobar" is not a POSIX class name
  CHECK(!re.error().empty());
}
{
  RE re("a(b");  // group never closed
  CHECK(!re.error().empty());
}
{
  RE re("a\\");  // pattern ends with a lone backslash
  CHECK(!re.error().empty());
}

// Test that recursion is stopped
TestRecursion();

1277 // Test Options 1278 if (getenv("VERBOSE_TEST") != NULL) 1279 VERBOSE_TEST = true; 1280 TestOptions(); 1281 1282 // Test the constructors 1283 TestConstructors(); 1284 1285 // Done 1286 printf("OK\n"); 1287 1288 return 0; 1289 } 1290