1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "util/test.h" 6 #include "re2/prog.h" 7 #include "re2/regexp.h" 8 9 namespace re2 { 10 11 struct PCRETest { 12 const char* regexp; 13 bool should_match; 14 }; 15 16 static PCRETest tests[] = { 17 // Most things should behave exactly. 18 { "abc", true }, 19 { "(a|b)c", true }, 20 { "(a*|b)c", true }, 21 { "(a|b*)c", true }, 22 { "a(b|c)d", true }, 23 { "a(()|())c", true }, 24 { "ab*c", true }, 25 { "ab+c", true }, 26 { "a(b*|c*)d", true }, 27 { "\\W", true }, 28 { "\\W{1,2}", true }, 29 { "\\d", true }, 30 31 // Check that repeated empty strings do not. 32 { "(a*)*", false }, 33 { "x(a*)*y", false }, 34 { "(a*)+", false }, 35 { "(a+)*", true }, 36 { "(a+)+", true }, 37 { "(a+)+", true }, 38 39 // \v is the only character class that shouldn't. 40 { "\\b", true }, 41 { "\\v", false }, 42 { "\\d", true }, 43 44 // The handling of ^ in multi-line mode is different, as is 45 // the handling of $ in single-line mode. (Both involve 46 // boundary cases if the string ends with \n.) 47 { "\\A", true }, 48 { "\\z", true }, 49 { "(?m)^", false }, 50 { "(?m)$", true }, 51 { "(?-m)^", true }, 52 { "(?-m)$", false }, // In PCRE, == \Z 53 { "(?m)\\A", true }, 54 { "(?m)\\z", true }, 55 { "(?-m)\\A", true }, 56 { "(?-m)\\z", true }, 57 }; 58 59 TEST(MimicsPCRE, SimpleTests) { 60 for (int i = 0; i < arraysize(tests); i++) { 61 const PCRETest& t = tests[i]; 62 for (int j = 0; j < 2; j++) { 63 Regexp::ParseFlags flags = Regexp::LikePerl; 64 if (j == 0) 65 flags = flags | Regexp::Latin1; 66 Regexp* re = Regexp::Parse(t.regexp, flags, NULL); 67 CHECK(re) << " " << t.regexp; 68 CHECK_EQ(t.should_match, re->MimicsPCRE()) 69 << " " << t.regexp << " " 70 << (j==0 ? "latin1" : "utf"); 71 re->Decref(); 72 } 73 } 74 } 75 76 } // namespace re2 77