1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Exhaustive testing of regular expression matching. 6 7 #include "util/test.h" 8 #include "re2/re2.h" 9 #include "re2/testing/exhaustive_tester.h" 10 11 DECLARE_string(regexp_engines); 12 13 namespace re2 { 14 15 // Test empty string matches (aka "(?:)") 16 TEST(EmptyString, Exhaustive) { 17 ExhaustiveTest(2, 2, Split(" ", "(?:) a"), 18 RegexpGenerator::EgrepOps(), 19 5, Split("", "ab"), "", ""); 20 } 21 22 // Test escaped versions of regexp syntax. 23 TEST(Punctuation, Literals) { 24 vector<string> alphabet = Explode("()*+?{}[]\\^$."); 25 vector<string> escaped = alphabet; 26 for (int i = 0; i < escaped.size(); i++) 27 escaped[i] = "\\" + escaped[i]; 28 ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(), 29 2, alphabet, "", ""); 30 } 31 32 // Test ^ $ . \A \z in presence of line endings. 33 // Have to wrap the empty-width ones in (?:) so that 34 // they can be repeated -- PCRE rejects ^* but allows (?:^)* 35 TEST(LineEnds, Exhaustive) { 36 ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"), 37 RegexpGenerator::EgrepOps(), 38 4, Explode("ab\n"), "", ""); 39 } 40 41 // Test what does and does not match \n. 42 // This would be a good test, except that PCRE seems to have a bug: 43 // in single-byte character set mode (the default), 44 // [^a] matches \n, but in UTF-8 mode it does not. 45 // So when we run the test, the tester complains that 46 // we don't agree with PCRE, but it's PCRE that is at fault. 47 // For what it's worth, Perl gets this right (matches 48 // regardless of whether UTF-8 input is selected): 49 // 50 // #!/usr/bin/perl 51 // use POSIX qw(locale_h); 52 // print "matches in latin1\n" if "\n" =~ /[^a]/; 53 // setlocale("en_US.utf8"); 54 // print "matches in utf8\n" if "\n" =~ /[^a]/; 55 // 56 // The rule chosen for RE2 is that by default, like Perl, 57 // dot does not match \n but negated character classes [^a] do. 58 // (?s) will allow dot to match \n; there is no way in RE2 59 // to stop [^a] from matching \n, though the underlying library 60 // provides a mechanism, and RE2 could add new syntax if needed. 61 // 62 // TEST(Newlines, Exhaustive) { 63 // vector<string> empty_vector; 64 // ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"), 65 // RegexpGenerator::EgrepOps(), 66 // 4, Explode("a\n"), ""); 67 // } 68 69 } // namespace re2 70 71