1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Exhaustive testing of regular expression matching. 6 7 // Each test picks an alphabet (e.g., "abc"), a maximum string length, 8 // a maximum regular expression length, and a maximum number of letters 9 // that can appear in the regular expression. Given these parameters, 10 // it tries every possible regular expression and string, verifying that 11 // the NFA, DFA, and a trivial backtracking implementation agree about 12 // the location of the match. 13 14 #include <stdlib.h> 15 #include <stdio.h> 16 17 #ifndef LOGGING 18 #define LOGGING 0 19 #endif 20 21 #include "util/test.h" 22 #include "re2/testing/exhaustive_tester.h" 23 #include "re2/testing/tester.h" 24 25 DEFINE_bool(show_regexps, false, "show regexps during testing"); 26 27 DEFINE_int32(max_bad_regexp_inputs, 1, 28 "Stop testing a regular expression after finding this many " 29 "strings that break it."); 30 31 // Compiled in debug mode, the usual tests run for over an hour. 32 // Have to cut it down to make the unit test machines happy. 33 DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode."); 34 35 namespace re2 { 36 37 static char* escape(const StringPiece& sp) { 38 static char buf[512]; 39 char* p = buf; 40 *p++ = '\"'; 41 for (int i = 0; i < sp.size(); i++) { 42 if(p+5 >= buf+sizeof buf) 43 LOG(FATAL) << "ExhaustiveTester escape: too long"; 44 if(sp[i] == '\\' || sp[i] == '\"') { 45 *p++ = '\\'; 46 *p++ = sp[i]; 47 } else if(sp[i] == '\n') { 48 *p++ = '\\'; 49 *p++ = 'n'; 50 } else { 51 *p++ = sp[i]; 52 } 53 } 54 *p++ = '\"'; 55 *p = '\0'; 56 return buf; 57 } 58 59 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) { 60 if (!re.Match(input, 0, input.size(), anchor, m, n)) { 61 printf("-"); 62 return; 63 } 64 for (int i = 0; i < n; i++) { 65 if (i > 0) 66 printf(" "); 67 if (m[i].begin() == NULL) 68 printf("-"); 69 else 70 printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin())); 71 } 72 } 73 74 // Processes a single generated regexp. 75 // Compiles it using Regexp interface and PCRE, and then 76 // checks that NFA, DFA, and PCRE all return the same results. 77 void ExhaustiveTester::HandleRegexp(const string& const_regexp) { 78 regexps_++; 79 string regexp = const_regexp; 80 if (!topwrapper_.empty()) 81 regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); 82 83 if (FLAGS_show_regexps) { 84 printf("\r%s", regexp.c_str()); 85 fflush(stdout); 86 } 87 88 if (LOGGING) { 89 // Write out test cases and answers for use in testing 90 // other implementations, such as Go's regexp package. 91 if (randomstrings_) 92 LOG(ERROR) << "Cannot log with random strings."; 93 if (regexps_ == 1) { // first 94 printf("strings\n"); 95 strgen_.Reset(); 96 while (strgen_.HasNext()) 97 printf("%s\n", escape(strgen_.Next())); 98 printf("regexps\n"); 99 } 100 printf("%s\n", escape(regexp)); 101 102 RE2 re(regexp); 103 RE2::Options longest; 104 longest.set_longest_match(true); 105 RE2 relongest(regexp, longest); 106 int ngroup = re.NumberOfCapturingGroups()+1; 107 StringPiece* group = new StringPiece[ngroup]; 108 109 strgen_.Reset(); 110 while (strgen_.HasNext()) { 111 StringPiece input = strgen_.Next(); 112 PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup); 113 printf(";"); 114 PrintResult(re, input, RE2::UNANCHORED, group, ngroup); 115 printf(";"); 116 PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup); 117 printf(";"); 118 PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup); 119 printf("\n"); 120 } 121 delete[] group; 122 return; 123 } 124 125 Tester tester(regexp); 126 if (tester.error()) 127 return; 128 129 strgen_.Reset(); 130 strgen_.GenerateNULL(); 131 if (randomstrings_) 132 strgen_.Random(stringseed_, stringcount_); 133 int bad_inputs = 0; 134 while (strgen_.HasNext()) { 135 tests_++; 136 if (!tester.TestInput(strgen_.Next())) { 137 failures_++; 138 if (++bad_inputs >= FLAGS_max_bad_regexp_inputs) 139 break; 140 } 141 } 142 } 143 144 // Runs an exhaustive test on the given parameters. 145 void ExhaustiveTest(int maxatoms, int maxops, 146 const vector<string>& alphabet, 147 const vector<string>& ops, 148 int maxstrlen, const vector<string>& stralphabet, 149 const string& wrapper, 150 const string& topwrapper) { 151 if (DEBUG_MODE && FLAGS_quick_debug_mode) { 152 if (maxatoms > 1) 153 maxatoms--; 154 if (maxops > 1) 155 maxops--; 156 if (maxstrlen > 1) 157 maxstrlen--; 158 } 159 ExhaustiveTester t(maxatoms, maxops, alphabet, ops, 160 maxstrlen, stralphabet, wrapper, 161 topwrapper); 162 t.Generate(); 163 if (!LOGGING) { 164 printf("%d regexps, %d tests, %d failures [%d/%d str]\n", 165 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); 166 } 167 EXPECT_EQ(0, t.failures()); 168 } 169 170 // Runs an exhaustive test using the given parameters and 171 // the basic egrep operators. 172 void EgrepTest(int maxatoms, int maxops, const string& alphabet, 173 int maxstrlen, const string& stralphabet, 174 const string& wrapper) { 175 const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" }; 176 177 for (int i = 0; i < arraysize(tops); i++) { 178 ExhaustiveTest(maxatoms, maxops, 179 Split("", alphabet), 180 RegexpGenerator::EgrepOps(), 181 maxstrlen, 182 Split("", stralphabet), 183 wrapper, 184 tops[i]); 185 } 186 } 187 188 } // namespace re2 189