1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Random testing of regular expression matching. 6 7 #include <stdio.h> 8 #include "util/test.h" 9 #include "re2/testing/exhaustive_tester.h" 10 11 DEFINE_int32(regexpseed, 404, "Random regexp seed."); 12 DEFINE_int32(regexpcount, 100, "How many random regexps to generate."); 13 DEFINE_int32(stringseed, 200, "Random string seed."); 14 DEFINE_int32(stringcount, 100, "How many random strings to generate."); 15 16 namespace re2 { 17 18 // Runs a random test on the given parameters. 19 // (Always uses the same random seeds for reproducibility. 20 // Can give different seeds on command line.) 21 static void RandomTest(int maxatoms, int maxops, 22 const vector<string>& alphabet, 23 const vector<string>& ops, 24 int maxstrlen, const vector<string>& stralphabet, 25 const string& wrapper) { 26 // Limit to smaller test cases in debug mode, 27 // because everything is so much slower. 28 if (DEBUG_MODE) { 29 maxatoms--; 30 maxops--; 31 maxstrlen /= 2; 32 } 33 34 ExhaustiveTester t(maxatoms, maxops, alphabet, ops, 35 maxstrlen, stralphabet, wrapper, ""); 36 t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount); 37 t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount); 38 printf("%d regexps, %d tests, %d failures [%d/%d str]\n", 39 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); 40 EXPECT_EQ(0, t.failures()); 41 } 42 43 // Tests random small regexps involving literals and egrep operators. 44 TEST(Random, SmallEgrepLiterals) { 45 RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(), 46 15, Explode("abc"), 47 ""); 48 } 49 50 // Tests random bigger regexps involving literals and egrep operators. 51 TEST(Random, BigEgrepLiterals) { 52 RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(), 53 15, Explode("abc"), 54 ""); 55 } 56 57 // Tests random small regexps involving literals, capturing parens, 58 // and egrep operators. 59 TEST(Random, SmallEgrepCaptures) { 60 RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(), 61 15, Explode("abc"), 62 ""); 63 } 64 65 // Tests random bigger regexps involving literals, capturing parens, 66 // and egrep operators. 67 TEST(Random, BigEgrepCaptures) { 68 RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(), 69 15, Explode("abc"), 70 ""); 71 } 72 73 // Tests random large complicated expressions, using all the possible 74 // operators, some literals, some parenthesized literals, and predefined 75 // character classes like \d. (Adding larger character classes would 76 // make for too many possibilities.) 77 TEST(Random, Complicated) { 78 vector<string> ops = Split(" ", 79 "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? " 80 "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} " 81 "%s{2} %s{2,} %s{3,4} %s{4,5}"); 82 83 // Use (?:\b) and (?:\B) instead of \b and \B, 84 // because PCRE rejects \b* but accepts (?:\b)*. 85 // Ditto ^ and $. 86 vector<string> atoms = Split(" ", 87 ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v " 88 "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) " 89 "a (a) b c - \\\\"); 90 vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a"); 91 RandomTest(10, 10, atoms, ops, 20, alphabet, ""); 92 } 93 94 } // namespace re2 95 96