1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Exhaustive testing of regular expression matching. 6 7 #include "util/test.h" 8 #include "re2/testing/exhaustive_tester.h" 9 10 namespace re2 { 11 12 // Test simple character classes by themselves. 13 TEST(CharacterClasses, Exhaustive) { 14 vector<string> atoms = Split(" ", 15 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); 16 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 17 5, Explode("ab"), "", ""); 18 } 19 20 // Test simple character classes inside a___b (for example, a[a]b). 21 TEST(CharacterClasses, ExhaustiveAB) { 22 vector<string> atoms = Split(" ", 23 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b ."); 24 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(), 25 5, Explode("ab"), "a%sb", ""); 26 } 27 28 // Returns UTF8 for Rune r 29 static string UTF8(Rune r) { 30 char buf[UTFmax+1]; 31 buf[runetochar(buf, &r)] = 0; 32 return string(buf); 33 } 34 35 // Returns a vector of "interesting" UTF8 characters. 36 // Unicode is now too big to just return all of them, 37 // so UTF8Characters return a set likely to be good test cases. 38 static const vector<string>& InterestingUTF8() { 39 static bool init; 40 static vector<string> v; 41 42 if (init) 43 return v; 44 45 init = true; 46 // All the Latin1 equivalents are interesting. 47 for (int i = 1; i < 256; i++) 48 v.push_back(UTF8(i)); 49 50 // After that, the codes near bit boundaries are 51 // interesting, because they span byte sequence lengths. 52 for (int j = 0; j < 8; j++) 53 v.push_back(UTF8(256 + j)); 54 for (int i = 512; i < Runemax; i <<= 1) 55 for (int j = -8; j < 8; j++) 56 v.push_back(UTF8(i + j)); 57 58 // The codes near Runemax, including Runemax itself, are interesting. 59 for (int j = -8; j <= 0; j++) 60 v.push_back(UTF8(Runemax + j)); 61 62 return v; 63 } 64 65 // Test interesting UTF-8 characters against character classes. 66 TEST(InterestingUTF8, SingleOps) { 67 vector<string> atoms = Split(" ", 68 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " 69 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " 70 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " 71 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); 72 vector<string> ops; // no ops 73 ExhaustiveTest(1, 0, atoms, ops, 74 1, InterestingUTF8(), "", ""); 75 } 76 77 // Test interesting UTF-8 characters against character classes, 78 // but wrap everything inside AB. 79 TEST(InterestingUTF8, AB) { 80 vector<string> atoms = Split(" ", 81 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B " 82 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] " 83 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] " 84 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]"); 85 vector<string> ops; // no ops 86 vector<string> alpha = InterestingUTF8(); 87 for (int i = 0; i < alpha.size(); i++) 88 alpha[i] = "a" + alpha[i] + "b"; 89 ExhaustiveTest(1, 0, atoms, ops, 90 1, alpha, "a%sb", ""); 91 } 92 93 } // namespace re2 94 95