1 // Copyright 2006 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Test character class manipulations. 6 7 #include "util/test.h" 8 #include "re2/regexp.h" 9 10 namespace re2 { 11 12 struct CCTest { 13 struct { 14 Rune lo; 15 Rune hi; 16 } add[10]; 17 int remove; 18 struct { 19 Rune lo; 20 Rune hi; 21 } final[10]; 22 }; 23 24 static CCTest tests[] = { 25 { { { 10, 20 }, {-1} }, -1, 26 { { 10, 20 }, {-1} } }, 27 28 { { { 10, 20 }, { 20, 30 }, {-1} }, -1, 29 { { 10, 30 }, {-1} } }, 30 31 { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1, 32 { { 10, 40 }, {-1} } }, 33 34 { { { 0, 50 }, { 20, 30 }, {-1} }, -1, 35 { { 0, 50 }, {-1} } }, 36 37 { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1, 38 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, 39 40 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, 41 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, 42 43 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1, 44 { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } }, 45 46 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1, 47 { { 5, 25 }, {-1} } }, 48 49 { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1, 50 { { 10, 23 }, {-1} } }, 51 52 // These check boundary cases during negation. 53 { { { 0, Runemax }, {-1} }, -1, 54 { { 0, Runemax }, {-1} } }, 55 56 { { { 0, 50 }, {-1} }, -1, 57 { { 0, 50 }, {-1} } }, 58 59 { { { 50, Runemax }, {-1} }, -1, 60 { { 50, Runemax }, {-1} } }, 61 62 // Check RemoveAbove. 63 { { { 50, Runemax }, {-1} }, 255, 64 { { 50, 255 }, {-1} } }, 65 66 { { { 50, Runemax }, {-1} }, 65535, 67 { { 50, 65535 }, {-1} } }, 68 69 { { { 50, Runemax }, {-1} }, Runemax, 70 { { 50, Runemax }, {-1} } }, 71 72 { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255, 73 { { 50, 60 }, { 250, 255 }, {-1} } }, 74 75 { { { 50, 60 }, {-1} }, 255, 76 { { 50, 60 }, {-1} } }, 77 78 { { { 350, 360 }, {-1} }, 255, 79 { {-1} } }, 80 81 { { {-1} }, 255, 82 { {-1} } }, 83 }; 84 85 template<class CharClass> 86 static void Broke(const char *desc, const CCTest* t, CharClass* cc) { 87 if (t == NULL) { 88 printf("\t%s:", desc); 89 } else { 90 printf("\n"); 91 printf("CharClass added: [%s]", desc); 92 for (int k = 0; t->add[k].lo >= 0; k++) 93 printf(" %d-%d", t->add[k].lo, t->add[k].hi); 94 printf("\n"); 95 if (t->remove >= 0) 96 printf("Removed > %d\n", t->remove); 97 printf("\twant:"); 98 for (int k = 0; t->final[k].lo >= 0; k++) 99 printf(" %d-%d", t->final[k].lo, t->final[k].hi); 100 printf("\n"); 101 printf("\thave:"); 102 } 103 104 for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it) 105 printf(" %d-%d", it->lo, it->hi); 106 printf("\n"); 107 } 108 109 bool ShouldContain(CCTest *t, int x) { 110 for (int j = 0; t->final[j].lo >= 0; j++) 111 if (t->final[j].lo <= x && x <= t->final[j].hi) 112 return true; 113 return false; 114 } 115 116 // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder. 117 118 CharClass* Negate(CharClass *cc) { 119 return cc->Negate(); 120 } 121 122 void Delete(CharClass* cc) { 123 cc->Delete(); 124 } 125 126 CharClassBuilder* Negate(CharClassBuilder* cc) { 127 CharClassBuilder* ncc = cc->Copy(); 128 ncc->Negate(); 129 return ncc; 130 } 131 132 void Delete(CharClassBuilder* cc) { 133 delete cc; 134 } 135 136 template<class CharClass> 137 bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) { 138 typename CharClass::iterator it = cc->begin(); 139 int size = 0; 140 for (int j = 0; t->final[j].lo >= 0; j++, ++it) { 141 if (it == cc->end() || 142 it->lo != t->final[j].lo || 143 it->hi != t->final[j].hi) { 144 Broke(desc, t, cc); 145 return false; 146 } 147 size += it->hi - it->lo + 1; 148 } 149 if (it != cc->end()) { 150 Broke(desc, t, cc); 151 return false; 152 } 153 if (cc->size() != size) { 154 Broke(desc, t, cc); 155 printf("wrong size: want %d have %d\n", size, cc->size()); 156 return false; 157 } 158 159 for (int j = 0; j < 101; j++) { 160 if (j == 100) 161 j = Runemax; 162 if (ShouldContain(t, j) != cc->Contains(j)) { 163 Broke(desc, t, cc); 164 printf("want contains(%d)=%d, got %d\n", 165 j, ShouldContain(t, j), cc->Contains(j)); 166 return false; 167 } 168 } 169 170 CharClass* ncc = Negate(cc); 171 for (int j = 0; j < 101; j++) { 172 if (j == 100) 173 j = Runemax; 174 if (ShouldContain(t, j) == ncc->Contains(j)) { 175 Broke(desc, t, cc); 176 Broke("ncc", NULL, ncc); 177 printf("want ncc contains(%d)!=%d, got %d\n", 178 j, ShouldContain(t, j), ncc->Contains(j)); 179 Delete(ncc); 180 return false; 181 } 182 if (ncc->size() != Runemax+1 - cc->size()) { 183 Broke(desc, t, cc); 184 Broke("ncc", NULL, ncc); 185 printf("ncc size should be %d is %d\n", 186 Runemax+1 - cc->size(), ncc->size()); 187 Delete(ncc); 188 return false; 189 } 190 } 191 Delete(ncc); 192 return true; 193 } 194 195 TEST(TestCharClassBuilder, Adds) { 196 int nfail = 0; 197 for (int i = 0; i < arraysize(tests); i++) { 198 CharClassBuilder ccb; 199 CCTest* t = &tests[i]; 200 for (int j = 0; t->add[j].lo >= 0; j++) 201 ccb.AddRange(t->add[j].lo, t->add[j].hi); 202 if (t->remove >= 0) 203 ccb.RemoveAbove(t->remove); 204 if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)")) 205 nfail++; 206 CharClass* cc = ccb.GetCharClass(); 207 if (!CorrectCC(cc, t, "before copy (CharClass)")) 208 nfail++; 209 cc->Delete(); 210 211 CharClassBuilder *ccb1 = ccb.Copy(); 212 if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)")) 213 nfail++; 214 cc = ccb.GetCharClass(); 215 if (!CorrectCC(cc, t, "after copy (CharClass)")) 216 nfail++; 217 cc->Delete(); 218 delete ccb1; 219 } 220 EXPECT_EQ(nfail, 0); 221 } 222 223 } // namespace re2 224