1 2 /*--------------------------------------------------------------------*/ 3 /*--- A simple sequence matching facility. ---*/ 4 /*--- m_seqmatch.c ---*/ 5 /*--------------------------------------------------------------------*/ 6 7 /* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2008-2011 OpenWorks Ltd 12 info (at) open-works.co.uk 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30 */ 31 32 #include "pub_core_basics.h" 33 #include "pub_core_libcassert.h" 34 #include "pub_core_libcbase.h" // VG_(strlen) 35 #include "pub_core_seqmatch.h" // self 36 37 /* --------------------------------------------------------------------- 38 A simple sequence matching facility 39 ------------------------------------------------------------------ */ 40 41 /* See detailed comment in include/pub_tool_seqmatch.h about this. */ 42 Bool VG_(generic_match) ( 43 Bool matchAll, 44 void* patt, SizeT szbPatt, UWord nPatt, UWord ixPatt, 45 void* input, SizeT szbInput, UWord nInput, UWord ixInput, 46 Bool (*pIsStar)(void*), 47 Bool (*pIsQuery)(void*), 48 Bool (*pattEQinp)(void*,void*) 49 ) 50 { 51 /* This is the spec, written in my favourite formal specification 52 language. It specifies non-greedy matching of '*'s. 53 54 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 55 ma ('*':ps) [] = ma ps [] 56 57 ma ('?':ps) (i:is) = ma ps is 58 ma ('?':ps) [] = False 59 60 ma (p:ps) (i:is) = p == i && ma ps is 61 62 ma (p:ps) [] = False 63 ma [] (i:is) = False -- m-all, True for m-prefix 64 ma [] [] = True 65 */ 66 Bool havePatt, haveInput; 67 void *currPatt, *currInput; 68 tailcall: 69 vg_assert(nPatt >= 0 && nPatt < 1000000); /* arbitrary */ 70 vg_assert(nInput >= 0 && nInput < 1000000); /* arbitrary */ 71 vg_assert(ixPatt >= 0 && ixPatt <= nPatt); 72 vg_assert(ixInput >= 0 && ixInput <= nInput); 73 74 havePatt = ixPatt < nPatt; 75 haveInput = ixInput < nInput; 76 77 /* No specific need to set NULL when !have{Patt,Input}, but guards 78 against inadvertantly dereferencing an out of range pointer to 79 the pattern or input arrays. */ 80 currPatt = havePatt ? ((Char*)patt) + szbPatt * ixPatt : NULL; 81 currInput = haveInput ? ((Char*)input) + szbInput * ixInput : NULL; 82 83 // Deal with the complex case first: wildcards. Do frugal 84 // matching. When encountering a '*', first skip no characters 85 // at all, and see if the rest of the match still works. Only if 86 // that fails do we then skip a character, and retry at the next 87 // position. 88 // 89 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 90 // 91 // If we're out of input, check the rest of the pattern matches 92 // the empty input. This really means can only be be empty or 93 // composed entirely of '*'s. 94 // 95 // ma ('*':ps) [] = ma ps [] 96 // 97 if (havePatt && pIsStar(currPatt)) { 98 if (haveInput) { 99 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is 100 // we unavoidably have to make a real recursive call for the 101 // first half of the OR, since this isn't straight tail-recursion. 102 if (VG_(generic_match)( matchAll, 103 patt, szbPatt, nPatt, ixPatt+1, 104 input,szbInput,nInput, ixInput+0, 105 pIsStar,pIsQuery,pattEQinp) ) { 106 return True; 107 } 108 // but we can tail-recurse for the second call 109 ixInput++; goto tailcall; 110 } else { 111 // ma ('*':ps) [] = ma ps [] 112 ixPatt++; goto tailcall; 113 } 114 } 115 116 // simpler cases now. Deal with '?' wildcards. 117 // 118 // ma ('?':ps) (i:is) = ma ps is 119 // ma ('?':ps) [] = False 120 if (havePatt && pIsQuery(currPatt)) { 121 if (haveInput) { 122 ixPatt++; ixInput++; goto tailcall; 123 } else { 124 return False; 125 } 126 } 127 128 // obvious case with literal chars in the pattern 129 // 130 // ma (p:ps) (i:is) = p == i && ma ps is 131 if (havePatt && haveInput) { 132 if (!pattEQinp(currPatt,currInput)) return False; 133 ixPatt++; ixInput++; goto tailcall; 134 } 135 136 // if we run out of input before we run out of pattern, we must fail 137 // ma (p:ps) [] = False 138 if (havePatt && !haveInput) return False; 139 140 // if we run out of pattern before we run out of input, the 141 // verdict depends on the matching mode. If we are trying to 142 // match exactly (the pattern must consume the entire input) 143 // then the outcome is failure. However, if we're merely attempting 144 // to match some prefix of the input, then we have been successful. 145 // 146 // ma [] (i:is) = False -- m-all, True for m-prefix 147 if (!havePatt && haveInput) { 148 return matchAll ? False // match-all 149 : True; // match-prefix 150 } 151 152 // finally, if both sequence and input are both completely 153 // consumed, then we were successful, regardless of matching mode. 154 if (!havePatt && !haveInput) return True; 155 156 // end of cases 157 vg_assert(0); 158 } 159 160 161 /* And a parameterization of the above, to make it do 162 string matching. 163 */ 164 static Bool charIsStar ( void* pV ) { return *(Char*)pV == '*'; } 165 static Bool charIsQuery ( void* pV ) { return *(Char*)pV == '?'; } 166 static Bool char_p_EQ_i ( void* pV, void* cV ) { 167 Char p = *(Char*)pV; 168 Char c = *(Char*)cV; 169 vg_assert(p != '*' && p != '?'); 170 return p == c; 171 } 172 Bool VG_(string_match) ( const Char* patt, const Char* input ) 173 { 174 return VG_(generic_match)( 175 True/* match-all */, 176 (void*)patt, sizeof(UChar), VG_(strlen)(patt), 0, 177 (void*)input, sizeof(UChar), VG_(strlen)(input), 0, 178 charIsStar, charIsQuery, char_p_EQ_i 179 ); 180 } 181 182 183 // test cases for the matcher (in match-all mode) 184 // typedef struct { char* patt; char* input; Bool xres; } Test; 185 // 186 //static Test tests[] = 187 // { 188 // { "" ,"" , True }, 189 // { "a" ,"" , False }, 190 // { "a" ,"b" , False }, 191 // { "a" ,"a" , True }, 192 // { "a" ,"aa" , False }, 193 // { "*" ,"" , True }, 194 // { "**" ,"" , True }, 195 // { "*" ,"abc", True }, 196 // { "*a" ,"abc", False }, 197 // { "*b" ,"abc", False }, 198 // { "*bc" ,"abc", True }, 199 // { "a*b" ,"abc", False }, 200 // { "a*c" ,"abc", True }, 201 // { "*c" ,"abc", True }, 202 // { "c*c" ,"abc", False }, 203 // { "abc*" ,"abc", True }, 204 // { "abc**" ,"abc", True }, 205 // { "**abc" ,"abc", True }, 206 // { "**a*b*c**" ,"abc", True }, 207 // { "**a*b*d**" ,"abc", False }, 208 // { "a?b" ,"abc", False }, 209 // { "a?c" ,"abc", True }, 210 // { "?" ,"" , False }, 211 // { "?" ,"a" , True }, 212 // { "?" ,"ab" , False }, 213 // { "abcd" ,"abc", False }, 214 // { "ab" ,"abc", False }, 215 // { NULL ,NULL , False } 216 // }; 217 // 218 //int main ( void ) 219 //{ 220 // Test* t; 221 // for (t = tests; t->patt; t++) { 222 // printf("%10s %6s %s\n", 223 // t->patt, t->input, 224 // match_string_all((UChar*)t->patt,(UChar*)t->input,True) 225 // == t->xres 226 // ? "pass" : "FAIL" ); 227 // } 228 // return 0; 229 //} 230 231 /*--------------------------------------------------------------------*/ 232 /*--- end m_seqmatch.c ---*/ 233 /*--------------------------------------------------------------------*/ 234