Home | History | Annotate | Download | only in coregrind
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- A simple sequence matching facility.                         ---*/
      4 /*---                                                 m_seqmatch.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of Valgrind, a dynamic binary instrumentation
      9    framework.
     10 
     11    Copyright (C) 2008-2013 OpenWorks Ltd
     12       info (at) open-works.co.uk
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_core_basics.h"
     33 #include "pub_core_libcassert.h"
     34 #include "pub_core_libcbase.h"    // VG_(strlen)
     35 #include "pub_core_seqmatch.h"    // self
     36 
     37 /* ---------------------------------------------------------------------
     38    A simple sequence matching facility
     39    ------------------------------------------------------------------ */
     40 
     41 /* See detailed comment in include/pub_tool_seqmatch.h about this. */
     42 Bool VG_(generic_match) (
     43         Bool matchAll,
     44         const void* patt,  SizeT szbPatt,  UWord nPatt,  UWord ixPatt,
     45         const void* input, SizeT szbInput, UWord nInput, UWord ixInput,
     46         Bool (*pIsStar)(const void*),
     47         Bool (*pIsQuery)(const void*),
     48         Bool (*pattEQinp)(const void*,const void*,void*,UWord),
     49         void* inputCompleter
     50      )
     51 {
     52    /* This is the spec, written in my favourite formal specification
     53       language.  It specifies non-greedy matching of '*'s.
     54 
     55       ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
     56       ma ('*':ps) []     = ma ps []
     57 
     58       ma ('?':ps) (i:is) = ma ps is
     59       ma ('?':ps) []     = False
     60 
     61       ma (p:ps)   (i:is) = p == i && ma ps is
     62 
     63       ma (p:ps)   []     = False
     64       ma []       (i:is) = False -- m-all, True for m-prefix
     65       ma []       []     = True
     66    */
     67    Bool  havePatt, haveInput;
     68    const HChar *currPatt, *currInput;
     69   tailcall:
     70    vg_assert(nPatt >= 0   && nPatt  < 1000000); /* arbitrary */
     71    vg_assert(nInput >= 0  && nInput < 1000000); /* arbitrary */
     72    vg_assert(ixPatt >= 0  && ixPatt <= nPatt);
     73    vg_assert(ixInput >= 0 && ixInput <= nInput);
     74 
     75    havePatt  = ixPatt < nPatt;
     76    haveInput = ixInput < nInput;
     77 
     78    /* No specific need to set NULL when !have{Patt,Input}, but guards
     79       against inadvertantly dereferencing an out of range pointer to
     80       the pattern or input arrays. */
     81    currPatt  = havePatt  ? ((const HChar*)patt) + szbPatt * ixPatt    : NULL;
     82    currInput = haveInput ? ((const HChar*)input) + szbInput * ixInput : NULL;
     83 
     84    // Deal with the complex case first: wildcards.  Do frugal
     85    // matching.  When encountering a '*', first skip no characters
     86    // at all, and see if the rest of the match still works.  Only if
     87    // that fails do we then skip a character, and retry at the next
     88    // position.
     89    //
     90    // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
     91    //
     92    // If we're out of input, check the rest of the pattern matches
     93    // the empty input.  This really means can only be be empty or
     94    // composed entirely of '*'s.
     95    //
     96    // ma ('*':ps) []     = ma ps []
     97    //
     98    if (havePatt && pIsStar(currPatt)) {
     99       if (haveInput) {
    100          // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
    101          // we unavoidably have to make a real recursive call for the
    102          // first half of the OR, since this isn't straight tail-recursion.
    103          if (VG_(generic_match)( matchAll,
    104                                  patt, szbPatt, nPatt,  ixPatt+1,
    105                                  input,szbInput,nInput, ixInput+0,
    106                                  pIsStar,pIsQuery,pattEQinp,
    107                                  inputCompleter) ) {
    108             return True;
    109          }
    110          // but we can tail-recurse for the second call
    111          ixInput++; goto tailcall;
    112       } else {
    113          // ma ('*':ps) []     = ma ps []
    114          ixPatt++; goto tailcall;
    115       }
    116    }
    117 
    118    // simpler cases now.  Deal with '?' wildcards.
    119    //
    120    // ma ('?':ps) (i:is) = ma ps is
    121    // ma ('?':ps) []     = False
    122    if (havePatt && pIsQuery(currPatt)) {
    123       if (haveInput) {
    124          ixPatt++; ixInput++; goto tailcall;
    125       } else {
    126          return False;
    127       }
    128    }
    129 
    130    // obvious case with literal chars in the pattern
    131    //
    132    // ma (p:ps)   (i:is) = p == i && ma ps is
    133    if (havePatt && haveInput) {
    134       if (!pattEQinp(currPatt,currInput,inputCompleter,ixInput)) return False;
    135       ixPatt++; ixInput++; goto tailcall;
    136    }
    137 
    138    // if we run out of input before we run out of pattern, we must fail
    139    // ma (p:ps)   []     = False
    140    if (havePatt && !haveInput) return False;
    141 
    142    // if we run out of pattern before we run out of input, the
    143    // verdict depends on the matching mode.  If we are trying to
    144    // match exactly (the pattern must consume the entire input)
    145    // then the outcome is failure.  However, if we're merely attempting
    146    // to match some prefix of the input, then we have been successful.
    147    //
    148    // ma []       (i:is) = False -- m-all, True for m-prefix
    149    if (!havePatt && haveInput) {
    150       return matchAll ? False // match-all
    151                       : True; // match-prefix
    152    }
    153 
    154    // finally, if both sequence and input are both completely
    155    // consumed, then we were successful, regardless of matching mode.
    156    if (!havePatt && !haveInput) return True;
    157 
    158    // end of cases
    159    vg_assert(0);
    160 }
    161 
    162 
    163 /* And a parameterization of the above, to make it do
    164    string matching.
    165 */
    166 static Bool charIsStar  ( const void* pV ) { return *(const HChar*)pV == '*'; }
    167 static Bool charIsQuery ( const void* pV ) { return *(const HChar*)pV == '?'; }
    168 static Bool char_p_EQ_i ( const void* pV, const void* cV,
    169                           void* null_completer, UWord ixcV ) {
    170    HChar p = *(const HChar*)pV;
    171    HChar c = *(const HChar*)cV;
    172    vg_assert(p != '*' && p != '?');
    173    return p == c;
    174 }
    175 Bool VG_(string_match) ( const HChar* patt, const HChar* input )
    176 {
    177    return VG_(generic_match)(
    178              True/* match-all */,
    179              patt,  sizeof(HChar), VG_(strlen)(patt), 0,
    180              input, sizeof(HChar), VG_(strlen)(input), 0,
    181              charIsStar, charIsQuery, char_p_EQ_i,
    182              NULL
    183           );
    184 }
    185 
    186 
    187 // test cases for the matcher (in match-all mode)
    188 // typedef struct { char* patt; char* input; Bool xres; } Test;
    189 //
    190 //static Test tests[] =
    191 //  {
    192 //    { ""          ,""   , True },
    193 //    { "a"         ,""   , False },
    194 //    { "a"         ,"b"  , False },
    195 //    { "a"         ,"a"  , True },
    196 //    { "a"         ,"aa" , False },
    197 //    { "*"         ,""   , True },
    198 //    { "**"        ,""   , True },
    199 //    { "*"         ,"abc", True },
    200 //    { "*a"        ,"abc", False },
    201 //    { "*b"        ,"abc", False },
    202 //    { "*bc"       ,"abc", True },
    203 //    { "a*b"       ,"abc", False },
    204 //    { "a*c"       ,"abc", True },
    205 //    { "*c"        ,"abc", True },
    206 //    { "c*c"       ,"abc", False },
    207 //    { "abc*"      ,"abc", True },
    208 //    { "abc**"     ,"abc", True },
    209 //    { "**abc"     ,"abc", True },
    210 //    { "**a*b*c**" ,"abc", True },
    211 //    { "**a*b*d**" ,"abc", False },
    212 //    { "a?b"       ,"abc", False },
    213 //    { "a?c"       ,"abc", True },
    214 //    { "?"         ,""   , False },
    215 //    { "?"         ,"a"  , True },
    216 //    { "?"         ,"ab" , False },
    217 //    { "abcd"      ,"abc", False },
    218 //    { "ab"        ,"abc", False },
    219 //    { NULL        ,NULL , False }
    220 //  };
    221 //
    222 //int main ( void )
    223 //{
    224 //   Test* t;
    225 //   for (t = tests; t->patt; t++) {
    226 //     printf("%10s %6s  %s\n",
    227 //            t->patt, t->input,
    228 //            match_string_all((UChar*)t->patt,(UChar*)t->input,True)
    229 //            == t->xres
    230 //               ? "pass" : "FAIL" );
    231 //   }
    232 //   return 0;
    233 //}
    234 
    235 /*--------------------------------------------------------------------*/
    236 /*--- end                                             m_seqmatch.c ---*/
    237 /*--------------------------------------------------------------------*/
    238