Home | History | Annotate | Download | only in util
      1 /* $Revision: 1.2 $
      2  **
      3  ** Do shell-style pattern matching for ?, \, [], and * characters.
      4  ** Might not be robust in face of malformed patterns; e.g., "foo[a-"
      5  ** could cause a segmentation violation. It is 8bit clean.
      6  **
      7  ** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
      8  ** Rich $alz is now <rsalz (at) osf.org>.
      9  ** April, 1991: Replaced mutually-recursive calls with in-line code
     10  ** for the star character.
     11  **
     12  ** Special thanks to Lars Mathiesen <thorinn (at) diku.dk> for the ABORT code.
     13  ** This can greatly speed up failing wildcard patterns. For example:
     14  ** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
     15  ** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
     16  ** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
     17  ** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
     18  ** the ABORT code, it takes 22310 calls to fail. Ugh. The following
     19  ** explanation is from Lars:
     20  ** The precondition that must be fulfilled is that DoMatch will consume
     21  ** at least one character in text. This is true if *p is neither '*' nor
     22  ** '\0'.) The last return has ABORT instead of FALSE to avoid quadratic
     23  ** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx".  With
     24  ** FALSE, each star-loop has to run to the end of the text; with ABORT
     25  ** only the last one does.
     26  **
     27  ** Once the control of one instance of DoMatch enters the star-loop, that
     28  ** instance will return either TRUE or ABORT, and any calling instance
     29  ** will therefore return immediately after (without calling recursively
     30  ** again). In effect, only one star-loop is ever active. It would be
     31  ** possible to modify the code to maintain this context explicitly,
     32  ** eliminating all recursive calls at the cost of some complication and
     33  ** loss of clarity (and the ABORT stuff seems to be unclear enough by
     34  ** itself). I think it would be unwise to try to get this into a
     35  ** released version unless you have a good test data base to try it
     36  ** out on.
     37  */
     38 
     39 #include "cs_config.h"
     40 
     41 #include <ctype.h>
     42 #include "neo_misc.h"
     43 
     44 #define ABORT -1
     45 
     46 
     47  /* What character marks an inverted character class? */
     48 #define NEGATE_CLASS '^'
     49  /* Is "*" a common pattern? */
     50 #define OPTIMIZE_JUST_STAR
     51  /* Do tar(1) matching rules, which ignore a trailing slash? */
     52 #undef MATCH_TAR_PATTERN
     53 
     54 
     55 /*
     56  ** Match text and p, return TRUE, FALSE, or ABORT.
     57  */
     58   static int
     59 DoMatch(register const char *text, register const char *p)
     60 {
     61   register int last;
     62   register int matched;
     63   register int reverse;
     64 
     65   for ( ; *p; text++, p++) {
     66     if (*text == '\0' && *p != '*')
     67       return ABORT;
     68     switch (*p) {
     69       case '\\':
     70 	/* Literal match with following character. */
     71 	p++;
     72 	/* FALLTHROUGH */
     73       default:
     74 	if (*text != *p)
     75 	  return FALSE;
     76 	continue;
     77       case '?':
     78 	/* Match anything. */
     79 	continue;
     80       case '*':
     81 	while (*++p == '*')
     82 	  /* Consecutive stars act just like one. */
     83 	  continue;
     84 	if (*p == '\0')
     85 	  /* Trailing star matches everything. */
     86 	  return TRUE;
     87 	while (*text)
     88 	  if ((matched = DoMatch(text++, p)) != FALSE)
     89 	    return matched;
     90 	return ABORT;
     91       case '[':
     92 	reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
     93 	if (reverse)
     94 	  /* Inverted character class. */
     95 	  p++;
     96 	matched = FALSE;
     97 	if (p[1] == ']' || p[1] == '-')
     98 	  if (*++p == *text)
     99 	    matched = TRUE;
    100 	for (last = *p; *++p && *p != ']'; last = *p)
    101 	  /* This next line requires a good C compiler. */
    102 	  if (*p == '-' && p[1] != ']'
    103 	      ? *text <= *++p && *text >= last : *text == *p)
    104 	    matched = TRUE;
    105 	if (matched == reverse)
    106 	  return FALSE;
    107 	continue;
    108     }
    109   }
    110 
    111 #ifdef MATCH_TAR_PATTERN
    112   if (*text == '/')
    113     return TRUE;
    114 #endif /* MATCH_TAR_ATTERN */
    115   return *text == '\0';
    116 }
    117 
    118 
    119 /*
    120  ** Match text and p, return TRUE, FALSE, or ABORT.
    121  */
    122 static int
    123 DoMatchCaseInsensitive(register const char *text, register const char *p)
    124 {
    125   register int last;
    126   register int matched;
    127   register int reverse;
    128 
    129   for ( ; *p; text++, p++) {
    130     if (*text == '\0' && *p != '*')
    131       return ABORT;
    132     switch (*p) {
    133       case '\\':
    134 	/* Literal match with following character. */
    135 	p++;
    136 	/* FALLTHROUGH */
    137       default:
    138 	if (toupper(*text) != toupper(*p))
    139 	  return FALSE;
    140 	continue;
    141       case '?':
    142 	/* Match anything. */
    143 	continue;
    144       case '*':
    145 	while (*++p == '*')
    146 	  /* Consecutive stars act just like one. */
    147 	  continue;
    148 	if (*p == '\0')
    149 	  /* Trailing star matches everything. */
    150 	  return TRUE;
    151 	while (*text)
    152 	  if ((matched = DoMatchCaseInsensitive(text++, p)) != FALSE)
    153 	    return matched;
    154 	return ABORT;
    155       case '[':
    156 	reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
    157 	if (reverse)
    158 	  /* Inverted character class. */
    159 	  p++;
    160 	matched = FALSE;
    161 	if (p[1] == ']' || p[1] == '-')
    162 	  if (toupper(*++p) == toupper(*text))
    163 	    matched = TRUE;
    164 	for (last = toupper(*p); *++p && *p != ']'; last = toupper(*p))
    165 	  /* This next line requires a good C compiler. */
    166 	  if (*p == '-' && p[1] != ']'
    167 	      ? toupper(*text) <= toupper(*++p) && toupper(*text) >= last : toupper(*text) == toupper(*p))
    168 	    matched = TRUE;
    169 	if (matched == reverse)
    170 	  return FALSE;
    171 	continue;
    172     }
    173   }
    174 
    175 #ifdef MATCH_TAR_PATTERN
    176   if (*text == '/')
    177     return TRUE;
    178 #endif /* MATCH_TAR_ATTERN */
    179   return *text == '\0';
    180 }
    181 
    182 
    183 /*
    184  ** User-level routine. Returns TRUE or FALSE.
    185  */
    186 int
    187 wildmat(const char *text, const char *p)
    188 {
    189 #ifdef OPTIMIZE_JUST_STAR
    190   if (p[0] == '*' && p[1] == '\0')
    191     return TRUE;
    192 #endif /* OPTIMIZE_JUST_STAR */
    193   return DoMatch(text, p) == TRUE;
    194 }
    195 
    196 /*
    197  ** User-level routine. Returns TRUE or FALSE.
    198  */
    199 int
    200 wildmatcase(const char *text, const char *p)
    201 {
    202 #ifdef OPTIMIZE_JUST_STAR
    203   if (p[0] == '*' && p[1] == '\0')
    204     return TRUE;
    205 #endif /* OPTIMIZE_JUST_STAR */
    206   return DoMatchCaseInsensitive(text, p) == TRUE;
    207 }
    208