Home | History | Annotate | Download | only in lib
      1 /***************************************************************************
      2  *                                  _   _ ____  _
      3  *  Project                     ___| | | |  _ \| |
      4  *                             / __| | | | |_) | |
      5  *                            | (__| |_| |  _ <| |___
      6  *                             \___|\___/|_| \_\_____|
      7  *
      8  * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel (at) haxx.se>, et al.
      9  *
     10  * This software is licensed as described in the file COPYING, which
     11  * you should have received as part of this distribution. The terms
     12  * are also available at https://curl.haxx.se/docs/copyright.html.
     13  *
     14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
     15  * copies of the Software, and permit persons to whom the Software is
     16  * furnished to do so, under the terms of the COPYING file.
     17  *
     18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
     19  * KIND, either express or implied.
     20  *
     21  ***************************************************************************/
     22 
     23 #include "curl_setup.h"
     24 
     25 #include <curl/curl.h>
     26 
     27 #include "curl_fnmatch.h"
     28 #include "curl_memory.h"
     29 
     30 /* The last #include file should be: */
     31 #include "memdebug.h"
     32 
/* Number of "literal" slots in a charset table: one per unsigned char value */
#define CURLFNM_CHARSET_LEN (sizeof(char) * 256)
/* Total table size: the literal slots plus 15 extra flag slots. The code in
   this file uses flag offsets 0 through 10 only. */
#define CURLFNM_CHSET_SIZE (CURLFNM_CHARSET_LEN + 15)

/* Flag slot: set when the bracket expression is negated ('^' or '!') */
#define CURLFNM_NEGATE  CURLFNM_CHARSET_LEN

/* Flag slots: one per supported [:class:] keyword. A non-zero value means
   the class was named inside the bracket expression. */
#define CURLFNM_ALNUM   (CURLFNM_CHARSET_LEN + 1)
#define CURLFNM_DIGIT   (CURLFNM_CHARSET_LEN + 2)
#define CURLFNM_XDIGIT  (CURLFNM_CHARSET_LEN + 3)
#define CURLFNM_ALPHA   (CURLFNM_CHARSET_LEN + 4)
#define CURLFNM_PRINT   (CURLFNM_CHARSET_LEN + 5)
#define CURLFNM_BLANK   (CURLFNM_CHARSET_LEN + 6)
#define CURLFNM_LOWER   (CURLFNM_CHARSET_LEN + 7)
#define CURLFNM_GRAPH   (CURLFNM_CHARSET_LEN + 8)
#define CURLFNM_SPACE   (CURLFNM_CHARSET_LEN + 9)
#define CURLFNM_UPPER   (CURLFNM_CHARSET_LEN + 10)

/* States of the top-level matcher, loop() */
typedef enum {
  CURLFNM_LOOP_DEFAULT = 0, /* ordinary pattern character expected */
  CURLFNM_LOOP_BACKSLASH    /* previous pattern character was a backslash */
} loop_state;

/* States of the bracket expression parser, setcharset() */
typedef enum {
  CURLFNM_SCHS_DEFAULT = 0,   /* expecting any set member */
  CURLFNM_SCHS_MAYRANGE,      /* a member was read; a '-' may follow */
  CURLFNM_SCHS_MAYRANGE2,     /* '-' was read; a range end may follow */
  CURLFNM_SCHS_RIGHTBR,       /* ']' was read as the very first member */
  CURLFNM_SCHS_RIGHTBRLEFTBR  /* '[' was read right after that first ']' */
} setcharset_state;

/* States of the [:keyword:] parser, parsekeyword() */
typedef enum {
  CURLFNM_PKW_INIT = 0, /* collecting the lowercase keyword letters */
  CURLFNM_PKW_DDOT      /* ':' was read; only ']' may follow */
} parsekey_state;

/* Return values of setcharset() and parsekeyword() */
#define SETCHARSET_OK     1
#define SETCHARSET_FAIL   0
     69 
     70 static int parsekeyword(unsigned char **pattern, unsigned char *charset)
     71 {
     72   parsekey_state state = CURLFNM_PKW_INIT;
     73 #define KEYLEN 10
     74   char keyword[KEYLEN] = { 0 };
     75   int found = FALSE;
     76   int i;
     77   unsigned char *p = *pattern;
     78   for(i = 0; !found; i++) {
     79     char c = *p++;
     80     if(i >= KEYLEN)
     81       return SETCHARSET_FAIL;
     82     switch(state) {
     83     case CURLFNM_PKW_INIT:
     84       if(ISALPHA(c) && ISLOWER(c))
     85         keyword[i] = c;
     86       else if(c == ':')
     87         state = CURLFNM_PKW_DDOT;
     88       else
     89         return 0;
     90       break;
     91     case CURLFNM_PKW_DDOT:
     92       if(c == ']')
     93         found = TRUE;
     94       else
     95         return SETCHARSET_FAIL;
     96     }
     97   }
     98 #undef KEYLEN
     99 
    100   *pattern = p; /* move caller's pattern pointer */
    101   if(strcmp(keyword, "digit") == 0)
    102     charset[CURLFNM_DIGIT] = 1;
    103   else if(strcmp(keyword, "alnum") == 0)
    104     charset[CURLFNM_ALNUM] = 1;
    105   else if(strcmp(keyword, "alpha") == 0)
    106     charset[CURLFNM_ALPHA] = 1;
    107   else if(strcmp(keyword, "xdigit") == 0)
    108     charset[CURLFNM_XDIGIT] = 1;
    109   else if(strcmp(keyword, "print") == 0)
    110     charset[CURLFNM_PRINT] = 1;
    111   else if(strcmp(keyword, "graph") == 0)
    112     charset[CURLFNM_GRAPH] = 1;
    113   else if(strcmp(keyword, "space") == 0)
    114     charset[CURLFNM_SPACE] = 1;
    115   else if(strcmp(keyword, "blank") == 0)
    116     charset[CURLFNM_BLANK] = 1;
    117   else if(strcmp(keyword, "upper") == 0)
    118     charset[CURLFNM_UPPER] = 1;
    119   else if(strcmp(keyword, "lower") == 0)
    120     charset[CURLFNM_LOWER] = 1;
    121   else
    122     return SETCHARSET_FAIL;
    123   return SETCHARSET_OK;
    124 }
    125 
    126 /* returns 1 (true) if pattern is OK, 0 if is bad ("p" is pattern pointer) */
    127 static int setcharset(unsigned char **p, unsigned char *charset)
    128 {
    129   setcharset_state state = CURLFNM_SCHS_DEFAULT;
    130   unsigned char rangestart = 0;
    131   unsigned char lastchar   = 0;
    132   bool something_found = FALSE;
    133   unsigned char c;
    134   for(;;) {
    135     c = **p;
    136     if(!c)
    137       return SETCHARSET_FAIL;
    138 
    139     switch(state) {
    140     case CURLFNM_SCHS_DEFAULT:
    141       if(ISALNUM(c)) { /* ASCII value */
    142         rangestart = c;
    143         charset[c] = 1;
    144         (*p)++;
    145         state = CURLFNM_SCHS_MAYRANGE;
    146         something_found = TRUE;
    147       }
    148       else if(c == ']') {
    149         if(something_found)
    150           return SETCHARSET_OK;
    151         something_found = TRUE;
    152         state = CURLFNM_SCHS_RIGHTBR;
    153         charset[c] = 1;
    154         (*p)++;
    155       }
    156       else if(c == '[') {
    157         char c2 = *((*p) + 1);
    158         if(c2 == ':') { /* there has to be a keyword */
    159           (*p) += 2;
    160           if(parsekeyword(p, charset)) {
    161             state = CURLFNM_SCHS_DEFAULT;
    162           }
    163           else
    164             return SETCHARSET_FAIL;
    165         }
    166         else {
    167           charset[c] = 1;
    168           (*p)++;
    169         }
    170         something_found = TRUE;
    171       }
    172       else if(c == '?' || c == '*') {
    173         something_found = TRUE;
    174         charset[c] = 1;
    175         (*p)++;
    176       }
    177       else if(c == '^' || c == '!') {
    178         if(!something_found) {
    179           if(charset[CURLFNM_NEGATE]) {
    180             charset[c] = 1;
    181             something_found = TRUE;
    182           }
    183           else
    184             charset[CURLFNM_NEGATE] = 1; /* negate charset */
    185         }
    186         else
    187           charset[c] = 1;
    188         (*p)++;
    189       }
    190       else if(c == '\\') {
    191         c = *(++(*p));
    192         if(ISPRINT((c))) {
    193           something_found = TRUE;
    194           state = CURLFNM_SCHS_MAYRANGE;
    195           charset[c] = 1;
    196           rangestart = c;
    197           (*p)++;
    198         }
    199         else
    200           return SETCHARSET_FAIL;
    201       }
    202       else {
    203         charset[c] = 1;
    204         (*p)++;
    205         something_found = TRUE;
    206       }
    207       break;
    208     case CURLFNM_SCHS_MAYRANGE:
    209       if(c == '-') {
    210         charset[c] = 1;
    211         (*p)++;
    212         lastchar = '-';
    213         state = CURLFNM_SCHS_MAYRANGE2;
    214       }
    215       else if(c == '[') {
    216         state = CURLFNM_SCHS_DEFAULT;
    217       }
    218       else if(ISALNUM(c)) {
    219         charset[c] = 1;
    220         (*p)++;
    221       }
    222       else if(c == '\\') {
    223         c = *(++(*p));
    224         if(ISPRINT(c)) {
    225           charset[c] = 1;
    226           (*p)++;
    227         }
    228         else
    229           return SETCHARSET_FAIL;
    230       }
    231       else if(c == ']') {
    232         return SETCHARSET_OK;
    233       }
    234       else
    235         return SETCHARSET_FAIL;
    236       break;
    237     case CURLFNM_SCHS_MAYRANGE2:
    238       if(c == ']') {
    239         return SETCHARSET_OK;
    240       }
    241       else if(c == '\\') {
    242         c = *(++(*p));
    243         if(ISPRINT(c)) {
    244           charset[c] = 1;
    245           state = CURLFNM_SCHS_DEFAULT;
    246           (*p)++;
    247         }
    248         else
    249           return SETCHARSET_FAIL;
    250       }
    251       else if(c >= rangestart) {
    252         if((ISLOWER(c) && ISLOWER(rangestart)) ||
    253            (ISDIGIT(c) && ISDIGIT(rangestart)) ||
    254            (ISUPPER(c) && ISUPPER(rangestart))) {
    255           charset[lastchar] = 0;
    256           rangestart++;
    257           while(rangestart++ <= c)
    258             charset[rangestart-1] = 1;
    259           (*p)++;
    260           state = CURLFNM_SCHS_DEFAULT;
    261         }
    262         else
    263           return SETCHARSET_FAIL;
    264       }
    265       else
    266         return SETCHARSET_FAIL;
    267       break;
    268     case CURLFNM_SCHS_RIGHTBR:
    269       if(c == '[') {
    270         state = CURLFNM_SCHS_RIGHTBRLEFTBR;
    271         charset[c] = 1;
    272         (*p)++;
    273       }
    274       else if(c == ']') {
    275         return SETCHARSET_OK;
    276       }
    277       else if(ISPRINT(c)) {
    278         charset[c] = 1;
    279         (*p)++;
    280         state = CURLFNM_SCHS_DEFAULT;
    281       }
    282       else
    283         /* used 'goto fail' instead of 'return SETCHARSET_FAIL' to avoid a
    284          * nonsense warning 'statement not reached' at end of the fnc when
    285          * compiling on Solaris */
    286         goto fail;
    287       break;
    288     case CURLFNM_SCHS_RIGHTBRLEFTBR:
    289       if(c == ']') {
    290         return SETCHARSET_OK;
    291       }
    292       else {
    293         state  = CURLFNM_SCHS_DEFAULT;
    294         charset[c] = 1;
    295         (*p)++;
    296       }
    297       break;
    298     }
    299   }
    300 fail:
    301   return SETCHARSET_FAIL;
    302 }
    303 
    304 static int loop(const unsigned char *pattern, const unsigned char *string,
    305                 int maxstars)
    306 {
    307   loop_state state = CURLFNM_LOOP_DEFAULT;
    308   unsigned char *p = (unsigned char *)pattern;
    309   unsigned char *s = (unsigned char *)string;
    310   unsigned char charset[CURLFNM_CHSET_SIZE] = { 0 };
    311   int rc = 0;
    312 
    313   for(;;) {
    314     switch(state) {
    315     case CURLFNM_LOOP_DEFAULT:
    316       if(*p == '*') {
    317         if(!maxstars)
    318           return CURL_FNMATCH_NOMATCH;
    319         while(*(p + 1) == '*') /* eliminate multiple stars */
    320           p++;
    321         if(*s == '\0' && *(p + 1) == '\0')
    322           return CURL_FNMATCH_MATCH;
    323         rc = loop(p + 1, s, maxstars - 1); /* *.txt matches .txt <=>
    324                                               .txt matches .txt */
    325         if(rc == CURL_FNMATCH_MATCH)
    326           return CURL_FNMATCH_MATCH;
    327         if(*s) /* let the star eat up one character */
    328           s++;
    329         else
    330           return CURL_FNMATCH_NOMATCH;
    331       }
    332       else if(*p == '?') {
    333         if(ISPRINT(*s)) {
    334           s++;
    335           p++;
    336         }
    337         else if(*s == '\0')
    338           return CURL_FNMATCH_NOMATCH;
    339         else
    340           return CURL_FNMATCH_FAIL; /* cannot deal with other character */
    341       }
    342       else if(*p == '\0') {
    343         if(*s == '\0')
    344           return CURL_FNMATCH_MATCH;
    345         return CURL_FNMATCH_NOMATCH;
    346       }
    347       else if(*p == '\\') {
    348         state = CURLFNM_LOOP_BACKSLASH;
    349         p++;
    350       }
    351       else if(*p == '[') {
    352         unsigned char *pp = p + 1; /* cannot handle with pointer to register */
    353         if(setcharset(&pp, charset)) {
    354           int found = FALSE;
    355           if(charset[(unsigned int)*s])
    356             found = TRUE;
    357           else if(charset[CURLFNM_ALNUM])
    358             found = ISALNUM(*s);
    359           else if(charset[CURLFNM_ALPHA])
    360             found = ISALPHA(*s);
    361           else if(charset[CURLFNM_DIGIT])
    362             found = ISDIGIT(*s);
    363           else if(charset[CURLFNM_XDIGIT])
    364             found = ISXDIGIT(*s);
    365           else if(charset[CURLFNM_PRINT])
    366             found = ISPRINT(*s);
    367           else if(charset[CURLFNM_SPACE])
    368             found = ISSPACE(*s);
    369           else if(charset[CURLFNM_UPPER])
    370             found = ISUPPER(*s);
    371           else if(charset[CURLFNM_LOWER])
    372             found = ISLOWER(*s);
    373           else if(charset[CURLFNM_BLANK])
    374             found = ISBLANK(*s);
    375           else if(charset[CURLFNM_GRAPH])
    376             found = ISGRAPH(*s);
    377 
    378           if(charset[CURLFNM_NEGATE])
    379             found = !found;
    380 
    381           if(found) {
    382             p = pp + 1;
    383             if(*s)
    384               /* don't advance if we're matching on an empty string */
    385               s++;
    386             memset(charset, 0, CURLFNM_CHSET_SIZE);
    387           }
    388           else
    389             return CURL_FNMATCH_NOMATCH;
    390         }
    391         else
    392           return CURL_FNMATCH_FAIL;
    393       }
    394       else {
    395         if(*p++ != *s++)
    396           return CURL_FNMATCH_NOMATCH;
    397       }
    398       break;
    399     case CURLFNM_LOOP_BACKSLASH:
    400       if(ISPRINT(*p)) {
    401         if(*p++ == *s++)
    402           state = CURLFNM_LOOP_DEFAULT;
    403         else
    404           return CURL_FNMATCH_NOMATCH;
    405       }
    406       else
    407         return CURL_FNMATCH_FAIL;
    408       break;
    409     }
    410   }
    411 }
    412 
    413 /*
    414  * @unittest: 1307
    415  */
    416 int Curl_fnmatch(void *ptr, const char *pattern, const char *string)
    417 {
    418   (void)ptr; /* the argument is specified by the curl_fnmatch_callback
    419                 prototype, but not used by Curl_fnmatch() */
    420   if(!pattern || !string) {
    421     return CURL_FNMATCH_FAIL;
    422   }
    423   return loop((unsigned char *)pattern, (unsigned char *)string, 5);
    424 }
    425