Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10           New API code Copyright (c) 2016-2018 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #include "pcre2_internal.h"
     47 
/* Mask of the option bits that select which kind of pattern is being
converted: glob, POSIX basic, or POSIX extended. */

#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \
  PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)

/* Mask of every conversion option bit named in this file: the UTF controls,
the two glob modifiers, and the type-selection bits above. */

#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
  PCRE2_CONVERT_GLOB_NO_STARSTAR| \
  TYPE_OPTIONS)

/* NOTE(review): presumably the size, in code units, of the scratch buffer used
for a dummy (length-measuring) run; its use is not in this part of the file. */

#define DUMMY_BUFFER_SIZE 100
     57 
/* Generated pattern fragments. Each is built by concatenating the
single-character STR_xxx string macros; the text each one is expected to spell
(assuming every STR_xxx is the one-character string its name suggests) is shown
alongside. */

#define STR_BACKSLASH_A STR_BACKSLASH STR_A                                /* \A */
#define STR_BACKSLASH_z STR_BACKSLASH STR_z                                /* \z */
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET  /* :] */
/* .*(?<=   (the lookbehind is left open for the caller to complete) */
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
/* (?!\.) */
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
/* (?s) */
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
/* (*NUL) */
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
     67 
/* States for range and POSIX processing. The order of the POSIX states
matters: convert_posix() tests "state >= POSIX_CLASS_NOT_STARTED" to mean
"inside a bracket expression" and "state < POSIX_NOT_BRACKET" to mean "nothing
has been matched yet at this point", so new values must respect that ordering. */

enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
       POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
     73 
/* Macro to add a character string to the output buffer, checking for overflow.

The macro is not self-contained: it uses the variables s (a char * scratch
pointer), p (the current output position) and endp (the output limit) from the
enclosing function, and on overflow it executes a return of
PCRE2_ERROR_NOMEMORY from that function. The string is copied one char at a
time up to, but not including, its terminating zero. */

#define PUTCHARS(string) \
  { \
  for (s = (char *)(string); *s != 0; s++) \
    { \
    if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
    *p++ = *s; \
    } \
  }
     84 
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( )

The list is searched with strchr(), which also matches the terminating zero,
so callers that care about a NUL character must test for it separately. */

static const char *pcre2_escaped_literals =
  STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
  STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
  STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;

/* Recognized escaped metacharacters in POSIX basic patterns: the characters
that are special only when preceded by a backslash, namely ( ) { } and the
back-reference digits 1-9. */

static const char *posix_meta_escapes =
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
  STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
    100 
    101 
    102 
    103 /*************************************************
    104 *           Convert a POSIX pattern              *
    105 *************************************************/
    106 
    107 /* This function handles both basic and extended POSIX patterns.
    108 
    109 Arguments:
    110   pattype        the pattern type
    111   pattern        the pattern
    112   plength        length in code units
    113   utf            TRUE if UTF
    114   use_buffer     where to put the output
    115   use_length     length of use_buffer
    116   bufflenptr     where to put the used length
    117   dummyrun       TRUE if a dummy run
    118   ccontext       the convert context
    119 
    120 Returns:         0 => success
    121                 !0 => error code
    122 */
    123 
    124 static int
    125 convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
    126   BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
    127   PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
    128 {
    129 char *s;
    130 PCRE2_SPTR posix = pattern;
    131 PCRE2_UCHAR *p = use_buffer;
    132 PCRE2_UCHAR *pp = p;
    133 PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
    134 PCRE2_SIZE convlength = 0;
    135 
    136 uint32_t bracount = 0;
    137 uint32_t posix_state = POSIX_START_REGEX;
    138 uint32_t lastspecial = 0;
    139 BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
    140 BOOL nextisliteral = FALSE;
    141 
    142 (void)utf;       /* Not used when Unicode not supported */
    143 (void)ccontext;  /* Not currently used */
    144 
    145 /* Initialize default for error offset as end of input. */
    146 
    147 *bufflenptr = plength;
    148 PUTCHARS(STR_STAR_NUL);
    149 
    150 /* Now scan the input. */
    151 
    152 while (plength > 0)
    153   {
    154   uint32_t c, sc;
    155   int clength = 1;
    156 
    157   /* Add in the length of the last item, then, if in the dummy run, pull the
    158   pointer back to the start of the (temporary) buffer and then remember the
    159   start of the next item. */
    160 
    161   convlength += p - pp;
    162   if (dummyrun) p = use_buffer;
    163   pp = p;
    164 
    165   /* Pick up the next character */
    166 
    167 #ifndef SUPPORT_UNICODE
    168   c = *posix;
    169 #else
    170   GETCHARLENTEST(c, posix, clength);
    171 #endif
    172   posix += clength;
    173   plength -= clength;
    174 
    175   sc = nextisliteral? 0 : c;
    176   nextisliteral = FALSE;
    177 
    178   /* Handle a character within a class. */
    179 
    180   if (posix_state >= POSIX_CLASS_NOT_STARTED)
    181     {
    182     if (c == CHAR_RIGHT_SQUARE_BRACKET)
    183       {
    184       PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
    185       posix_state = POSIX_NOT_BRACKET;
    186       }
    187 
    188     /* Not the end of the class */
    189 
    190     else
    191       {
    192       switch (posix_state)
    193         {
    194         case POSIX_CLASS_STARTED:
    195         if (c <= 127 && islower(c)) break;  /* Remain in started state */
    196         posix_state = POSIX_CLASS_NOT_STARTED;
    197         if (c == CHAR_COLON  && plength > 0 &&
    198             *posix == CHAR_RIGHT_SQUARE_BRACKET)
    199           {
    200           PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
    201           plength--;
    202           posix++;
    203           continue;    /* With next character after :] */
    204           }
    205         /* Fall through */
    206 
    207         case POSIX_CLASS_NOT_STARTED:
    208         if (c == CHAR_LEFT_SQUARE_BRACKET)
    209           posix_state = POSIX_CLASS_STARTING;
    210         break;
    211 
    212         case POSIX_CLASS_STARTING:
    213         if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
    214         break;
    215         }
    216 
    217       if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
    218       if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    219       memcpy(p, posix - clength, CU2BYTES(clength));
    220       p += clength;
    221       }
    222     }
    223 
    224   /* Handle a character not within a class. */
    225 
    226   else switch(sc)
    227     {
    228     case CHAR_LEFT_SQUARE_BRACKET:
    229     PUTCHARS(STR_LEFT_SQUARE_BRACKET);
    230 
    231 #ifdef NEVER
    232     /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
    233     support) but they are not part of POSIX 1003.1. */
    234 
    235     if (plength >= 6)
    236       {
    237       if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
    238           posix[1] == CHAR_COLON &&
    239           (posix[2] == CHAR_LESS_THAN_SIGN ||
    240            posix[2] == CHAR_GREATER_THAN_SIGN) &&
    241           posix[3] == CHAR_COLON &&
    242           posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
    243           posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
    244         {
    245         if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
    246         memcpy(p, posix, CU2BYTES(6));
    247         p += 6;
    248         posix += 6;
    249         plength -= 6;
    250         continue;  /* With next character */
    251         }
    252       }
    253 #endif
    254 
    255     /* Handle start of "normal" character classes */
    256 
    257     posix_state = POSIX_CLASS_NOT_STARTED;
    258 
    259     /* Handle ^ and ] as first characters */
    260 
    261     if (plength > 0)
    262       {
    263       if (*posix == CHAR_CIRCUMFLEX_ACCENT)
    264         {
    265         posix++;
    266         plength--;
    267         PUTCHARS(STR_CIRCUMFLEX_ACCENT);
    268         }
    269       if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
    270         {
    271         posix++;
    272         plength--;
    273         PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
    274         }
    275       }
    276     break;
    277 
    278     case CHAR_BACKSLASH:
    279     if (plength <= 0) return PCRE2_ERROR_END_BACKSLASH;
    280     if (extended) nextisliteral = TRUE; else
    281       {
    282       if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
    283         {
    284         if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
    285         if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
    286         lastspecial = *p++ = *posix++;
    287         plength--;
    288         }
    289       else nextisliteral = TRUE;
    290       }
    291     break;
    292 
    293     case CHAR_RIGHT_PARENTHESIS:
    294     if (!extended || bracount == 0) goto ESCAPE_LITERAL;
    295     bracount--;
    296     goto COPY_SPECIAL;
    297 
    298     case CHAR_LEFT_PARENTHESIS:
    299     bracount++;
    300     /* Fall through */
    301 
    302     case CHAR_QUESTION_MARK:
    303     case CHAR_PLUS:
    304     case CHAR_LEFT_CURLY_BRACKET:
    305     case CHAR_RIGHT_CURLY_BRACKET:
    306     case CHAR_VERTICAL_LINE:
    307     if (!extended) goto ESCAPE_LITERAL;
    308     /* Fall through */
    309 
    310     case CHAR_DOT:
    311     case CHAR_DOLLAR_SIGN:
    312     posix_state = POSIX_NOT_BRACKET;
    313     COPY_SPECIAL:
    314     lastspecial = c;
    315     if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
    316     *p++ = c;
    317     break;
    318 
    319     case CHAR_ASTERISK:
    320     if (lastspecial != CHAR_ASTERISK)
    321       {
    322       if (!extended && (posix_state < POSIX_NOT_BRACKET ||
    323           lastspecial == CHAR_LEFT_PARENTHESIS))
    324         goto ESCAPE_LITERAL;
    325       goto COPY_SPECIAL;
    326       }
    327     break;   /* Ignore second and subsequent asterisks */
    328 
    329     case CHAR_CIRCUMFLEX_ACCENT:
    330     if (extended) goto COPY_SPECIAL;
    331     if (posix_state == POSIX_START_REGEX ||
    332         lastspecial == CHAR_LEFT_PARENTHESIS)
    333       {
    334       posix_state = POSIX_ANCHORED;
    335       goto COPY_SPECIAL;
    336       }
    337     /* Fall through */
    338 
    339     default:
    340     if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
    341       {
    342       ESCAPE_LITERAL:
    343       PUTCHARS(STR_BACKSLASH);
    344       }
    345     lastspecial = 0xff;  /* Indicates nothing special */
    346     if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    347     memcpy(p, posix - clength, CU2BYTES(clength));
    348     p += clength;
    349     posix_state = POSIX_NOT_BRACKET;
    350     break;
    351     }
    352   }
    353 
    354 if (posix_state >= POSIX_CLASS_NOT_STARTED)
    355   return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
    356 convlength += p - pp;        /* Final segment */
    357 *bufflenptr = convlength;
    358 *p++ = 0;
    359 return 0;
    360 }
    361 
    362 
    363 /*************************************************
    364 *           Convert a glob pattern               *
    365 *************************************************/
    366 
    367 /* Context for writing the output into a buffer. */
    368 
    369 typedef struct pcre2_output_context {
    370   PCRE2_UCHAR *output;                  /* current output position */
    371   PCRE2_SPTR output_end;                /* output end */
    372   PCRE2_SIZE output_size;               /* size of the output */
    373   uint8_t out_str[8];                   /* string copied to the output */
    374 } pcre2_output_context;
    375 
    376 
    377 /* Write a character into the output.
    378 
    379 Arguments:
    380   out            output context
    381   chr            the next character
    382 */
    383 
    384 static void
    385 convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
    386 {
    387 out->output_size++;
    388 
    389 if (out->output < out->output_end)
    390   *out->output++ = chr;
    391 }
    392 
    393 
    394 /* Write a string into the output.
    395 
    396 Arguments:
    397   out            output context
    398   length         length of out->out_str
    399 */
    400 
    401 static void
    402 convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
    403 {
    404 uint8_t *out_str = out->out_str;
    405 PCRE2_UCHAR *output = out->output;
    406 PCRE2_SPTR output_end = out->output_end;
    407 PCRE2_SIZE output_size = out->output_size;
    408 
    409 do
    410   {
    411   output_size++;
    412 
    413   if (output < output_end)
    414     *output++ = *out_str++;
    415   }
    416 while (--length != 0);
    417 
    418 out->output = output;
    419 out->output_size = output_size;
    420 }
    421 
    422 
    423 /* Prints the separator into the output.
    424 
    425 Arguments:
    426   out            output context
    427   separator      glob separator
    428   with_escape    backslash is needed before separator
    429 */
    430 
    431 static void
    432 convert_glob_print_separator(pcre2_output_context *out,
    433   PCRE2_UCHAR separator, BOOL with_escape)
    434 {
    435 if (with_escape)
    436   convert_glob_write(out, CHAR_BACKSLASH);
    437 
    438 convert_glob_write(out, separator);
    439 }
    440 
    441 
    442 /* Prints a wildcard into the output.
    443 
    444 Arguments:
    445   out            output context
    446   separator      glob separator
    447   with_escape    backslash is needed before separator
    448 */
    449 
    450 static void
    451 convert_glob_print_wildcard(pcre2_output_context *out,
    452   PCRE2_UCHAR separator, BOOL with_escape)
    453 {
    454 out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
    455 out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
    456 convert_glob_write_str(out, 2);
    457 
    458 convert_glob_print_separator(out, separator, with_escape);
    459 
    460 convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
    461 }
    462 
    463 
    464 /* Parse a posix class.
    465 
    466 Arguments:
    467   from           starting point of scanning the range
    468   pattern_end    end of pattern
    469   out            output context
    470 
    471 Returns:  >0 => class index
    472           0  => malformed class
    473 */
    474 
    475 static int
    476 convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
    477   pcre2_output_context *out)
    478 {
    479 static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
    480   "graph:lower:print:punct:space:upper:word:xdigit:";
    481 PCRE2_SPTR start = *from + 1;
    482 PCRE2_SPTR pattern = start;
    483 const char *class_ptr;
    484 PCRE2_UCHAR c;
    485 int class_index;
    486 
    487 while (TRUE)
    488   {
    489   if (pattern >= pattern_end) return 0;
    490 
    491   c = *pattern++;
    492 
    493   if (c < CHAR_a || c > CHAR_z) break;
    494   }
    495 
    496 if (c != CHAR_COLON || pattern >= pattern_end ||
    497     *pattern != CHAR_RIGHT_SQUARE_BRACKET)
    498   return 0;
    499 
    500 class_ptr = posix_classes;
    501 class_index = 1;
    502 
    503 while (TRUE)
    504   {
    505   if (*class_ptr == CHAR_NUL) return 0;
    506 
    507   pattern = start;
    508 
    509   while (*pattern == (PCRE2_UCHAR) *class_ptr)
    510     {
    511     if (*pattern == CHAR_COLON)
    512       {
    513       pattern += 2;
    514       start -= 2;
    515 
    516       do convert_glob_write(out, *start++); while (start < pattern);
    517 
    518       *from = pattern;
    519       return class_index;
    520       }
    521     pattern++;
    522     class_ptr++;
    523     }
    524 
    525   while (*class_ptr != CHAR_COLON) class_ptr++;
    526   class_ptr++;
    527   class_index++;
    528   }
    529 }
    530 
    531 /* Checks whether the character is in the class.
    532 
    533 Arguments:
    534   class_index    class index
    535   c              character
    536 
    537 Returns:   !0 => character is found in the class
    538             0 => otherwise
    539 */
    540 
    541 static BOOL
    542 convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
    543 {
    544 switch (class_index)
    545   {
    546   case 1: return isalnum(c);
    547   case 2: return isalpha(c);
    548   case 3: return 1;
    549   case 4: return c == CHAR_HT || c == CHAR_SPACE;
    550   case 5: return iscntrl(c);
    551   case 6: return isdigit(c);
    552   case 7: return isgraph(c);
    553   case 8: return islower(c);
    554   case 9: return isprint(c);
    555   case 10: return ispunct(c);
    556   case 11: return isspace(c);
    557   case 12: return isupper(c);
    558   case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
    559   default: return isxdigit(c);
    560   }
    561 }
    562 
    563 /* Parse a range of characters.
    564 
    565 Arguments:
    566   from           starting point of scanning the range
    567   pattern_end    end of pattern
    568   out            output context
    569   separator      glob separator
    570   with_escape    backslash is needed before separator
    571 
    572 Returns:         0 => success
    573                 !0 => error code
    574 */
    575 
    576 static int
    577 convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
    578   pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
    579   BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
    580 {
    581 BOOL is_negative = FALSE;
    582 BOOL separator_seen = FALSE;
    583 BOOL has_prev_c;
    584 PCRE2_SPTR pattern = *from;
    585 PCRE2_SPTR char_start = NULL;
    586 uint32_t c, prev_c;
    587 int len, class_index;
    588 
    589 (void)utf; /* Avoid compiler warning. */
    590 
    591 if (pattern >= pattern_end)
    592   {
    593   *from = pattern;
    594   return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
    595   }
    596 
    597 if (*pattern == CHAR_EXCLAMATION_MARK
    598     || *pattern == CHAR_CIRCUMFLEX_ACCENT)
    599   {
    600   pattern++;
    601 
    602   if (pattern >= pattern_end)
    603     {
    604     *from = pattern;
    605     return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
    606     }
    607 
    608   is_negative = TRUE;
    609 
    610   out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
    611   out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
    612   len = 2;
    613 
    614   if (!no_wildsep)
    615     {
    616     if (with_escape)
    617       {
    618       out->out_str[len] = CHAR_BACKSLASH;
    619       len++;
    620       }
    621     out->out_str[len] = (uint8_t) separator;
    622     }
    623 
    624   convert_glob_write_str(out, len + 1);
    625   }
    626 else
    627   convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
    628 
    629 has_prev_c = FALSE;
    630 prev_c = 0;
    631 
    632 if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
    633   {
    634   out->out_str[0] = CHAR_BACKSLASH;
    635   out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
    636   convert_glob_write_str(out, 2);
    637   has_prev_c = TRUE;
    638   prev_c = CHAR_RIGHT_SQUARE_BRACKET;
    639   pattern++;
    640   }
    641 
    642 while (pattern < pattern_end)
    643   {
    644   char_start = pattern;
    645   GETCHARINCTEST(c, pattern);
    646 
    647   if (c == CHAR_RIGHT_SQUARE_BRACKET)
    648     {
    649     convert_glob_write(out, c);
    650 
    651     if (!is_negative && !no_wildsep && separator_seen)
    652       {
    653       out->out_str[0] = CHAR_LEFT_PARENTHESIS;
    654       out->out_str[1] = CHAR_QUESTION_MARK;
    655       out->out_str[2] = CHAR_LESS_THAN_SIGN;
    656       out->out_str[3] = CHAR_EXCLAMATION_MARK;
    657       convert_glob_write_str(out, 4);
    658 
    659       convert_glob_print_separator(out, separator, with_escape);
    660       convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
    661       }
    662 
    663     *from = pattern;
    664     return 0;
    665     }
    666 
    667   if (pattern >= pattern_end) break;
    668 
    669   if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
    670     {
    671     *from = pattern;
    672     class_index = convert_glob_parse_class(from, pattern_end, out);
    673 
    674     if (class_index != 0)
    675       {
    676       pattern = *from;
    677 
    678       has_prev_c = FALSE;
    679       prev_c = 0;
    680 
    681       if (!is_negative &&
    682           convert_glob_char_in_class (class_index, separator))
    683         separator_seen = TRUE;
    684       continue;
    685       }
    686     }
    687   else if (c == CHAR_MINUS && has_prev_c &&
    688            *pattern != CHAR_RIGHT_SQUARE_BRACKET)
    689     {
    690     convert_glob_write(out, CHAR_MINUS);
    691 
    692     char_start = pattern;
    693     GETCHARINCTEST(c, pattern);
    694 
    695     if (pattern >= pattern_end) break;
    696 
    697     if (escape != 0 && c == escape)
    698       {
    699       char_start = pattern;
    700       GETCHARINCTEST(c, pattern);
    701       }
    702     else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
    703       {
    704       *from = pattern;
    705       return PCRE2_ERROR_CONVERT_SYNTAX;
    706       }
    707 
    708     if (prev_c > c)
    709       {
    710       *from = pattern;
    711       return PCRE2_ERROR_CONVERT_SYNTAX;
    712       }
    713 
    714     if (prev_c < separator && separator < c) separator_seen = TRUE;
    715 
    716     has_prev_c = FALSE;
    717     prev_c = 0;
    718     }
    719   else
    720     {
    721     if (escape != 0 && c == escape)
    722       {
    723       char_start = pattern;
    724       GETCHARINCTEST(c, pattern);
    725 
    726       if (pattern >= pattern_end) break;
    727       }
    728 
    729     has_prev_c = TRUE;
    730     prev_c = c;
    731     }
    732 
    733   if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
    734       c == CHAR_BACKSLASH || c == CHAR_MINUS)
    735     convert_glob_write(out, CHAR_BACKSLASH);
    736 
    737   if (c == separator) separator_seen = TRUE;
    738 
    739   do convert_glob_write(out, *char_start++); while (char_start < pattern);
    740   }
    741 
    742 *from = pattern;
    743 return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
    744 }
    745 
    746 
    747 /* Prints a (*COMMIT) into the output.
    748 
    749 Arguments:
    750   out            output context
    751 */
    752 
    753 static void
    754 convert_glob_print_commit(pcre2_output_context *out)
    755 {
    756 out->out_str[0] = CHAR_LEFT_PARENTHESIS;
    757 out->out_str[1] = CHAR_ASTERISK;
    758 out->out_str[2] = CHAR_C;
    759 out->out_str[3] = CHAR_O;
    760 out->out_str[4] = CHAR_M;
    761 out->out_str[5] = CHAR_M;
    762 out->out_str[6] = CHAR_I;
    763 out->out_str[7] = CHAR_T;
    764 convert_glob_write_str(out, 8);
    765 convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
    766 }
    767 
    768 
    769 /* Bash glob converter.
    770 
    771 Arguments:
    772   pattype        the pattern type
    773   pattern        the pattern
    774   plength        length in code units
    775   utf            TRUE if UTF
    776   use_buffer     where to put the output
    777   use_length     length of use_buffer
    778   bufflenptr     where to put the used length
    779   dummyrun       TRUE if a dummy run
    780   ccontext       the convert context
    781 
    782 Returns:         0 => success
    783                 !0 => error code
    784 */
    785 
    786 static int
    787 convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
    788   BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
    789   PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
    790 {
    791 pcre2_output_context out;
    792 PCRE2_SPTR pattern_start = pattern;
    793 PCRE2_SPTR pattern_end = pattern + plength;
    794 PCRE2_UCHAR separator = ccontext->glob_separator;
    795 PCRE2_UCHAR escape = ccontext->glob_escape;
    796 PCRE2_UCHAR c;
    797 BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
    798 BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
    799 BOOL in_atomic = FALSE;
    800 BOOL after_starstar = FALSE;
    801 BOOL no_slash_z = FALSE;
    802 BOOL with_escape, is_start, after_separator;
    803 int result = 0;
    804 
    805 (void)utf; /* Avoid compiler warning. */
    806 
    807 #ifdef SUPPORT_UNICODE
    808 if (utf && (separator >= 128 || escape >= 128))
    809   {
    810   /* Currently only ASCII characters are supported. */
    811   *bufflenptr = 0;
    812   return PCRE2_ERROR_CONVERT_SYNTAX;
    813   }
    814 #endif
    815 
    816 with_escape = strchr(pcre2_escaped_literals, separator) != NULL;
    817 
    818 /* Initialize default for error offset as end of input. */
    819 out.output = use_buffer;
    820 out.output_end = use_buffer + use_length;
    821 out.output_size = 0;
    822 
    823 out.out_str[0] = CHAR_LEFT_PARENTHESIS;
    824 out.out_str[1] = CHAR_QUESTION_MARK;
    825 out.out_str[2] = CHAR_s;
    826 out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
    827 convert_glob_write_str(&out, 4);
    828 
    829 is_start = TRUE;
    830 
    831 if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
    832   {
    833   if (no_wildsep)
    834     is_start = FALSE;
    835   else if (!no_starstar && pattern + 1 < pattern_end &&
    836            pattern[1] == CHAR_ASTERISK)
    837     is_start = FALSE;
    838   }
    839 
    840 if (is_start)
    841   {
    842   out.out_str[0] = CHAR_BACKSLASH;
    843   out.out_str[1] = CHAR_A;
    844   convert_glob_write_str(&out, 2);
    845   }
    846 
    847 while (pattern < pattern_end)
    848   {
    849   c = *pattern++;
    850 
    851   if (c == CHAR_ASTERISK)
    852     {
    853     is_start = pattern == pattern_start + 1;
    854 
    855     if (in_atomic)
    856       {
    857       convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
    858       in_atomic = FALSE;
    859       }
    860 
    861     if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
    862       {
    863       after_separator = is_start || (pattern[-2] == separator);
    864 
    865       do pattern++; while (pattern < pattern_end &&
    866                            *pattern == CHAR_ASTERISK);
    867 
    868       if (pattern >= pattern_end)
    869         {
    870         no_slash_z = TRUE;
    871         break;
    872         }
    873 
    874       after_starstar = TRUE;
    875 
    876       if (after_separator && escape != 0 && *pattern == escape &&
    877           pattern + 1 < pattern_end && pattern[1] == separator)
    878         pattern++;
    879 
    880       if (is_start)
    881         {
    882         if (*pattern != separator) continue;
    883 
    884         out.out_str[0] = CHAR_LEFT_PARENTHESIS;
    885         out.out_str[1] = CHAR_QUESTION_MARK;
    886         out.out_str[2] = CHAR_COLON;
    887         out.out_str[3] = CHAR_BACKSLASH;
    888         out.out_str[4] = CHAR_A;
    889         out.out_str[5] = CHAR_VERTICAL_LINE;
    890         convert_glob_write_str(&out, 6);
    891 
    892         convert_glob_print_separator(&out, separator, with_escape);
    893         convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
    894 
    895         pattern++;
    896         continue;
    897         }
    898 
    899       convert_glob_print_commit(&out);
    900 
    901       if (!after_separator || *pattern != separator)
    902         {
    903         out.out_str[0] = CHAR_DOT;
    904         out.out_str[1] = CHAR_ASTERISK;
    905         out.out_str[2] = CHAR_QUESTION_MARK;
    906         convert_glob_write_str(&out, 3);
    907         continue;
    908         }
    909 
    910       out.out_str[0] = CHAR_LEFT_PARENTHESIS;
    911       out.out_str[1] = CHAR_QUESTION_MARK;
    912       out.out_str[2] = CHAR_COLON;
    913       out.out_str[3] = CHAR_DOT;
    914       out.out_str[4] = CHAR_ASTERISK;
    915       out.out_str[5] = CHAR_QUESTION_MARK;
    916 
    917       convert_glob_write_str(&out, 6);
    918 
    919       convert_glob_print_separator(&out, separator, with_escape);
    920 
    921       out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
    922       out.out_str[1] = CHAR_QUESTION_MARK;
    923       out.out_str[2] = CHAR_QUESTION_MARK;
    924       convert_glob_write_str(&out, 3);
    925 
    926       pattern++;
    927       continue;
    928       }
    929 
    930     if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
    931       {
    932       do pattern++; while (pattern < pattern_end &&
    933                            *pattern == CHAR_ASTERISK);
    934       }
    935 
    936     if (no_wildsep)
    937       {
    938       if (pattern >= pattern_end)
    939         {
    940         no_slash_z = TRUE;
    941         break;
    942         }
    943 
    944       /* Start check must be after the end check. */
    945       if (is_start) continue;
    946       }
    947 
    948     if (!is_start)
    949       {
    950       if (after_starstar)
    951         {
    952         out.out_str[0] = CHAR_LEFT_PARENTHESIS;
    953         out.out_str[1] = CHAR_QUESTION_MARK;
    954         out.out_str[2] = CHAR_GREATER_THAN_SIGN;
    955         convert_glob_write_str(&out, 3);
    956         in_atomic = TRUE;
    957         }
    958       else
    959         convert_glob_print_commit(&out);
    960       }
    961 
    962     if (no_wildsep)
    963       convert_glob_write(&out, CHAR_DOT);
    964     else
    965       convert_glob_print_wildcard(&out, separator, with_escape);
    966 
    967     out.out_str[0] = CHAR_ASTERISK;
    968     out.out_str[1] = CHAR_QUESTION_MARK;
    969     if (pattern >= pattern_end)
    970       out.out_str[1] = CHAR_PLUS;
    971     convert_glob_write_str(&out, 2);
    972     continue;
    973     }
    974 
    975   if (c == CHAR_QUESTION_MARK)
    976     {
    977     if (no_wildsep)
    978       convert_glob_write(&out, CHAR_DOT);
    979     else
    980       convert_glob_print_wildcard(&out, separator, with_escape);
    981     continue;
    982     }
    983 
    984   if (c == CHAR_LEFT_SQUARE_BRACKET)
    985     {
    986     result = convert_glob_parse_range(&pattern, pattern_end,
    987       &out, utf, separator, with_escape, escape, no_wildsep);
    988     if (result != 0) break;
    989     continue;
    990     }
    991 
    992   if (escape != 0 && c == escape)
    993     {
    994     if (pattern >= pattern_end)
    995       {
    996       result = PCRE2_ERROR_CONVERT_SYNTAX;
    997       break;
    998       }
    999     c = *pattern++;
   1000     }
   1001 
   1002   if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
   1003     convert_glob_write(&out, CHAR_BACKSLASH);
   1004 
   1005   convert_glob_write(&out, c);
   1006   }
   1007 
   1008 if (result == 0)
   1009   {
   1010   if (!no_slash_z)
   1011     {
   1012     out.out_str[0] = CHAR_BACKSLASH;
   1013     out.out_str[1] = CHAR_z;
   1014     convert_glob_write_str(&out, 2);
   1015     }
   1016 
   1017   if (in_atomic)
   1018     convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
   1019 
   1020   convert_glob_write(&out, CHAR_NUL);
   1021 
   1022   if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
   1023     result = PCRE2_ERROR_NOMEMORY;
   1024   }
   1025 
   1026 if (result != 0)
   1027   {
   1028   *bufflenptr = pattern - pattern_start;
   1029   return result;
   1030   }
   1031 
   1032 *bufflenptr = out.output_size - 1;
   1033 return 0;
   1034 }
   1035 
   1036 
   1037 /*************************************************
   1038 *                Convert pattern                 *
   1039 *************************************************/
   1040 
   1041 /* This is the external-facing function for converting other forms of pattern
   1042 into PCRE2 regular expression patterns. On error, the bufflenptr argument is
   1043 used to return an offset in the original pattern.
   1044 
   1045 Arguments:
   1046   pattern     the input pattern
   1047   plength     length of input, or PCRE2_ZERO_TERMINATED
   1048   options     options bits
   1049   buffptr     pointer to pointer to output buffer
   1050   bufflenptr  pointer to length of output buffer
   1051   ccontext    convert context or NULL
   1052 
   1053 Returns:      0 for success, else an error code (+ve or -ve)
   1054 */
   1055 
   1056 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
   1057 pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
   1058   PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
   1059   pcre2_convert_context *ccontext)
   1060 {
   1061 int i, rc;
   1062 PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
   1063 PCRE2_UCHAR *use_buffer = dummy_buffer;
   1064 PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
   1065 BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
   1066 uint32_t pattype = options & TYPE_OPTIONS;
   1067 
   1068 if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
   1069 
   1070 if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
   1071     (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
   1072     pattype == 0)                           /* No type set */
   1073   {
   1074   *bufflenptr = 0;                          /* Error offset */
   1075   return PCRE2_ERROR_BADOPTION;
   1076   }
   1077 
   1078 if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
   1079 if (ccontext == NULL) ccontext =
   1080   (pcre2_convert_context *)(&PRIV(default_convert_context));
   1081 
   1082 /* Check UTF if required. */
   1083 
   1084 #ifndef SUPPORT_UNICODE
   1085 if (utf)
   1086   {
   1087   *bufflenptr = 0;  /* Error offset */
   1088   return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
   1089   }
   1090 #else
   1091 if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
   1092   {
   1093   PCRE2_SIZE erroroffset;
   1094   rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
   1095   if (rc != 0)
   1096     {
   1097     *bufflenptr = erroroffset;
   1098     return rc;
   1099     }
   1100   }
   1101 #endif
   1102 
   1103 /* If buffptr is not NULL, and what it points to is not NULL, we are being
   1104 provided with a buffer and a length, so set them as the buffer to use. */
   1105 
   1106 if (buffptr != NULL && *buffptr != NULL)
   1107   {
   1108   use_buffer = *buffptr;
   1109   use_length = *bufflenptr;
   1110   }
   1111 
   1112 /* Call an individual converter, either just once (if a buffer was provided or
   1113 just the length is needed), or twice (if a memory allocation is required). */
   1114 
   1115 for (i = 0; i < 2; i++)
   1116   {
   1117   PCRE2_UCHAR *allocated;
   1118   BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
   1119 
   1120   switch(pattype)
   1121     {
   1122     case PCRE2_CONVERT_GLOB:
   1123     rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
   1124       use_buffer, use_length, bufflenptr, dummyrun, ccontext);
   1125     break;
   1126 
   1127     case PCRE2_CONVERT_POSIX_BASIC:
   1128     case PCRE2_CONVERT_POSIX_EXTENDED:
   1129     rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
   1130       bufflenptr, dummyrun, ccontext);
   1131     break;
   1132 
   1133     default:
   1134     *bufflenptr = 0;  /* Error offset */
   1135     return PCRE2_ERROR_INTERNAL;
   1136     }
   1137 
   1138   if (rc != 0 ||           /* Error */
   1139       buffptr == NULL ||   /* Just the length is required */
   1140       *buffptr != NULL)    /* Buffer was provided or allocated */
   1141     return rc;
   1142 
   1143   /* Allocate memory for the buffer, with hidden space for an allocator at
   1144   the start. The next time round the loop runs the conversion for real. */
   1145 
   1146   allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
   1147     (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
   1148   if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
   1149   *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
   1150 
   1151   use_buffer = *buffptr;
   1152   use_length = *bufflenptr + 1;
   1153   }
   1154 
   1155 /* Control should never get here. */
   1156 
   1157 return PCRE2_ERROR_INTERNAL;
   1158 }
   1159 
   1160 
   1161 /*************************************************
   1162 *            Free converted pattern              *
   1163 *************************************************/
   1164 
   1165 /* This frees a converted pattern that was put in newly-allocated memory.
   1166 
   1167 Argument:   the converted pattern
   1168 Returns:    nothing
   1169 */
   1170 
   1171 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
   1172 pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
   1173 {
   1174 if (converted != NULL)
   1175   {
   1176   pcre2_memctl *memctl =
   1177     (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
   1178   memctl->free(memctl, memctl->memory_data);
   1179   }
   1180 }
   1181 
   1182 /* End of pcre2_convert.c */
   1183