Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10           New API code Copyright (c) 2016-2018 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #include "pcre2_internal.h"
     47 
     48 #define PTR_STACK_SIZE 20
     49 
     50 #define SUBSTITUTE_OPTIONS \
     51   (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
     52    PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
     53    PCRE2_SUBSTITUTE_UNSET_EMPTY)
     54 
     55 
     56 
     57 /*************************************************
     58 *           Find end of substitute text          *
     59 *************************************************/
     60 
     61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
     62 constructions. This requires the identification of unescaped : and }
     63 characters. This function scans for such. It must deal with nested ${
     64 constructions. The pointer to the text is updated, either to the required end
     65 character, or to where an error was detected.
     66 
     67 Arguments:
     68   code      points to the compiled expression (for options)
     69   ptrptr    points to the pointer to the start of the text (updated)
     70   ptrend    end of the whole string
     71   last      TRUE if the last expected string (only } recognized)
     72 
     73 Returns:    0 on success
     74             negative error code on failure
     75 */
     76 
     77 static int
     78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
     79   BOOL last)
     80 {
     81 int rc = 0;
     82 uint32_t nestlevel = 0;
     83 BOOL literal = FALSE;
     84 PCRE2_SPTR ptr = *ptrptr;
     85 
     86 for (; ptr < ptrend; ptr++)
     87   {
     88   if (literal)
     89     {
     90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
     91       {
     92       literal = FALSE;
     93       ptr += 1;
     94       }
     95     }
     96 
     97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
     98     {
     99     if (nestlevel == 0) goto EXIT;
    100     nestlevel--;
    101     }
    102 
    103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
    104 
    105   else if (*ptr == CHAR_DOLLAR_SIGN)
    106     {
    107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
    108       {
    109       nestlevel++;
    110       ptr += 1;
    111       }
    112     }
    113 
    114   else if (*ptr == CHAR_BACKSLASH)
    115     {
    116     int erc;
    117     int errorcode;
    118     uint32_t ch;
    119 
    120     if (ptr < ptrend - 1) switch (ptr[1])
    121       {
    122       case CHAR_L:
    123       case CHAR_l:
    124       case CHAR_U:
    125       case CHAR_u:
    126       ptr += 1;
    127       continue;
    128       }
    129 
    130     ptr += 1;  /* Must point after \ */
    131     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
    132       code->overall_options, FALSE, NULL);
    133     ptr -= 1;  /* Back to last code unit of escape */
    134     if (errorcode != 0)
    135       {
    136       rc = errorcode;
    137       goto EXIT;
    138       }
    139 
    140     switch(erc)
    141       {
    142       case 0:      /* Data character */
    143       case ESC_E:  /* Isolated \E is ignored */
    144       break;
    145 
    146       case ESC_Q:
    147       literal = TRUE;
    148       break;
    149 
    150       default:
    151       rc = PCRE2_ERROR_BADREPESCAPE;
    152       goto EXIT;
    153       }
    154     }
    155   }
    156 
    157 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
    158 
    159 EXIT:
    160 *ptrptr = ptr;
    161 return rc;
    162 }
    163 
    164 
    165 
    166 /*************************************************
    167 *              Match and substitute              *
    168 *************************************************/
    169 
    170 /* This function applies a compiled re to a subject string and creates a new
    171 string with substitutions. The first 7 arguments are the same as for
    172 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
    173 
    174 Arguments:
    175   code            points to the compiled expression
    176   subject         points to the subject string
    177   length          length of subject string (may contain binary zeros)
    178   start_offset    where to start in the subject string
    179   options         option bits
    180   match_data      points to a match_data block, or is NULL
    181   mcontext        points to a PCRE2 match context
    182   replacement     points to the replacement string
    183   rlength         length of replacement string
    184   buffer          where to put the substituted string
    185   blength         points to length of buffer; updated to length of string
    186 
    187 Returns:          >= 0 number of substitutions made
    188                   < 0 an error code
    189                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
    190 */
    191 
    192 /* This macro checks for space in the buffer before copying into it. On
    193 overflow, either give an error immediately, or keep on, accumulating the
    194 length. */
    195 
    196 #define CHECKMEMCPY(from,length) \
    197   if (!overflowed && lengthleft < length) \
    198     { \
    199     if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    200     overflowed = TRUE; \
    201     extra_needed = length - lengthleft; \
    202     } \
    203   else if (overflowed) \
    204     { \
    205     extra_needed += length; \
    206     }  \
    207   else \
    208     {  \
    209     memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
    210     buff_offset += length; \
    211     lengthleft -= length; \
    212     }
    213 
    214 /* Here's the function */
    215 
    216 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    217 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
    218   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
    219   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
    220   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
    221 {
    222 int rc;
    223 int subs;
    224 int forcecase = 0;
    225 int forcecasereset = 0;
    226 uint32_t ovector_count;
    227 uint32_t goptions = 0;
    228 uint32_t suboptions;
    229 BOOL match_data_created = FALSE;
    230 BOOL literal = FALSE;
    231 BOOL overflowed = FALSE;
    232 #ifdef SUPPORT_UNICODE
    233 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
    234 #endif
    235 PCRE2_UCHAR temp[6];
    236 PCRE2_SPTR ptr;
    237 PCRE2_SPTR repend;
    238 PCRE2_SIZE extra_needed = 0;
    239 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
    240 PCRE2_SIZE *ovector;
    241 PCRE2_SIZE ovecsave[3];
    242 
    243 buff_offset = 0;
    244 lengthleft = buff_length = *blength;
    245 *blength = PCRE2_UNSET;
    246 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
    247 
    248 /* Partial matching is not valid. */
    249 
    250 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
    251   return PCRE2_ERROR_BADOPTION;
    252 
    253 /* If no match data block is provided, create one. */
    254 
    255 if (match_data == NULL)
    256   {
    257   pcre2_general_context *gcontext = (mcontext == NULL)?
    258     (pcre2_general_context *)code :
    259     (pcre2_general_context *)mcontext;
    260   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
    261   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
    262   match_data_created = TRUE;
    263   }
    264 ovector = pcre2_get_ovector_pointer(match_data);
    265 ovector_count = pcre2_get_ovector_count(match_data);
    266 
    267 /* Find lengths of zero-terminated strings and the end of the replacement. */
    268 
    269 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
    270 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
    271 repend = replacement + rlength;
    272 
    273 /* Check UTF replacement string if necessary. */
    274 
    275 #ifdef SUPPORT_UNICODE
    276 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
    277   {
    278   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
    279   if (rc != 0)
    280     {
    281     match_data->leftchar = 0;
    282     goto EXIT;
    283     }
    284   }
    285 #endif  /* SUPPORT_UNICODE */
    286 
    287 /* Save the substitute options and remove them from the match options. */
    288 
    289 suboptions = options & SUBSTITUTE_OPTIONS;
    290 options &= ~SUBSTITUTE_OPTIONS;
    291 
    292 /* Copy up to the start offset */
    293 
    294 if (start_offset > length)
    295   {
    296   match_data->leftchar = 0;
    297   rc = PCRE2_ERROR_BADOFFSET;
    298   goto EXIT;
    299   }
    300 CHECKMEMCPY(subject, start_offset);
    301 
    302 /* Loop for global substituting. */
    303 
    304 subs = 0;
    305 do
    306   {
    307   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
    308   uint32_t ptrstackptr = 0;
    309 
    310   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
    311     match_data, mcontext);
    312 
    313 #ifdef SUPPORT_UNICODE
    314   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
    315 #endif
    316 
    317   /* Any error other than no match returns the error code. No match when not
    318   doing the special after-empty-match global rematch, or when at the end of the
    319   subject, breaks the global loop. Otherwise, advance the starting point by one
    320   character, copying it to the output, and try again. */
    321 
    322   if (rc < 0)
    323     {
    324     PCRE2_SIZE save_start;
    325 
    326     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
    327     if (goptions == 0 || start_offset >= length) break;
    328 
    329     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
    330     we have advanced into the middle of it, advance one more code point. In
    331     other words, do not start in the middle of CRLF, even if CR and LF on their
    332     own are valid newlines. */
    333 
    334     save_start = start_offset++;
    335     if (subject[start_offset-1] == CHAR_CR &&
    336         code->newline_convention != PCRE2_NEWLINE_CR &&
    337         code->newline_convention != PCRE2_NEWLINE_LF &&
    338         start_offset < length &&
    339         subject[start_offset] == CHAR_LF)
    340       start_offset++;
    341 
    342     /* Otherwise, in UTF mode, advance past any secondary code points. */
    343 
    344     else if ((code->overall_options & PCRE2_UTF) != 0)
    345       {
    346 #if PCRE2_CODE_UNIT_WIDTH == 8
    347       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
    348         start_offset++;
    349 #elif PCRE2_CODE_UNIT_WIDTH == 16
    350       while (start_offset < length &&
    351             (subject[start_offset] & 0xfc00) == 0xdc00)
    352         start_offset++;
    353 #endif
    354       }
    355 
    356     /* Copy what we have advanced past, reset the special global options, and
    357     continue to the next match. */
    358 
    359     fraglength = start_offset - save_start;
    360     CHECKMEMCPY(subject + save_start, fraglength);
    361     goptions = 0;
    362     continue;
    363     }
    364 
    365   /* Handle a successful match. Matches that use \K to end before they start
    366   or start before the current point in the subject are not supported. */
    367 
    368   if (ovector[1] < ovector[0] || ovector[0] < start_offset)
    369     {
    370     rc = PCRE2_ERROR_BADSUBSPATTERN;
    371     goto EXIT;
    372     }
    373 
    374   /* Check for the same match as previous. This is legitimate after matching an
    375   empty string that starts after the initial match offset. We have tried again
    376   at the match point in case the pattern is one like /(?<=\G.)/ which can never
    377   match at its starting point, so running the match achieves the bumpalong. If
    378   we do get the same (null) match at the original match point, it isn't such a
    379   pattern, so we now do the empty string magic. In all other cases, a repeat
    380   match should never occur. */
    381 
    382   if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
    383     {
    384     if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
    385       {
    386       goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
    387       ovecsave[2] = start_offset;
    388       continue;    /* Back to the top of the loop */
    389       }
    390     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
    391     goto EXIT;
    392     }
    393 
    394   /* Count substitutions with a paranoid check for integer overflow; surely no
    395   real call to this function would ever hit this! */
    396 
    397   if (subs == INT_MAX)
    398     {
    399     rc = PCRE2_ERROR_TOOMANYREPLACE;
    400     goto EXIT;
    401     }
    402   subs++;
    403 
    404   /* Copy the text leading up to the match. */
    405 
    406   if (rc == 0) rc = ovector_count;
    407   fraglength = ovector[0] - start_offset;
    408   CHECKMEMCPY(subject + start_offset, fraglength);
    409 
    410   /* Process the replacement string. Literal mode is set by \Q, but only in
    411   extended mode when backslashes are being interpreted. In extended mode we
    412   must handle nested substrings that are to be reprocessed. */
    413 
    414   ptr = replacement;
    415   for (;;)
    416     {
    417     uint32_t ch;
    418     unsigned int chlen;
    419 
    420     /* If at the end of a nested substring, pop the stack. */
    421 
    422     if (ptr >= repend)
    423       {
    424       if (ptrstackptr <= 0) break;       /* End of replacement string */
    425       repend = ptrstack[--ptrstackptr];
    426       ptr = ptrstack[--ptrstackptr];
    427       continue;
    428       }
    429 
    430     /* Handle the next character */
    431 
    432     if (literal)
    433       {
    434       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
    435         {
    436         literal = FALSE;
    437         ptr += 2;
    438         continue;
    439         }
    440       goto LOADLITERAL;
    441       }
    442 
    443     /* Not in literal mode. */
    444 
    445     if (*ptr == CHAR_DOLLAR_SIGN)
    446       {
    447       int group, n;
    448       uint32_t special = 0;
    449       BOOL inparens;
    450       BOOL star;
    451       PCRE2_SIZE sublength;
    452       PCRE2_SPTR text1_start = NULL;
    453       PCRE2_SPTR text1_end = NULL;
    454       PCRE2_SPTR text2_start = NULL;
    455       PCRE2_SPTR text2_end = NULL;
    456       PCRE2_UCHAR next;
    457       PCRE2_UCHAR name[33];
    458 
    459       if (++ptr >= repend) goto BAD;
    460       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
    461 
    462       group = -1;
    463       n = 0;
    464       inparens = FALSE;
    465       star = FALSE;
    466 
    467       if (next == CHAR_LEFT_CURLY_BRACKET)
    468         {
    469         if (++ptr >= repend) goto BAD;
    470         next = *ptr;
    471         inparens = TRUE;
    472         }
    473 
    474       if (next == CHAR_ASTERISK)
    475         {
    476         if (++ptr >= repend) goto BAD;
    477         next = *ptr;
    478         star = TRUE;
    479         }
    480 
    481       if (!star && next >= CHAR_0 && next <= CHAR_9)
    482         {
    483         group = next - CHAR_0;
    484         while (++ptr < repend)
    485           {
    486           next = *ptr;
    487           if (next < CHAR_0 || next > CHAR_9) break;
    488           group = group * 10 + next - CHAR_0;
    489 
    490           /* A check for a number greater than the hightest captured group
    491           is sufficient here; no need for a separate overflow check. If unknown
    492           groups are to be treated as unset, just skip over any remaining
    493           digits and carry on. */
    494 
    495           if (group > code->top_bracket)
    496             {
    497             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    498               {
    499               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
    500               break;
    501               }
    502             else
    503               {
    504               rc = PCRE2_ERROR_NOSUBSTRING;
    505               goto PTREXIT;
    506               }
    507             }
    508           }
    509         }
    510       else
    511         {
    512         const uint8_t *ctypes = code->tables + ctypes_offset;
    513         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
    514           {
    515           name[n++] = next;
    516           if (n > 32) goto BAD;
    517           if (++ptr >= repend) break;
    518           next = *ptr;
    519           }
    520         if (n == 0) goto BAD;
    521         name[n] = 0;
    522         }
    523 
    524       /* In extended mode we recognize ${name:+set text:unset text} and
    525       ${name:-default text}. */
    526 
    527       if (inparens)
    528         {
    529         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
    530              !star && ptr < repend - 2 && next == CHAR_COLON)
    531           {
    532           special = *(++ptr);
    533           if (special != CHAR_PLUS && special != CHAR_MINUS)
    534             {
    535             rc = PCRE2_ERROR_BADSUBSTITUTION;
    536             goto PTREXIT;
    537             }
    538 
    539           text1_start = ++ptr;
    540           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
    541           if (rc != 0) goto PTREXIT;
    542           text1_end = ptr;
    543 
    544           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
    545             {
    546             text2_start = ++ptr;
    547             rc = find_text_end(code, &ptr, repend, TRUE);
    548             if (rc != 0) goto PTREXIT;
    549             text2_end = ptr;
    550             }
    551           }
    552 
    553         else
    554           {
    555           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
    556             {
    557             rc = PCRE2_ERROR_REPMISSINGBRACE;
    558             goto PTREXIT;
    559             }
    560           }
    561 
    562         ptr++;
    563         }
    564 
    565       /* Have found a syntactically correct group number or name, or *name.
    566       Only *MARK is currently recognized. */
    567 
    568       if (star)
    569         {
    570         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
    571           {
    572           PCRE2_SPTR mark = pcre2_get_mark(match_data);
    573           if (mark != NULL)
    574             {
    575             PCRE2_SPTR mark_start = mark;
    576             while (*mark != 0) mark++;
    577             fraglength = mark - mark_start;
    578             CHECKMEMCPY(mark_start, fraglength);
    579             }
    580           }
    581         else goto BAD;
    582         }
    583 
    584       /* Substitute the contents of a group. We don't use substring_copy
    585       functions any more, in order to support case forcing. */
    586 
    587       else
    588         {
    589         PCRE2_SPTR subptr, subptrend;
    590 
    591         /* Find a number for a named group. In case there are duplicate names,
    592         search for the first one that is set. If the name is not found when
    593         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
    594         non-existent group. */
    595 
    596         if (group < 0)
    597           {
    598           PCRE2_SPTR first, last, entry;
    599           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
    600           if (rc == PCRE2_ERROR_NOSUBSTRING &&
    601               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    602             {
    603             group = code->top_bracket + 1;
    604             }
    605           else
    606             {
    607             if (rc < 0) goto PTREXIT;
    608             for (entry = first; entry <= last; entry += rc)
    609               {
    610               uint32_t ng = GET2(entry, 0);
    611               if (ng < ovector_count)
    612                 {
    613                 if (group < 0) group = ng;          /* First in ovector */
    614                 if (ovector[ng*2] != PCRE2_UNSET)
    615                   {
    616                   group = ng;                       /* First that is set */
    617                   break;
    618                   }
    619                 }
    620               }
    621 
    622             /* If group is still negative, it means we did not find a group
    623             that is in the ovector. Just set the first group. */
    624 
    625             if (group < 0) group = GET2(first, 0);
    626             }
    627           }
    628 
    629         /* We now have a group that is identified by number. Find the length of
    630         the captured string. If a group in a non-special substitution is unset
    631         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
    632 
    633         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
    634         if (rc < 0)
    635           {
    636           if (rc == PCRE2_ERROR_NOSUBSTRING &&
    637               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    638             {
    639             rc = PCRE2_ERROR_UNSET;
    640             }
    641           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
    642           if (special == 0)                           /* Plain substitution */
    643             {
    644             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
    645             goto PTREXIT;                             /* Else error */
    646             }
    647           }
    648 
    649         /* If special is '+' we have a 'set' and possibly an 'unset' text,
    650         both of which are reprocessed when used. If special is '-' we have a
    651         default text for when the group is unset; it must be reprocessed. */
    652 
    653         if (special != 0)
    654           {
    655           if (special == CHAR_MINUS)
    656             {
    657             if (rc == 0) goto LITERAL_SUBSTITUTE;
    658             text2_start = text1_start;
    659             text2_end = text1_end;
    660             }
    661 
    662           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
    663           ptrstack[ptrstackptr++] = ptr;
    664           ptrstack[ptrstackptr++] = repend;
    665 
    666           if (rc == 0)
    667             {
    668             ptr = text1_start;
    669             repend = text1_end;
    670             }
    671           else
    672             {
    673             ptr = text2_start;
    674             repend = text2_end;
    675             }
    676           continue;
    677           }
    678 
    679         /* Otherwise we have a literal substitution of a group's contents. */
    680 
    681         LITERAL_SUBSTITUTE:
    682         subptr = subject + ovector[group*2];
    683         subptrend = subject + ovector[group*2 + 1];
    684 
    685         /* Substitute a literal string, possibly forcing alphabetic case. */
    686 
    687         while (subptr < subptrend)
    688           {
    689           GETCHARINCTEST(ch, subptr);
    690           if (forcecase != 0)
    691             {
    692 #ifdef SUPPORT_UNICODE
    693             if (utf)
    694               {
    695               uint32_t type = UCD_CHARTYPE(ch);
    696               if (PRIV(ucp_gentype)[type] == ucp_L &&
    697                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
    698                 ch = UCD_OTHERCASE(ch);
    699               }
    700             else
    701 #endif
    702               {
    703               if (((code->tables + cbits_offset +
    704                   ((forcecase > 0)? cbit_upper:cbit_lower)
    705                   )[ch/8] & (1 << (ch%8))) == 0)
    706                 ch = (code->tables + fcc_offset)[ch];
    707               }
    708             forcecase = forcecasereset;
    709             }
    710 
    711 #ifdef SUPPORT_UNICODE
    712           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
    713 #endif
    714             {
    715             temp[0] = ch;
    716             chlen = 1;
    717             }
    718           CHECKMEMCPY(temp, chlen);
    719           }
    720         }
    721       }
    722 
    723     /* Handle an escape sequence in extended mode. We can use check_escape()
    724     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
    725     the case-forcing escapes are not supported in pcre2_compile() so must be
    726     recognized here. */
    727 
    728     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
    729               *ptr == CHAR_BACKSLASH)
    730       {
    731       int errorcode;
    732 
    733       if (ptr < repend - 1) switch (ptr[1])
    734         {
    735         case CHAR_L:
    736         forcecase = forcecasereset = -1;
    737         ptr += 2;
    738         continue;
    739 
    740         case CHAR_l:
    741         forcecase = -1;
    742         forcecasereset = 0;
    743         ptr += 2;
    744         continue;
    745 
    746         case CHAR_U:
    747         forcecase = forcecasereset = 1;
    748         ptr += 2;
    749         continue;
    750 
    751         case CHAR_u:
    752         forcecase = 1;
    753         forcecasereset = 0;
    754         ptr += 2;
    755         continue;
    756 
    757         default:
    758         break;
    759         }
    760 
    761       ptr++;  /* Point after \ */
    762       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
    763         code->overall_options, FALSE, NULL);
    764       if (errorcode != 0) goto BADESCAPE;
    765 
    766       switch(rc)
    767         {
    768         case ESC_E:
    769         forcecase = forcecasereset = 0;
    770         continue;
    771 
    772         case ESC_Q:
    773         literal = TRUE;
    774         continue;
    775 
    776         case 0:      /* Data character */
    777         goto LITERAL;
    778 
    779         default:
    780         goto BADESCAPE;
    781         }
    782       }
    783 
    784     /* Handle a literal code unit */
    785 
    786     else
    787       {
    788       LOADLITERAL:
    789       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
    790 
    791       LITERAL:
    792       if (forcecase != 0)
    793         {
    794 #ifdef SUPPORT_UNICODE
    795         if (utf)
    796           {
    797           uint32_t type = UCD_CHARTYPE(ch);
    798           if (PRIV(ucp_gentype)[type] == ucp_L &&
    799               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
    800             ch = UCD_OTHERCASE(ch);
    801           }
    802         else
    803 #endif
    804           {
    805           if (((code->tables + cbits_offset +
    806               ((forcecase > 0)? cbit_upper:cbit_lower)
    807               )[ch/8] & (1 << (ch%8))) == 0)
    808             ch = (code->tables + fcc_offset)[ch];
    809           }
    810         forcecase = forcecasereset;
    811         }
    812 
    813 #ifdef SUPPORT_UNICODE
    814       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
    815 #endif
    816         {
    817         temp[0] = ch;
    818         chlen = 1;
    819         }
    820       CHECKMEMCPY(temp, chlen);
    821       } /* End handling a literal code unit */
    822     }   /* End of loop for scanning the replacement. */
    823 
    824   /* The replacement has been copied to the output. Save the details of this
    825   match. See above for how this data is used. If we matched an empty string, do
    826   the magic for global matches. Finally, update the start offset to point to
    827   the rest of the subject string. */
    828 
    829   ovecsave[0] = ovector[0];
    830   ovecsave[1] = ovector[1];
    831   ovecsave[2] = start_offset;
    832 
    833   goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
    834     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
    835   start_offset = ovector[1];
    836   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
    837 
    838 /* Copy the rest of the subject. */
    839 
    840 fraglength = length - start_offset;
    841 CHECKMEMCPY(subject + start_offset, fraglength);
    842 temp[0] = 0;
    843 CHECKMEMCPY(temp , 1);
    844 
    845 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
    846 and matching has carried on after a full buffer, in order to compute the length
    847 needed. Otherwise, an overflow generates an immediate error return. */
    848 
    849 if (overflowed)
    850   {
    851   rc = PCRE2_ERROR_NOMEMORY;
    852   *blength = buff_length + extra_needed;
    853   }
    854 
    855 /* After a successful execution, return the number of substitutions and set the
    856 length of buffer used, excluding the trailing zero. */
    857 
    858 else
    859   {
    860   rc = subs;
    861   *blength = buff_offset - 1;
    862   }
    863 
    864 EXIT:
    865 if (match_data_created) pcre2_match_data_free(match_data);
    866   else match_data->rc = rc;
    867 return rc;
    868 
    869 NOROOM:
    870 rc = PCRE2_ERROR_NOMEMORY;
    871 goto EXIT;
    872 
    873 BAD:
    874 rc = PCRE2_ERROR_BADREPLACEMENT;
    875 goto PTREXIT;
    876 
    877 BADESCAPE:
    878 rc = PCRE2_ERROR_BADREPESCAPE;
    879 
    880 PTREXIT:
    881 *blength = (PCRE2_SIZE)(ptr - replacement);
    882 goto EXIT;
    883 }
    884 
    885 /* End of pcre2_substitute.c */
    886