Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #include "pcre2_internal.h"
     47 
/* Number of slots in the stack that pcre2_substitute() uses to remember where
to resume after reprocessing nested replacement text. Two slots (the resume
pointer and the saved end pointer) are pushed for each level of nesting. */

#define PTR_STACK_SIZE 20

/* The option bits that are acted on by pcre2_substitute() itself. They are
saved separately and masked out of the options before pcre2_match() is
called. */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
   PCRE2_SUBSTITUTE_UNSET_EMPTY)
     54 
     55 
     56 
     57 /*************************************************
     58 *           Find end of substitute text          *
     59 *************************************************/
     60 
     61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
     62 constructions. This requires the identification of unescaped : and }
     63 characters. This function scans for such. It must deal with nested ${
     64 constructions. The pointer to the text is updated, either to the required end
     65 character, or to where an error was detected.
     66 
     67 Arguments:
     68   code      points to the compiled expression (for options)
     69   ptrptr    points to the pointer to the start of the text (updated)
     70   ptrend    end of the whole string
     71   last      TRUE if the last expected string (only } recognized)
     72 
     73 Returns:    0 on success
     74             negative error code on failure
     75 */
     76 
     77 static int
     78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
     79   BOOL last)
     80 {
     81 int rc = 0;
     82 uint32_t nestlevel = 0;
     83 BOOL literal = FALSE;
     84 PCRE2_SPTR ptr = *ptrptr;
     85 
     86 for (; ptr < ptrend; ptr++)
     87   {
     88   if (literal)
     89     {
     90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
     91       {
     92       literal = FALSE;
     93       ptr += 1;
     94       }
     95     }
     96 
     97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
     98     {
     99     if (nestlevel == 0) goto EXIT;
    100     nestlevel--;
    101     }
    102 
    103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
    104 
    105   else if (*ptr == CHAR_DOLLAR_SIGN)
    106     {
    107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
    108       {
    109       nestlevel++;
    110       ptr += 1;
    111       }
    112     }
    113 
    114   else if (*ptr == CHAR_BACKSLASH)
    115     {
    116     int erc;
    117     int errorcode = 0;
    118     uint32_t ch;
    119 
    120     if (ptr < ptrend - 1) switch (ptr[1])
    121       {
    122       case CHAR_L:
    123       case CHAR_l:
    124       case CHAR_U:
    125       case CHAR_u:
    126       ptr += 1;
    127       continue;
    128       }
    129 
    130     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
    131       code->overall_options, FALSE, NULL);
    132     if (errorcode != 0)
    133       {
    134       rc = errorcode;
    135       goto EXIT;
    136       }
    137 
    138     switch(erc)
    139       {
    140       case 0:      /* Data character */
    141       case ESC_E:  /* Isolated \E is ignored */
    142       break;
    143 
    144       case ESC_Q:
    145       literal = TRUE;
    146       break;
    147 
    148       default:
    149       rc = PCRE2_ERROR_BADREPESCAPE;
    150       goto EXIT;
    151       }
    152     }
    153   }
    154 
    155 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
    156 
    157 EXIT:
    158 *ptrptr = ptr;
    159 return rc;
    160 }
    161 
    162 
    163 
    164 /*************************************************
    165 *              Match and substitute              *
    166 *************************************************/
    167 
    168 /* This function applies a compiled re to a subject string and creates a new
    169 string with substitutions. The first 7 arguments are the same as for
    170 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
    171 
    172 Arguments:
    173   code            points to the compiled expression
    174   subject         points to the subject string
    175   length          length of subject string (may contain binary zeros)
    176   start_offset    where to start in the subject string
    177   options         option bits
    178   match_data      points to a match_data block, or is NULL
    179   mcontext        points to a PCRE2 match context
    180   replacement     points to the replacement string
    181   rlength         length of replacement string
    182   buffer          where to put the substituted string
    183   blength         points to length of buffer; updated to length of string
    184 
    185 Returns:          >= 0 number of substitutions made
    186                   < 0 an error code
    187                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
    188 */
    189 
    190 /* This macro checks for space in the buffer before copying into it. On
    191 overflow, either give an error immediately, or keep on, accumulating the
    192 length. */
    193 
    194 #define CHECKMEMCPY(from,length) \
    195   if (!overflowed && lengthleft < length) \
    196     { \
    197     if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    198     overflowed = TRUE; \
    199     extra_needed = length - lengthleft; \
    200     } \
    201   else if (overflowed) \
    202     { \
    203     extra_needed += length; \
    204     }  \
    205   else \
    206     {  \
    207     memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
    208     buff_offset += length; \
    209     lengthleft -= length; \
    210     }
    211 
    212 /* Here's the function */
    213 
    214 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
    215 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
    216   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
    217   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
    218   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
    219 {
    220 int rc;
    221 int subs;
    222 int forcecase = 0;
    223 int forcecasereset = 0;
    224 uint32_t ovector_count;
    225 uint32_t goptions = 0;
    226 uint32_t suboptions;
    227 BOOL match_data_created = FALSE;
    228 BOOL literal = FALSE;
    229 BOOL overflowed = FALSE;
    230 #ifdef SUPPORT_UNICODE
    231 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
    232 #endif
    233 PCRE2_UCHAR temp[6];
    234 PCRE2_SPTR ptr;
    235 PCRE2_SPTR repend;
    236 PCRE2_SIZE extra_needed = 0;
    237 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
    238 PCRE2_SIZE *ovector;
    239 
    240 buff_offset = 0;
    241 lengthleft = buff_length = *blength;
    242 *blength = PCRE2_UNSET;
    243 
    244 /* Partial matching is not valid. */
    245 
    246 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
    247   return PCRE2_ERROR_BADOPTION;
    248 
    249 /* If no match data block is provided, create one. */
    250 
    251 if (match_data == NULL)
    252   {
    253   pcre2_general_context *gcontext = (mcontext == NULL)?
    254     (pcre2_general_context *)code :
    255     (pcre2_general_context *)mcontext;
    256   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
    257   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
    258   match_data_created = TRUE;
    259   }
    260 ovector = pcre2_get_ovector_pointer(match_data);
    261 ovector_count = pcre2_get_ovector_count(match_data);
    262 
    263 /* Find lengths of zero-terminated strings and the end of the replacement. */
    264 
    265 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
    266 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
    267 repend = replacement + rlength;
    268 
    269 /* Check UTF replacement string if necessary. */
    270 
    271 #ifdef SUPPORT_UNICODE
    272 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
    273   {
    274   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
    275   if (rc != 0)
    276     {
    277     match_data->leftchar = 0;
    278     goto EXIT;
    279     }
    280   }
    281 #endif  /* SUPPORT_UNICODE */
    282 
    283 /* Save the substitute options and remove them from the match options. */
    284 
    285 suboptions = options & SUBSTITUTE_OPTIONS;
    286 options &= ~SUBSTITUTE_OPTIONS;
    287 
    288 /* Copy up to the start offset */
    289 
    290 CHECKMEMCPY(subject, start_offset);
    291 
    292 /* Loop for global substituting. */
    293 
    294 subs = 0;
    295 do
    296   {
    297   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
    298   uint32_t ptrstackptr = 0;
    299 
    300   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
    301     match_data, mcontext);
    302 
    303 #ifdef SUPPORT_UNICODE
    304   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
    305 #endif
    306 
    307   /* Any error other than no match returns the error code. No match when not
    308   doing the special after-empty-match global rematch, or when at the end of the
    309   subject, breaks the global loop. Otherwise, advance the starting point by one
    310   character, copying it to the output, and try again. */
    311 
    312   if (rc < 0)
    313     {
    314     PCRE2_SIZE save_start;
    315 
    316     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
    317     if (goptions == 0 || start_offset >= length) break;
    318 
    319     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
    320     we have advanced into the middle of it, advance one more code point. In
    321     other words, do not start in the middle of CRLF, even if CR and LF on their
    322     own are valid newlines. */
    323 
    324     save_start = start_offset++;
    325     if (subject[start_offset-1] == CHAR_CR &&
    326         code->newline_convention != PCRE2_NEWLINE_CR &&
    327         code->newline_convention != PCRE2_NEWLINE_LF &&
    328         start_offset < length &&
    329         subject[start_offset] == CHAR_LF)
    330       start_offset++;
    331 
    332     /* Otherwise, in UTF mode, advance past any secondary code points. */
    333 
    334     else if ((code->overall_options & PCRE2_UTF) != 0)
    335       {
    336 #if PCRE2_CODE_UNIT_WIDTH == 8
    337       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
    338         start_offset++;
    339 #elif PCRE2_CODE_UNIT_WIDTH == 16
    340       while (start_offset < length &&
    341             (subject[start_offset] & 0xfc00) == 0xdc00)
    342         start_offset++;
    343 #endif
    344       }
    345 
    346     /* Copy what we have advanced past, reset the special global options, and
    347     continue to the next match. */
    348 
    349     fraglength = start_offset - save_start;
    350     CHECKMEMCPY(subject + save_start, fraglength);
    351     goptions = 0;
    352     continue;
    353     }
    354 
    355   /* Handle a successful match. Matches that use \K to end before they start
    356   are not supported. */
    357 
    358   if (ovector[1] < ovector[0])
    359     {
    360     rc = PCRE2_ERROR_BADSUBSPATTERN;
    361     goto EXIT;
    362     }
    363 
    364   /* Count substitutions with a paranoid check for integer overflow; surely no
    365   real call to this function would ever hit this! */
    366 
    367   if (subs == INT_MAX)
    368     {
    369     rc = PCRE2_ERROR_TOOMANYREPLACE;
    370     goto EXIT;
    371     }
    372   subs++;
    373 
    374   /* Copy the text leading up to the match. */
    375 
    376   if (rc == 0) rc = ovector_count;
    377   fraglength = ovector[0] - start_offset;
    378   CHECKMEMCPY(subject + start_offset, fraglength);
    379 
    380   /* Process the replacement string. Literal mode is set by \Q, but only in
    381   extended mode when backslashes are being interpreted. In extended mode we
    382   must handle nested substrings that are to be reprocessed. */
    383 
    384   ptr = replacement;
    385   for (;;)
    386     {
    387     uint32_t ch;
    388     unsigned int chlen;
    389 
    390     /* If at the end of a nested substring, pop the stack. */
    391 
    392     if (ptr >= repend)
    393       {
    394       if (ptrstackptr <= 0) break;       /* End of replacement string */
    395       repend = ptrstack[--ptrstackptr];
    396       ptr = ptrstack[--ptrstackptr];
    397       continue;
    398       }
    399 
    400     /* Handle the next character */
    401 
    402     if (literal)
    403       {
    404       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
    405         {
    406         literal = FALSE;
    407         ptr += 2;
    408         continue;
    409         }
    410       goto LOADLITERAL;
    411       }
    412 
    413     /* Not in literal mode. */
    414 
    415     if (*ptr == CHAR_DOLLAR_SIGN)
    416       {
    417       int group, n;
    418       uint32_t special = 0;
    419       BOOL inparens;
    420       BOOL star;
    421       PCRE2_SIZE sublength;
    422       PCRE2_SPTR text1_start = NULL;
    423       PCRE2_SPTR text1_end = NULL;
    424       PCRE2_SPTR text2_start = NULL;
    425       PCRE2_SPTR text2_end = NULL;
    426       PCRE2_UCHAR next;
    427       PCRE2_UCHAR name[33];
    428 
    429       if (++ptr >= repend) goto BAD;
    430       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
    431 
    432       group = -1;
    433       n = 0;
    434       inparens = FALSE;
    435       star = FALSE;
    436 
    437       if (next == CHAR_LEFT_CURLY_BRACKET)
    438         {
    439         if (++ptr >= repend) goto BAD;
    440         next = *ptr;
    441         inparens = TRUE;
    442         }
    443 
    444       if (next == CHAR_ASTERISK)
    445         {
    446         if (++ptr >= repend) goto BAD;
    447         next = *ptr;
    448         star = TRUE;
    449         }
    450 
    451       if (!star && next >= CHAR_0 && next <= CHAR_9)
    452         {
    453         group = next - CHAR_0;
    454         while (++ptr < repend)
    455           {
    456           next = *ptr;
    457           if (next < CHAR_0 || next > CHAR_9) break;
    458           group = group * 10 + next - CHAR_0;
    459 
    460           /* A check for a number greater than the hightest captured group
    461           is sufficient here; no need for a separate overflow check. If unknown
    462           groups are to be treated as unset, just skip over any remaining
    463           digits and carry on. */
    464 
    465           if (group > code->top_bracket)
    466             {
    467             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    468               {
    469               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
    470               break;
    471               }
    472             else
    473               {
    474               rc = PCRE2_ERROR_NOSUBSTRING;
    475               goto PTREXIT;
    476               }
    477             }
    478           }
    479         }
    480       else
    481         {
    482         const uint8_t *ctypes = code->tables + ctypes_offset;
    483         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
    484           {
    485           name[n++] = next;
    486           if (n > 32) goto BAD;
    487           if (++ptr >= repend) break;
    488           next = *ptr;
    489           }
    490         if (n == 0) goto BAD;
    491         name[n] = 0;
    492         }
    493 
    494       /* In extended mode we recognize ${name:+set text:unset text} and
    495       ${name:-default text}. */
    496 
    497       if (inparens)
    498         {
    499         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
    500              !star && ptr < repend - 2 && next == CHAR_COLON)
    501           {
    502           special = *(++ptr);
    503           if (special != CHAR_PLUS && special != CHAR_MINUS)
    504             {
    505             rc = PCRE2_ERROR_BADSUBSTITUTION;
    506             goto PTREXIT;
    507             }
    508 
    509           text1_start = ++ptr;
    510           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
    511           if (rc != 0) goto PTREXIT;
    512           text1_end = ptr;
    513 
    514           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
    515             {
    516             text2_start = ++ptr;
    517             rc = find_text_end(code, &ptr, repend, TRUE);
    518             if (rc != 0) goto PTREXIT;
    519             text2_end = ptr;
    520             }
    521           }
    522 
    523         else
    524           {
    525           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
    526             {
    527             rc = PCRE2_ERROR_REPMISSINGBRACE;
    528             goto PTREXIT;
    529             }
    530           }
    531 
    532         ptr++;
    533         }
    534 
    535       /* Have found a syntactically correct group number or name, or *name.
    536       Only *MARK is currently recognized. */
    537 
    538       if (star)
    539         {
    540         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
    541           {
    542           PCRE2_SPTR mark = pcre2_get_mark(match_data);
    543           if (mark != NULL)
    544             {
    545             PCRE2_SPTR mark_start = mark;
    546             while (*mark != 0) mark++;
    547             fraglength = mark - mark_start;
    548             CHECKMEMCPY(mark_start, fraglength);
    549             }
    550           }
    551         else goto BAD;
    552         }
    553 
    554       /* Substitute the contents of a group. We don't use substring_copy
    555       functions any more, in order to support case forcing. */
    556 
    557       else
    558         {
    559         PCRE2_SPTR subptr, subptrend;
    560 
    561         /* Find a number for a named group. In case there are duplicate names,
    562         search for the first one that is set. If the name is not found when
    563         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
    564         non-existent group. */
    565 
    566         if (group < 0)
    567           {
    568           PCRE2_SPTR first, last, entry;
    569           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
    570           if (rc == PCRE2_ERROR_NOSUBSTRING &&
    571               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    572             {
    573             group = code->top_bracket + 1;
    574             }
    575           else
    576             {
    577             if (rc < 0) goto PTREXIT;
    578             for (entry = first; entry <= last; entry += rc)
    579               {
    580               uint32_t ng = GET2(entry, 0);
    581               if (ng < ovector_count)
    582                 {
    583                 if (group < 0) group = ng;          /* First in ovector */
    584                 if (ovector[ng*2] != PCRE2_UNSET)
    585                   {
    586                   group = ng;                       /* First that is set */
    587                   break;
    588                   }
    589                 }
    590               }
    591 
    592             /* If group is still negative, it means we did not find a group
    593             that is in the ovector. Just set the first group. */
    594 
    595             if (group < 0) group = GET2(first, 0);
    596             }
    597           }
    598 
    599         /* We now have a group that is identified by number. Find the length of
    600         the captured string. If a group in a non-special substitution is unset
    601         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
    602 
    603         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
    604         if (rc < 0)
    605           {
    606           if (rc == PCRE2_ERROR_NOSUBSTRING &&
    607               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
    608             {
    609             rc = PCRE2_ERROR_UNSET;
    610             }
    611           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
    612           if (special == 0)                           /* Plain substitution */
    613             {
    614             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
    615             goto PTREXIT;                             /* Else error */
    616             }
    617           }
    618 
    619         /* If special is '+' we have a 'set' and possibly an 'unset' text,
    620         both of which are reprocessed when used. If special is '-' we have a
    621         default text for when the group is unset; it must be reprocessed. */
    622 
    623         if (special != 0)
    624           {
    625           if (special == CHAR_MINUS)
    626             {
    627             if (rc == 0) goto LITERAL_SUBSTITUTE;
    628             text2_start = text1_start;
    629             text2_end = text1_end;
    630             }
    631 
    632           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
    633           ptrstack[ptrstackptr++] = ptr;
    634           ptrstack[ptrstackptr++] = repend;
    635 
    636           if (rc == 0)
    637             {
    638             ptr = text1_start;
    639             repend = text1_end;
    640             }
    641           else
    642             {
    643             ptr = text2_start;
    644             repend = text2_end;
    645             }
    646           continue;
    647           }
    648 
    649         /* Otherwise we have a literal substitution of a group's contents. */
    650 
    651         LITERAL_SUBSTITUTE:
    652         subptr = subject + ovector[group*2];
    653         subptrend = subject + ovector[group*2 + 1];
    654 
    655         /* Substitute a literal string, possibly forcing alphabetic case. */
    656 
    657         while (subptr < subptrend)
    658           {
    659           GETCHARINCTEST(ch, subptr);
    660           if (forcecase != 0)
    661             {
    662 #ifdef SUPPORT_UNICODE
    663             if (utf)
    664               {
    665               uint32_t type = UCD_CHARTYPE(ch);
    666               if (PRIV(ucp_gentype)[type] == ucp_L &&
    667                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
    668                 ch = UCD_OTHERCASE(ch);
    669               }
    670             else
    671 #endif
    672               {
    673               if (((code->tables + cbits_offset +
    674                   ((forcecase > 0)? cbit_upper:cbit_lower)
    675                   )[ch/8] & (1 << (ch%8))) == 0)
    676                 ch = (code->tables + fcc_offset)[ch];
    677               }
    678             forcecase = forcecasereset;
    679             }
    680 
    681 #ifdef SUPPORT_UNICODE
    682           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
    683 #endif
    684             {
    685             temp[0] = ch;
    686             chlen = 1;
    687             }
    688           CHECKMEMCPY(temp, chlen);
    689           }
    690         }
    691       }
    692 
    693     /* Handle an escape sequence in extended mode. We can use check_escape()
    694     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
    695     the case-forcing escapes are not supported in pcre2_compile() so must be
    696     recognized here. */
    697 
    698     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
    699               *ptr == CHAR_BACKSLASH)
    700       {
    701       int errorcode = 0;
    702 
    703       if (ptr < repend - 1) switch (ptr[1])
    704         {
    705         case CHAR_L:
    706         forcecase = forcecasereset = -1;
    707         ptr += 2;
    708         continue;
    709 
    710         case CHAR_l:
    711         forcecase = -1;
    712         forcecasereset = 0;
    713         ptr += 2;
    714         continue;
    715 
    716         case CHAR_U:
    717         forcecase = forcecasereset = 1;
    718         ptr += 2;
    719         continue;
    720 
    721         case CHAR_u:
    722         forcecase = 1;
    723         forcecasereset = 0;
    724         ptr += 2;
    725         continue;
    726 
    727         default:
    728         break;
    729         }
    730 
    731       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
    732         code->overall_options, FALSE, NULL);
    733       if (errorcode != 0) goto BADESCAPE;
    734       ptr++;
    735 
    736       switch(rc)
    737         {
    738         case ESC_E:
    739         forcecase = forcecasereset = 0;
    740         continue;
    741 
    742         case ESC_Q:
    743         literal = TRUE;
    744         continue;
    745 
    746         case 0:      /* Data character */
    747         goto LITERAL;
    748 
    749         default:
    750         goto BADESCAPE;
    751         }
    752       }
    753 
    754     /* Handle a literal code unit */
    755 
    756     else
    757       {
    758       LOADLITERAL:
    759       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
    760 
    761       LITERAL:
    762       if (forcecase != 0)
    763         {
    764 #ifdef SUPPORT_UNICODE
    765         if (utf)
    766           {
    767           uint32_t type = UCD_CHARTYPE(ch);
    768           if (PRIV(ucp_gentype)[type] == ucp_L &&
    769               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
    770             ch = UCD_OTHERCASE(ch);
    771           }
    772         else
    773 #endif
    774           {
    775           if (((code->tables + cbits_offset +
    776               ((forcecase > 0)? cbit_upper:cbit_lower)
    777               )[ch/8] & (1 << (ch%8))) == 0)
    778             ch = (code->tables + fcc_offset)[ch];
    779           }
    780         forcecase = forcecasereset;
    781         }
    782 
    783 #ifdef SUPPORT_UNICODE
    784       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
    785 #endif
    786         {
    787         temp[0] = ch;
    788         chlen = 1;
    789         }
    790       CHECKMEMCPY(temp, chlen);
    791       } /* End handling a literal code unit */
    792     }   /* End of loop for scanning the replacement. */
    793 
    794   /* The replacement has been copied to the output. Update the start offset to
    795   point to the rest of the subject string. If we matched an empty string,
    796   do the magic for global matches. */
    797 
    798   start_offset = ovector[1];
    799   goptions = (ovector[0] != ovector[1])? 0 :
    800     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
    801   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
    802 
    803 /* Copy the rest of the subject. */
    804 
    805 fraglength = length - start_offset;
    806 CHECKMEMCPY(subject + start_offset, fraglength);
    807 temp[0] = 0;
    808 CHECKMEMCPY(temp , 1);
    809 
    810 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
    811 and matching has carried on after a full buffer, in order to compute the length
    812 needed. Otherwise, an overflow generates an immediate error return. */
    813 
    814 if (overflowed)
    815   {
    816   rc = PCRE2_ERROR_NOMEMORY;
    817   *blength = buff_length + extra_needed;
    818   }
    819 
    820 /* After a successful execution, return the number of substitutions and set the
    821 length of buffer used, excluding the trailing zero. */
    822 
    823 else
    824   {
    825   rc = subs;
    826   *blength = buff_offset - 1;
    827   }
    828 
    829 EXIT:
    830 if (match_data_created) pcre2_match_data_free(match_data);
    831   else match_data->rc = rc;
    832 return rc;
    833 
    834 NOROOM:
    835 rc = PCRE2_ERROR_NOMEMORY;
    836 goto EXIT;
    837 
    838 BAD:
    839 rc = PCRE2_ERROR_BADREPLACEMENT;
    840 goto PTREXIT;
    841 
    842 BADESCAPE:
    843 rc = PCRE2_ERROR_BADREPESCAPE;
    844 
    845 PTREXIT:
    846 *blength = (PCRE2_SIZE)(ptr - replacement);
    847 goto EXIT;
    848 }
    849 
    850 /* End of pcre2_substitute.c */
    851