1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016-2018 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 42 #ifdef HAVE_CONFIG_H 43 #include "config.h" 44 #endif 45 46 #include "pcre2_internal.h" 47 48 #define PTR_STACK_SIZE 20 49 50 #define SUBSTITUTE_OPTIONS \ 51 (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \ 52 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \ 53 PCRE2_SUBSTITUTE_UNSET_EMPTY) 54 55 56 57 /************************************************* 58 * Find end of substitute text * 59 *************************************************/ 60 61 /* In extended mode, we recognize ${name:+set text:unset text} and similar 62 constructions. This requires the identification of unescaped : and } 63 characters. This function scans for such. It must deal with nested ${ 64 constructions. The pointer to the text is updated, either to the required end 65 character, or to where an error was detected. 66 67 Arguments: 68 code points to the compiled expression (for options) 69 ptrptr points to the pointer to the start of the text (updated) 70 ptrend end of the whole string 71 last TRUE if the last expected string (only } recognized) 72 73 Returns: 0 on success 74 negative error code on failure 75 */ 76 77 static int 78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, 79 BOOL last) 80 { 81 int rc = 0; 82 uint32_t nestlevel = 0; 83 BOOL literal = FALSE; 84 PCRE2_SPTR ptr = *ptrptr; 85 86 for (; ptr < ptrend; ptr++) 87 { 88 if (literal) 89 { 90 if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E) 91 { 92 literal = FALSE; 93 ptr += 1; 94 } 95 } 96 97 else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 98 { 99 if (nestlevel == 0) goto EXIT; 100 nestlevel--; 101 } 102 103 else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT; 104 105 else if (*ptr == CHAR_DOLLAR_SIGN) 106 { 107 if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET) 108 { 109 nestlevel++; 110 ptr += 1; 111 } 112 } 113 114 else if (*ptr == CHAR_BACKSLASH) 115 { 116 int erc; 117 int errorcode; 118 uint32_t ch; 119 120 if (ptr < ptrend - 1) switch (ptr[1]) 121 { 122 case CHAR_L: 123 case CHAR_l: 124 case CHAR_U: 125 case CHAR_u: 126 ptr += 1; 127 continue; 128 } 129 130 ptr += 1; /* Must point after \ */ 131 erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, 132 code->overall_options, FALSE, NULL); 133 ptr -= 1; /* Back to last code unit of escape */ 134 if (errorcode != 0) 135 { 136 rc = errorcode; 137 goto EXIT; 138 } 139 140 switch(erc) 141 { 142 case 0: /* Data character */ 143 case ESC_E: /* Isolated \E is ignored */ 144 break; 145 146 case ESC_Q: 147 literal = TRUE; 148 break; 149 150 default: 151 rc = PCRE2_ERROR_BADREPESCAPE; 152 goto EXIT; 153 } 154 } 155 } 156 157 rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */ 158 159 EXIT: 160 *ptrptr = ptr; 161 return rc; 162 } 163 164 165 166 /************************************************* 167 * Match and substitute * 168 *************************************************/ 169 170 /* This function applies a compiled re to a subject string and creates a new 171 string with substitutions. The first 7 arguments are the same as for 172 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED. 173 174 Arguments: 175 code points to the compiled expression 176 subject points to the subject string 177 length length of subject string (may contain binary zeros) 178 start_offset where to start in the subject string 179 options option bits 180 match_data points to a match_data block, or is NULL 181 context points a PCRE2 context 182 replacement points to the replacement string 183 rlength length of replacement string 184 buffer where to put the substituted string 185 blength points to length of buffer; updated to length of string 186 187 Returns: >= 0 number of substitutions made 188 < 0 an error code 189 PCRE2_ERROR_BADREPLACEMENT means invalid use of $ 190 */ 191 192 /* This macro checks for space in the buffer before copying into it. On 193 overflow, either give an error immediately, or keep on, accumulating the 194 length. */ 195 196 #define CHECKMEMCPY(from,length) \ 197 if (!overflowed && lengthleft < length) \ 198 { \ 199 if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ 200 overflowed = TRUE; \ 201 extra_needed = length - lengthleft; \ 202 } \ 203 else if (overflowed) \ 204 { \ 205 extra_needed += length; \ 206 } \ 207 else \ 208 { \ 209 memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ 210 buff_offset += length; \ 211 lengthleft -= length; \ 212 } 213 214 /* Here's the function */ 215 216 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 217 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, 218 PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, 219 pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength, 220 PCRE2_UCHAR *buffer, PCRE2_SIZE *blength) 221 { 222 int rc; 223 int subs; 224 int forcecase = 0; 225 int forcecasereset = 0; 226 uint32_t ovector_count; 227 uint32_t goptions = 0; 228 uint32_t suboptions; 229 BOOL match_data_created = FALSE; 230 BOOL literal = FALSE; 231 BOOL overflowed = FALSE; 232 #ifdef SUPPORT_UNICODE 233 BOOL utf = (code->overall_options & PCRE2_UTF) != 0; 234 #endif 235 PCRE2_UCHAR temp[6]; 236 PCRE2_SPTR ptr; 237 PCRE2_SPTR repend; 238 PCRE2_SIZE extra_needed = 0; 239 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; 240 PCRE2_SIZE *ovector; 241 PCRE2_SIZE ovecsave[3]; 242 243 buff_offset = 0; 244 lengthleft = buff_length = *blength; 245 *blength = PCRE2_UNSET; 246 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; 247 248 /* Partial matching is not valid. */ 249 250 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) 251 return PCRE2_ERROR_BADOPTION; 252 253 /* If no match data block is provided, create one. */ 254 255 if (match_data == NULL) 256 { 257 pcre2_general_context *gcontext = (mcontext == NULL)? 258 (pcre2_general_context *)code : 259 (pcre2_general_context *)mcontext; 260 match_data = pcre2_match_data_create_from_pattern(code, gcontext); 261 if (match_data == NULL) return PCRE2_ERROR_NOMEMORY; 262 match_data_created = TRUE; 263 } 264 ovector = pcre2_get_ovector_pointer(match_data); 265 ovector_count = pcre2_get_ovector_count(match_data); 266 267 /* Find lengths of zero-terminated strings and the end of the replacement. */ 268 269 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); 270 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); 271 repend = replacement + rlength; 272 273 /* Check UTF replacement string if necessary. */ 274 275 #ifdef SUPPORT_UNICODE 276 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) 277 { 278 rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar)); 279 if (rc != 0) 280 { 281 match_data->leftchar = 0; 282 goto EXIT; 283 } 284 } 285 #endif /* SUPPORT_UNICODE */ 286 287 /* Save the substitute options and remove them from the match options. */ 288 289 suboptions = options & SUBSTITUTE_OPTIONS; 290 options &= ~SUBSTITUTE_OPTIONS; 291 292 /* Copy up to the start offset */ 293 294 if (start_offset > length) 295 { 296 match_data->leftchar = 0; 297 rc = PCRE2_ERROR_BADOFFSET; 298 goto EXIT; 299 } 300 CHECKMEMCPY(subject, start_offset); 301 302 /* Loop for global substituting. */ 303 304 subs = 0; 305 do 306 { 307 PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; 308 uint32_t ptrstackptr = 0; 309 310 rc = pcre2_match(code, subject, length, start_offset, options|goptions, 311 match_data, mcontext); 312 313 #ifdef SUPPORT_UNICODE 314 if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ 315 #endif 316 317 /* Any error other than no match returns the error code. No match when not 318 doing the special after-empty-match global rematch, or when at the end of the 319 subject, breaks the global loop. Otherwise, advance the starting point by one 320 character, copying it to the output, and try again. */ 321 322 if (rc < 0) 323 { 324 PCRE2_SIZE save_start; 325 326 if (rc != PCRE2_ERROR_NOMATCH) goto EXIT; 327 if (goptions == 0 || start_offset >= length) break; 328 329 /* Advance by one code point. Then, if CRLF is a valid newline sequence and 330 we have advanced into the middle of it, advance one more code point. In 331 other words, do not start in the middle of CRLF, even if CR and LF on their 332 own are valid newlines. */ 333 334 save_start = start_offset++; 335 if (subject[start_offset-1] == CHAR_CR && 336 code->newline_convention != PCRE2_NEWLINE_CR && 337 code->newline_convention != PCRE2_NEWLINE_LF && 338 start_offset < length && 339 subject[start_offset] == CHAR_LF) 340 start_offset++; 341 342 /* Otherwise, in UTF mode, advance past any secondary code points. */ 343 344 else if ((code->overall_options & PCRE2_UTF) != 0) 345 { 346 #if PCRE2_CODE_UNIT_WIDTH == 8 347 while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80) 348 start_offset++; 349 #elif PCRE2_CODE_UNIT_WIDTH == 16 350 while (start_offset < length && 351 (subject[start_offset] & 0xfc00) == 0xdc00) 352 start_offset++; 353 #endif 354 } 355 356 /* Copy what we have advanced past, reset the special global options, and 357 continue to the next match. */ 358 359 fraglength = start_offset - save_start; 360 CHECKMEMCPY(subject + save_start, fraglength); 361 goptions = 0; 362 continue; 363 } 364 365 /* Handle a successful match. Matches that use \K to end before they start 366 or start before the current point in the subject are not supported. */ 367 368 if (ovector[1] < ovector[0] || ovector[0] < start_offset) 369 { 370 rc = PCRE2_ERROR_BADSUBSPATTERN; 371 goto EXIT; 372 } 373 374 /* Check for the same match as previous. This is legitimate after matching an 375 empty string that starts after the initial match offset. We have tried again 376 at the match point in case the pattern is one like /(?<=\G.)/ which can never 377 match at its starting point, so running the match achieves the bumpalong. If 378 we do get the same (null) match at the original match point, it isn't such a 379 pattern, so we now do the empty string magic. In all other cases, a repeat 380 match should never occur. */ 381 382 if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) 383 { 384 if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) 385 { 386 goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; 387 ovecsave[2] = start_offset; 388 continue; /* Back to the top of the loop */ 389 } 390 rc = PCRE2_ERROR_INTERNAL_DUPMATCH; 391 goto EXIT; 392 } 393 394 /* Count substitutions with a paranoid check for integer overflow; surely no 395 real call to this function would ever hit this! */ 396 397 if (subs == INT_MAX) 398 { 399 rc = PCRE2_ERROR_TOOMANYREPLACE; 400 goto EXIT; 401 } 402 subs++; 403 404 /* Copy the text leading up to the match. */ 405 406 if (rc == 0) rc = ovector_count; 407 fraglength = ovector[0] - start_offset; 408 CHECKMEMCPY(subject + start_offset, fraglength); 409 410 /* Process the replacement string. Literal mode is set by \Q, but only in 411 extended mode when backslashes are being interpreted. In extended mode we 412 must handle nested substrings that are to be reprocessed. */ 413 414 ptr = replacement; 415 for (;;) 416 { 417 uint32_t ch; 418 unsigned int chlen; 419 420 /* If at the end of a nested substring, pop the stack. */ 421 422 if (ptr >= repend) 423 { 424 if (ptrstackptr <= 0) break; /* End of replacement string */ 425 repend = ptrstack[--ptrstackptr]; 426 ptr = ptrstack[--ptrstackptr]; 427 continue; 428 } 429 430 /* Handle the next character */ 431 432 if (literal) 433 { 434 if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E) 435 { 436 literal = FALSE; 437 ptr += 2; 438 continue; 439 } 440 goto LOADLITERAL; 441 } 442 443 /* Not in literal mode. */ 444 445 if (*ptr == CHAR_DOLLAR_SIGN) 446 { 447 int group, n; 448 uint32_t special = 0; 449 BOOL inparens; 450 BOOL star; 451 PCRE2_SIZE sublength; 452 PCRE2_SPTR text1_start = NULL; 453 PCRE2_SPTR text1_end = NULL; 454 PCRE2_SPTR text2_start = NULL; 455 PCRE2_SPTR text2_end = NULL; 456 PCRE2_UCHAR next; 457 PCRE2_UCHAR name[33]; 458 459 if (++ptr >= repend) goto BAD; 460 if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; 461 462 group = -1; 463 n = 0; 464 inparens = FALSE; 465 star = FALSE; 466 467 if (next == CHAR_LEFT_CURLY_BRACKET) 468 { 469 if (++ptr >= repend) goto BAD; 470 next = *ptr; 471 inparens = TRUE; 472 } 473 474 if (next == CHAR_ASTERISK) 475 { 476 if (++ptr >= repend) goto BAD; 477 next = *ptr; 478 star = TRUE; 479 } 480 481 if (!star && next >= CHAR_0 && next <= CHAR_9) 482 { 483 group = next - CHAR_0; 484 while (++ptr < repend) 485 { 486 next = *ptr; 487 if (next < CHAR_0 || next > CHAR_9) break; 488 group = group * 10 + next - CHAR_0; 489 490 /* A check for a number greater than the hightest captured group 491 is sufficient here; no need for a separate overflow check. If unknown 492 groups are to be treated as unset, just skip over any remaining 493 digits and carry on. */ 494 495 if (group > code->top_bracket) 496 { 497 if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 498 { 499 while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9); 500 break; 501 } 502 else 503 { 504 rc = PCRE2_ERROR_NOSUBSTRING; 505 goto PTREXIT; 506 } 507 } 508 } 509 } 510 else 511 { 512 const uint8_t *ctypes = code->tables + ctypes_offset; 513 while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) 514 { 515 name[n++] = next; 516 if (n > 32) goto BAD; 517 if (++ptr >= repend) break; 518 next = *ptr; 519 } 520 if (n == 0) goto BAD; 521 name[n] = 0; 522 } 523 524 /* In extended mode we recognize ${name:+set text:unset text} and 525 ${name:-default text}. */ 526 527 if (inparens) 528 { 529 if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 530 !star && ptr < repend - 2 && next == CHAR_COLON) 531 { 532 special = *(++ptr); 533 if (special != CHAR_PLUS && special != CHAR_MINUS) 534 { 535 rc = PCRE2_ERROR_BADSUBSTITUTION; 536 goto PTREXIT; 537 } 538 539 text1_start = ++ptr; 540 rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS); 541 if (rc != 0) goto PTREXIT; 542 text1_end = ptr; 543 544 if (special == CHAR_PLUS && *ptr == CHAR_COLON) 545 { 546 text2_start = ++ptr; 547 rc = find_text_end(code, &ptr, repend, TRUE); 548 if (rc != 0) goto PTREXIT; 549 text2_end = ptr; 550 } 551 } 552 553 else 554 { 555 if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET) 556 { 557 rc = PCRE2_ERROR_REPMISSINGBRACE; 558 goto PTREXIT; 559 } 560 } 561 562 ptr++; 563 } 564 565 /* Have found a syntactically correct group number or name, or *name. 566 Only *MARK is currently recognized. */ 567 568 if (star) 569 { 570 if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) 571 { 572 PCRE2_SPTR mark = pcre2_get_mark(match_data); 573 if (mark != NULL) 574 { 575 PCRE2_SPTR mark_start = mark; 576 while (*mark != 0) mark++; 577 fraglength = mark - mark_start; 578 CHECKMEMCPY(mark_start, fraglength); 579 } 580 } 581 else goto BAD; 582 } 583 584 /* Substitute the contents of a group. We don't use substring_copy 585 functions any more, in order to support case forcing. */ 586 587 else 588 { 589 PCRE2_SPTR subptr, subptrend; 590 591 /* Find a number for a named group. In case there are duplicate names, 592 search for the first one that is set. If the name is not found when 593 PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a 594 non-existent group. */ 595 596 if (group < 0) 597 { 598 PCRE2_SPTR first, last, entry; 599 rc = pcre2_substring_nametable_scan(code, name, &first, &last); 600 if (rc == PCRE2_ERROR_NOSUBSTRING && 601 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 602 { 603 group = code->top_bracket + 1; 604 } 605 else 606 { 607 if (rc < 0) goto PTREXIT; 608 for (entry = first; entry <= last; entry += rc) 609 { 610 uint32_t ng = GET2(entry, 0); 611 if (ng < ovector_count) 612 { 613 if (group < 0) group = ng; /* First in ovector */ 614 if (ovector[ng*2] != PCRE2_UNSET) 615 { 616 group = ng; /* First that is set */ 617 break; 618 } 619 } 620 } 621 622 /* If group is still negative, it means we did not find a group 623 that is in the ovector. Just set the first group. */ 624 625 if (group < 0) group = GET2(first, 0); 626 } 627 } 628 629 /* We now have a group that is identified by number. Find the length of 630 the captured string. If a group in a non-special substitution is unset 631 when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */ 632 633 rc = pcre2_substring_length_bynumber(match_data, group, &sublength); 634 if (rc < 0) 635 { 636 if (rc == PCRE2_ERROR_NOSUBSTRING && 637 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 638 { 639 rc = PCRE2_ERROR_UNSET; 640 } 641 if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */ 642 if (special == 0) /* Plain substitution */ 643 { 644 if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue; 645 goto PTREXIT; /* Else error */ 646 } 647 } 648 649 /* If special is '+' we have a 'set' and possibly an 'unset' text, 650 both of which are reprocessed when used. If special is '-' we have a 651 default text for when the group is unset; it must be reprocessed. */ 652 653 if (special != 0) 654 { 655 if (special == CHAR_MINUS) 656 { 657 if (rc == 0) goto LITERAL_SUBSTITUTE; 658 text2_start = text1_start; 659 text2_end = text1_end; 660 } 661 662 if (ptrstackptr >= PTR_STACK_SIZE) goto BAD; 663 ptrstack[ptrstackptr++] = ptr; 664 ptrstack[ptrstackptr++] = repend; 665 666 if (rc == 0) 667 { 668 ptr = text1_start; 669 repend = text1_end; 670 } 671 else 672 { 673 ptr = text2_start; 674 repend = text2_end; 675 } 676 continue; 677 } 678 679 /* Otherwise we have a literal substitution of a group's contents. */ 680 681 LITERAL_SUBSTITUTE: 682 subptr = subject + ovector[group*2]; 683 subptrend = subject + ovector[group*2 + 1]; 684 685 /* Substitute a literal string, possibly forcing alphabetic case. */ 686 687 while (subptr < subptrend) 688 { 689 GETCHARINCTEST(ch, subptr); 690 if (forcecase != 0) 691 { 692 #ifdef SUPPORT_UNICODE 693 if (utf) 694 { 695 uint32_t type = UCD_CHARTYPE(ch); 696 if (PRIV(ucp_gentype)[type] == ucp_L && 697 type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 698 ch = UCD_OTHERCASE(ch); 699 } 700 else 701 #endif 702 { 703 if (((code->tables + cbits_offset + 704 ((forcecase > 0)? cbit_upper:cbit_lower) 705 )[ch/8] & (1 << (ch%8))) == 0) 706 ch = (code->tables + fcc_offset)[ch]; 707 } 708 forcecase = forcecasereset; 709 } 710 711 #ifdef SUPPORT_UNICODE 712 if (utf) chlen = PRIV(ord2utf)(ch, temp); else 713 #endif 714 { 715 temp[0] = ch; 716 chlen = 1; 717 } 718 CHECKMEMCPY(temp, chlen); 719 } 720 } 721 } 722 723 /* Handle an escape sequence in extended mode. We can use check_escape() 724 to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but 725 the case-forcing escapes are not supported in pcre2_compile() so must be 726 recognized here. */ 727 728 else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 729 *ptr == CHAR_BACKSLASH) 730 { 731 int errorcode; 732 733 if (ptr < repend - 1) switch (ptr[1]) 734 { 735 case CHAR_L: 736 forcecase = forcecasereset = -1; 737 ptr += 2; 738 continue; 739 740 case CHAR_l: 741 forcecase = -1; 742 forcecasereset = 0; 743 ptr += 2; 744 continue; 745 746 case CHAR_U: 747 forcecase = forcecasereset = 1; 748 ptr += 2; 749 continue; 750 751 case CHAR_u: 752 forcecase = 1; 753 forcecasereset = 0; 754 ptr += 2; 755 continue; 756 757 default: 758 break; 759 } 760 761 ptr++; /* Point after \ */ 762 rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, 763 code->overall_options, FALSE, NULL); 764 if (errorcode != 0) goto BADESCAPE; 765 766 switch(rc) 767 { 768 case ESC_E: 769 forcecase = forcecasereset = 0; 770 continue; 771 772 case ESC_Q: 773 literal = TRUE; 774 continue; 775 776 case 0: /* Data character */ 777 goto LITERAL; 778 779 default: 780 goto BADESCAPE; 781 } 782 } 783 784 /* Handle a literal code unit */ 785 786 else 787 { 788 LOADLITERAL: 789 GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */ 790 791 LITERAL: 792 if (forcecase != 0) 793 { 794 #ifdef SUPPORT_UNICODE 795 if (utf) 796 { 797 uint32_t type = UCD_CHARTYPE(ch); 798 if (PRIV(ucp_gentype)[type] == ucp_L && 799 type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 800 ch = UCD_OTHERCASE(ch); 801 } 802 else 803 #endif 804 { 805 if (((code->tables + cbits_offset + 806 ((forcecase > 0)? cbit_upper:cbit_lower) 807 )[ch/8] & (1 << (ch%8))) == 0) 808 ch = (code->tables + fcc_offset)[ch]; 809 } 810 forcecase = forcecasereset; 811 } 812 813 #ifdef SUPPORT_UNICODE 814 if (utf) chlen = PRIV(ord2utf)(ch, temp); else 815 #endif 816 { 817 temp[0] = ch; 818 chlen = 1; 819 } 820 CHECKMEMCPY(temp, chlen); 821 } /* End handling a literal code unit */ 822 } /* End of loop for scanning the replacement. */ 823 824 /* The replacement has been copied to the output. Save the details of this 825 match. See above for how this data is used. If we matched an empty string, do 826 the magic for global matches. Finally, update the start offset to point to 827 the rest of the subject string. */ 828 829 ovecsave[0] = ovector[0]; 830 ovecsave[1] = ovector[1]; 831 ovecsave[2] = start_offset; 832 833 goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : 834 PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; 835 start_offset = ovector[1]; 836 } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ 837 838 /* Copy the rest of the subject. */ 839 840 fraglength = length - start_offset; 841 CHECKMEMCPY(subject + start_offset, fraglength); 842 temp[0] = 0; 843 CHECKMEMCPY(temp , 1); 844 845 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, 846 and matching has carried on after a full buffer, in order to compute the length 847 needed. Otherwise, an overflow generates an immediate error return. */ 848 849 if (overflowed) 850 { 851 rc = PCRE2_ERROR_NOMEMORY; 852 *blength = buff_length + extra_needed; 853 } 854 855 /* After a successful execution, return the number of substitutions and set the 856 length of buffer used, excluding the trailing zero. */ 857 858 else 859 { 860 rc = subs; 861 *blength = buff_offset - 1; 862 } 863 864 EXIT: 865 if (match_data_created) pcre2_match_data_free(match_data); 866 else match_data->rc = rc; 867 return rc; 868 869 NOROOM: 870 rc = PCRE2_ERROR_NOMEMORY; 871 goto EXIT; 872 873 BAD: 874 rc = PCRE2_ERROR_BADREPLACEMENT; 875 goto PTREXIT; 876 877 BADESCAPE: 878 rc = PCRE2_ERROR_BADREPESCAPE; 879 880 PTREXIT: 881 *blength = (PCRE2_SIZE)(ptr - replacement); 882 goto EXIT; 883 } 884 885 /* End of pcre2_substitute.c */ 886