1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 POSSIBILITY OF SUCH DAMAGE. 38 ----------------------------------------------------------------------------- 39 */ 40 41 42 #ifdef HAVE_CONFIG_H 43 #include "config.h" 44 #endif 45 46 #include "pcre2_internal.h" 47 48 #define PTR_STACK_SIZE 20 49 50 #define SUBSTITUTE_OPTIONS \ 51 (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \ 52 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \ 53 PCRE2_SUBSTITUTE_UNSET_EMPTY) 54 55 56 57 /************************************************* 58 * Find end of substitute text * 59 *************************************************/ 60 61 /* In extended mode, we recognize ${name:+set text:unset text} and similar 62 constructions. This requires the identification of unescaped : and } 63 characters. This function scans for such. It must deal with nested ${ 64 constructions. The pointer to the text is updated, either to the required end 65 character, or to where an error was detected. 66 67 Arguments: 68 code points to the compiled expression (for options) 69 ptrptr points to the pointer to the start of the text (updated) 70 ptrend end of the whole string 71 last TRUE if the last expected string (only } recognized) 72 73 Returns: 0 on success 74 negative error code on failure 75 */ 76 77 static int 78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, 79 BOOL last) 80 { 81 int rc = 0; 82 uint32_t nestlevel = 0; 83 BOOL literal = FALSE; 84 PCRE2_SPTR ptr = *ptrptr; 85 86 for (; ptr < ptrend; ptr++) 87 { 88 if (literal) 89 { 90 if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E) 91 { 92 literal = FALSE; 93 ptr += 1; 94 } 95 } 96 97 else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 98 { 99 if (nestlevel == 0) goto EXIT; 100 nestlevel--; 101 } 102 103 else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT; 104 105 else if (*ptr == CHAR_DOLLAR_SIGN) 106 { 107 if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET) 108 { 109 nestlevel++; 110 ptr += 1; 111 } 112 } 113 114 else if (*ptr == CHAR_BACKSLASH) 115 { 116 int erc; 117 int errorcode = 0; 118 uint32_t ch; 119 120 if (ptr < ptrend - 1) switch (ptr[1]) 121 { 122 case CHAR_L: 123 case CHAR_l: 124 case CHAR_U: 125 case CHAR_u: 126 ptr += 1; 127 continue; 128 } 129 130 erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, 131 code->overall_options, FALSE, NULL); 132 if (errorcode != 0) 133 { 134 rc = errorcode; 135 goto EXIT; 136 } 137 138 switch(erc) 139 { 140 case 0: /* Data character */ 141 case ESC_E: /* Isolated \E is ignored */ 142 break; 143 144 case ESC_Q: 145 literal = TRUE; 146 break; 147 148 default: 149 rc = PCRE2_ERROR_BADREPESCAPE; 150 goto EXIT; 151 } 152 } 153 } 154 155 rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */ 156 157 EXIT: 158 *ptrptr = ptr; 159 return rc; 160 } 161 162 163 164 /************************************************* 165 * Match and substitute * 166 *************************************************/ 167 168 /* This function applies a compiled re to a subject string and creates a new 169 string with substitutions. The first 7 arguments are the same as for 170 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED. 171 172 Arguments: 173 code points to the compiled expression 174 subject points to the subject string 175 length length of subject string (may contain binary zeros) 176 start_offset where to start in the subject string 177 options option bits 178 match_data points to a match_data block, or is NULL 179 context points a PCRE2 context 180 replacement points to the replacement string 181 rlength length of replacement string 182 buffer where to put the substituted string 183 blength points to length of buffer; updated to length of string 184 185 Returns: >= 0 number of substitutions made 186 < 0 an error code 187 PCRE2_ERROR_BADREPLACEMENT means invalid use of $ 188 */ 189 190 /* This macro checks for space in the buffer before copying into it. On 191 overflow, either give an error immediately, or keep on, accumulating the 192 length. */ 193 194 #define CHECKMEMCPY(from,length) \ 195 if (!overflowed && lengthleft < length) \ 196 { \ 197 if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \ 198 overflowed = TRUE; \ 199 extra_needed = length - lengthleft; \ 200 } \ 201 else if (overflowed) \ 202 { \ 203 extra_needed += length; \ 204 } \ 205 else \ 206 { \ 207 memcpy(buffer + buff_offset, from, CU2BYTES(length)); \ 208 buff_offset += length; \ 209 lengthleft -= length; \ 210 } 211 212 /* Here's the function */ 213 214 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION 215 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, 216 PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, 217 pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength, 218 PCRE2_UCHAR *buffer, PCRE2_SIZE *blength) 219 { 220 int rc; 221 int subs; 222 int forcecase = 0; 223 int forcecasereset = 0; 224 uint32_t ovector_count; 225 uint32_t goptions = 0; 226 uint32_t suboptions; 227 BOOL match_data_created = FALSE; 228 BOOL literal = FALSE; 229 BOOL overflowed = FALSE; 230 #ifdef SUPPORT_UNICODE 231 BOOL utf = (code->overall_options & PCRE2_UTF) != 0; 232 #endif 233 PCRE2_UCHAR temp[6]; 234 PCRE2_SPTR ptr; 235 PCRE2_SPTR repend; 236 PCRE2_SIZE extra_needed = 0; 237 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; 238 PCRE2_SIZE *ovector; 239 240 buff_offset = 0; 241 lengthleft = buff_length = *blength; 242 *blength = PCRE2_UNSET; 243 244 /* Partial matching is not valid. */ 245 246 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) 247 return PCRE2_ERROR_BADOPTION; 248 249 /* If no match data block is provided, create one. */ 250 251 if (match_data == NULL) 252 { 253 pcre2_general_context *gcontext = (mcontext == NULL)? 254 (pcre2_general_context *)code : 255 (pcre2_general_context *)mcontext; 256 match_data = pcre2_match_data_create_from_pattern(code, gcontext); 257 if (match_data == NULL) return PCRE2_ERROR_NOMEMORY; 258 match_data_created = TRUE; 259 } 260 ovector = pcre2_get_ovector_pointer(match_data); 261 ovector_count = pcre2_get_ovector_count(match_data); 262 263 /* Find lengths of zero-terminated strings and the end of the replacement. */ 264 265 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); 266 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); 267 repend = replacement + rlength; 268 269 /* Check UTF replacement string if necessary. */ 270 271 #ifdef SUPPORT_UNICODE 272 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) 273 { 274 rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar)); 275 if (rc != 0) 276 { 277 match_data->leftchar = 0; 278 goto EXIT; 279 } 280 } 281 #endif /* SUPPORT_UNICODE */ 282 283 /* Save the substitute options and remove them from the match options. */ 284 285 suboptions = options & SUBSTITUTE_OPTIONS; 286 options &= ~SUBSTITUTE_OPTIONS; 287 288 /* Copy up to the start offset */ 289 290 CHECKMEMCPY(subject, start_offset); 291 292 /* Loop for global substituting. */ 293 294 subs = 0; 295 do 296 { 297 PCRE2_SPTR ptrstack[PTR_STACK_SIZE]; 298 uint32_t ptrstackptr = 0; 299 300 rc = pcre2_match(code, subject, length, start_offset, options|goptions, 301 match_data, mcontext); 302 303 #ifdef SUPPORT_UNICODE 304 if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */ 305 #endif 306 307 /* Any error other than no match returns the error code. No match when not 308 doing the special after-empty-match global rematch, or when at the end of the 309 subject, breaks the global loop. Otherwise, advance the starting point by one 310 character, copying it to the output, and try again. */ 311 312 if (rc < 0) 313 { 314 PCRE2_SIZE save_start; 315 316 if (rc != PCRE2_ERROR_NOMATCH) goto EXIT; 317 if (goptions == 0 || start_offset >= length) break; 318 319 /* Advance by one code point. Then, if CRLF is a valid newline sequence and 320 we have advanced into the middle of it, advance one more code point. In 321 other words, do not start in the middle of CRLF, even if CR and LF on their 322 own are valid newlines. */ 323 324 save_start = start_offset++; 325 if (subject[start_offset-1] == CHAR_CR && 326 code->newline_convention != PCRE2_NEWLINE_CR && 327 code->newline_convention != PCRE2_NEWLINE_LF && 328 start_offset < length && 329 subject[start_offset] == CHAR_LF) 330 start_offset++; 331 332 /* Otherwise, in UTF mode, advance past any secondary code points. */ 333 334 else if ((code->overall_options & PCRE2_UTF) != 0) 335 { 336 #if PCRE2_CODE_UNIT_WIDTH == 8 337 while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80) 338 start_offset++; 339 #elif PCRE2_CODE_UNIT_WIDTH == 16 340 while (start_offset < length && 341 (subject[start_offset] & 0xfc00) == 0xdc00) 342 start_offset++; 343 #endif 344 } 345 346 /* Copy what we have advanced past, reset the special global options, and 347 continue to the next match. */ 348 349 fraglength = start_offset - save_start; 350 CHECKMEMCPY(subject + save_start, fraglength); 351 goptions = 0; 352 continue; 353 } 354 355 /* Handle a successful match. Matches that use \K to end before they start 356 are not supported. */ 357 358 if (ovector[1] < ovector[0]) 359 { 360 rc = PCRE2_ERROR_BADSUBSPATTERN; 361 goto EXIT; 362 } 363 364 /* Count substitutions with a paranoid check for integer overflow; surely no 365 real call to this function would ever hit this! */ 366 367 if (subs == INT_MAX) 368 { 369 rc = PCRE2_ERROR_TOOMANYREPLACE; 370 goto EXIT; 371 } 372 subs++; 373 374 /* Copy the text leading up to the match. */ 375 376 if (rc == 0) rc = ovector_count; 377 fraglength = ovector[0] - start_offset; 378 CHECKMEMCPY(subject + start_offset, fraglength); 379 380 /* Process the replacement string. Literal mode is set by \Q, but only in 381 extended mode when backslashes are being interpreted. In extended mode we 382 must handle nested substrings that are to be reprocessed. */ 383 384 ptr = replacement; 385 for (;;) 386 { 387 uint32_t ch; 388 unsigned int chlen; 389 390 /* If at the end of a nested substring, pop the stack. */ 391 392 if (ptr >= repend) 393 { 394 if (ptrstackptr <= 0) break; /* End of replacement string */ 395 repend = ptrstack[--ptrstackptr]; 396 ptr = ptrstack[--ptrstackptr]; 397 continue; 398 } 399 400 /* Handle the next character */ 401 402 if (literal) 403 { 404 if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E) 405 { 406 literal = FALSE; 407 ptr += 2; 408 continue; 409 } 410 goto LOADLITERAL; 411 } 412 413 /* Not in literal mode. */ 414 415 if (*ptr == CHAR_DOLLAR_SIGN) 416 { 417 int group, n; 418 uint32_t special = 0; 419 BOOL inparens; 420 BOOL star; 421 PCRE2_SIZE sublength; 422 PCRE2_SPTR text1_start = NULL; 423 PCRE2_SPTR text1_end = NULL; 424 PCRE2_SPTR text2_start = NULL; 425 PCRE2_SPTR text2_end = NULL; 426 PCRE2_UCHAR next; 427 PCRE2_UCHAR name[33]; 428 429 if (++ptr >= repend) goto BAD; 430 if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL; 431 432 group = -1; 433 n = 0; 434 inparens = FALSE; 435 star = FALSE; 436 437 if (next == CHAR_LEFT_CURLY_BRACKET) 438 { 439 if (++ptr >= repend) goto BAD; 440 next = *ptr; 441 inparens = TRUE; 442 } 443 444 if (next == CHAR_ASTERISK) 445 { 446 if (++ptr >= repend) goto BAD; 447 next = *ptr; 448 star = TRUE; 449 } 450 451 if (!star && next >= CHAR_0 && next <= CHAR_9) 452 { 453 group = next - CHAR_0; 454 while (++ptr < repend) 455 { 456 next = *ptr; 457 if (next < CHAR_0 || next > CHAR_9) break; 458 group = group * 10 + next - CHAR_0; 459 460 /* A check for a number greater than the hightest captured group 461 is sufficient here; no need for a separate overflow check. If unknown 462 groups are to be treated as unset, just skip over any remaining 463 digits and carry on. */ 464 465 if (group > code->top_bracket) 466 { 467 if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 468 { 469 while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9); 470 break; 471 } 472 else 473 { 474 rc = PCRE2_ERROR_NOSUBSTRING; 475 goto PTREXIT; 476 } 477 } 478 } 479 } 480 else 481 { 482 const uint8_t *ctypes = code->tables + ctypes_offset; 483 while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) 484 { 485 name[n++] = next; 486 if (n > 32) goto BAD; 487 if (++ptr >= repend) break; 488 next = *ptr; 489 } 490 if (n == 0) goto BAD; 491 name[n] = 0; 492 } 493 494 /* In extended mode we recognize ${name:+set text:unset text} and 495 ${name:-default text}. */ 496 497 if (inparens) 498 { 499 if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 500 !star && ptr < repend - 2 && next == CHAR_COLON) 501 { 502 special = *(++ptr); 503 if (special != CHAR_PLUS && special != CHAR_MINUS) 504 { 505 rc = PCRE2_ERROR_BADSUBSTITUTION; 506 goto PTREXIT; 507 } 508 509 text1_start = ++ptr; 510 rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS); 511 if (rc != 0) goto PTREXIT; 512 text1_end = ptr; 513 514 if (special == CHAR_PLUS && *ptr == CHAR_COLON) 515 { 516 text2_start = ++ptr; 517 rc = find_text_end(code, &ptr, repend, TRUE); 518 if (rc != 0) goto PTREXIT; 519 text2_end = ptr; 520 } 521 } 522 523 else 524 { 525 if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET) 526 { 527 rc = PCRE2_ERROR_REPMISSINGBRACE; 528 goto PTREXIT; 529 } 530 } 531 532 ptr++; 533 } 534 535 /* Have found a syntactically correct group number or name, or *name. 536 Only *MARK is currently recognized. */ 537 538 if (star) 539 { 540 if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) 541 { 542 PCRE2_SPTR mark = pcre2_get_mark(match_data); 543 if (mark != NULL) 544 { 545 PCRE2_SPTR mark_start = mark; 546 while (*mark != 0) mark++; 547 fraglength = mark - mark_start; 548 CHECKMEMCPY(mark_start, fraglength); 549 } 550 } 551 else goto BAD; 552 } 553 554 /* Substitute the contents of a group. We don't use substring_copy 555 functions any more, in order to support case forcing. */ 556 557 else 558 { 559 PCRE2_SPTR subptr, subptrend; 560 561 /* Find a number for a named group. In case there are duplicate names, 562 search for the first one that is set. If the name is not found when 563 PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a 564 non-existent group. */ 565 566 if (group < 0) 567 { 568 PCRE2_SPTR first, last, entry; 569 rc = pcre2_substring_nametable_scan(code, name, &first, &last); 570 if (rc == PCRE2_ERROR_NOSUBSTRING && 571 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 572 { 573 group = code->top_bracket + 1; 574 } 575 else 576 { 577 if (rc < 0) goto PTREXIT; 578 for (entry = first; entry <= last; entry += rc) 579 { 580 uint32_t ng = GET2(entry, 0); 581 if (ng < ovector_count) 582 { 583 if (group < 0) group = ng; /* First in ovector */ 584 if (ovector[ng*2] != PCRE2_UNSET) 585 { 586 group = ng; /* First that is set */ 587 break; 588 } 589 } 590 } 591 592 /* If group is still negative, it means we did not find a group 593 that is in the ovector. Just set the first group. */ 594 595 if (group < 0) group = GET2(first, 0); 596 } 597 } 598 599 /* We now have a group that is identified by number. Find the length of 600 the captured string. If a group in a non-special substitution is unset 601 when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */ 602 603 rc = pcre2_substring_length_bynumber(match_data, group, &sublength); 604 if (rc < 0) 605 { 606 if (rc == PCRE2_ERROR_NOSUBSTRING && 607 (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0) 608 { 609 rc = PCRE2_ERROR_UNSET; 610 } 611 if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */ 612 if (special == 0) /* Plain substitution */ 613 { 614 if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue; 615 goto PTREXIT; /* Else error */ 616 } 617 } 618 619 /* If special is '+' we have a 'set' and possibly an 'unset' text, 620 both of which are reprocessed when used. If special is '-' we have a 621 default text for when the group is unset; it must be reprocessed. */ 622 623 if (special != 0) 624 { 625 if (special == CHAR_MINUS) 626 { 627 if (rc == 0) goto LITERAL_SUBSTITUTE; 628 text2_start = text1_start; 629 text2_end = text1_end; 630 } 631 632 if (ptrstackptr >= PTR_STACK_SIZE) goto BAD; 633 ptrstack[ptrstackptr++] = ptr; 634 ptrstack[ptrstackptr++] = repend; 635 636 if (rc == 0) 637 { 638 ptr = text1_start; 639 repend = text1_end; 640 } 641 else 642 { 643 ptr = text2_start; 644 repend = text2_end; 645 } 646 continue; 647 } 648 649 /* Otherwise we have a literal substitution of a group's contents. */ 650 651 LITERAL_SUBSTITUTE: 652 subptr = subject + ovector[group*2]; 653 subptrend = subject + ovector[group*2 + 1]; 654 655 /* Substitute a literal string, possibly forcing alphabetic case. */ 656 657 while (subptr < subptrend) 658 { 659 GETCHARINCTEST(ch, subptr); 660 if (forcecase != 0) 661 { 662 #ifdef SUPPORT_UNICODE 663 if (utf) 664 { 665 uint32_t type = UCD_CHARTYPE(ch); 666 if (PRIV(ucp_gentype)[type] == ucp_L && 667 type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 668 ch = UCD_OTHERCASE(ch); 669 } 670 else 671 #endif 672 { 673 if (((code->tables + cbits_offset + 674 ((forcecase > 0)? cbit_upper:cbit_lower) 675 )[ch/8] & (1 << (ch%8))) == 0) 676 ch = (code->tables + fcc_offset)[ch]; 677 } 678 forcecase = forcecasereset; 679 } 680 681 #ifdef SUPPORT_UNICODE 682 if (utf) chlen = PRIV(ord2utf)(ch, temp); else 683 #endif 684 { 685 temp[0] = ch; 686 chlen = 1; 687 } 688 CHECKMEMCPY(temp, chlen); 689 } 690 } 691 } 692 693 /* Handle an escape sequence in extended mode. We can use check_escape() 694 to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but 695 the case-forcing escapes are not supported in pcre2_compile() so must be 696 recognized here. */ 697 698 else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 && 699 *ptr == CHAR_BACKSLASH) 700 { 701 int errorcode = 0; 702 703 if (ptr < repend - 1) switch (ptr[1]) 704 { 705 case CHAR_L: 706 forcecase = forcecasereset = -1; 707 ptr += 2; 708 continue; 709 710 case CHAR_l: 711 forcecase = -1; 712 forcecasereset = 0; 713 ptr += 2; 714 continue; 715 716 case CHAR_U: 717 forcecase = forcecasereset = 1; 718 ptr += 2; 719 continue; 720 721 case CHAR_u: 722 forcecase = 1; 723 forcecasereset = 0; 724 ptr += 2; 725 continue; 726 727 default: 728 break; 729 } 730 731 rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, 732 code->overall_options, FALSE, NULL); 733 if (errorcode != 0) goto BADESCAPE; 734 ptr++; 735 736 switch(rc) 737 { 738 case ESC_E: 739 forcecase = forcecasereset = 0; 740 continue; 741 742 case ESC_Q: 743 literal = TRUE; 744 continue; 745 746 case 0: /* Data character */ 747 goto LITERAL; 748 749 default: 750 goto BADESCAPE; 751 } 752 } 753 754 /* Handle a literal code unit */ 755 756 else 757 { 758 LOADLITERAL: 759 GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */ 760 761 LITERAL: 762 if (forcecase != 0) 763 { 764 #ifdef SUPPORT_UNICODE 765 if (utf) 766 { 767 uint32_t type = UCD_CHARTYPE(ch); 768 if (PRIV(ucp_gentype)[type] == ucp_L && 769 type != ((forcecase > 0)? ucp_Lu : ucp_Ll)) 770 ch = UCD_OTHERCASE(ch); 771 } 772 else 773 #endif 774 { 775 if (((code->tables + cbits_offset + 776 ((forcecase > 0)? cbit_upper:cbit_lower) 777 )[ch/8] & (1 << (ch%8))) == 0) 778 ch = (code->tables + fcc_offset)[ch]; 779 } 780 forcecase = forcecasereset; 781 } 782 783 #ifdef SUPPORT_UNICODE 784 if (utf) chlen = PRIV(ord2utf)(ch, temp); else 785 #endif 786 { 787 temp[0] = ch; 788 chlen = 1; 789 } 790 CHECKMEMCPY(temp, chlen); 791 } /* End handling a literal code unit */ 792 } /* End of loop for scanning the replacement. */ 793 794 /* The replacement has been copied to the output. Update the start offset to 795 point to the rest of the subject string. If we matched an empty string, 796 do the magic for global matches. */ 797 798 start_offset = ovector[1]; 799 goptions = (ovector[0] != ovector[1])? 0 : 800 PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; 801 } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ 802 803 /* Copy the rest of the subject. */ 804 805 fraglength = length - start_offset; 806 CHECKMEMCPY(subject + start_offset, fraglength); 807 temp[0] = 0; 808 CHECKMEMCPY(temp , 1); 809 810 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set, 811 and matching has carried on after a full buffer, in order to compute the length 812 needed. Otherwise, an overflow generates an immediate error return. */ 813 814 if (overflowed) 815 { 816 rc = PCRE2_ERROR_NOMEMORY; 817 *blength = buff_length + extra_needed; 818 } 819 820 /* After a successful execution, return the number of substitutions and set the 821 length of buffer used, excluding the trailing zero. */ 822 823 else 824 { 825 rc = subs; 826 *blength = buff_offset - 1; 827 } 828 829 EXIT: 830 if (match_data_created) pcre2_match_data_free(match_data); 831 else match_data->rc = rc; 832 return rc; 833 834 NOROOM: 835 rc = PCRE2_ERROR_NOMEMORY; 836 goto EXIT; 837 838 BAD: 839 rc = PCRE2_ERROR_BADREPLACEMENT; 840 goto PTREXIT; 841 842 BADESCAPE: 843 rc = PCRE2_ERROR_BADREPESCAPE; 844 845 PTREXIT: 846 *blength = (PCRE2_SIZE)(ptr - replacement); 847 goto EXIT; 848 } 849 850 /* End of pcre2_substitute.c */ 851