1 /************************************************* 2 * Perl-Compatible Regular Expressions * 3 *************************************************/ 4 5 /* PCRE is a library of functions to support regular expressions whose syntax 6 and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016-2018 University of Cambridge 11 12 ----------------------------------------------------------------------------- 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* This module contains functions that scan a compiled pattern and change
repeats into possessive repeats where possible. */


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif


#include "pcre2_internal.h"


/*************************************************
*        Tables for auto-possessification        *
*************************************************/

/* This table is used to check whether auto-possessification is possible
between adjacent character-type opcodes. The left-hand (repeated) opcode is
used to select the row, and the right-hand opcode is used to select the column.
A value of 1 means that auto-possessification is OK. For example, the second
value in the first row means that \D+\d can be turned into \D++\d.

The Unicode property types (\P and \p) have to be present to fill out the table
because of what their opcode values are, but the table values should always be
zero because property types are handled separately in the code. The last four
columns apply to items that cannot be repeated, so there is no need to have
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes.
*/

/* Dimensions of autoposstab. There is one row for each opcode from
FIRST_AUTOTAB_OP to LAST_AUTOTAB_LEFT_OP and one column for each opcode from
FIRST_AUTOTAB_OP to LAST_AUTOTAB_RIGHT_OP; the table is indexed by subtracting
FIRST_AUTOTAB_OP from the opcode. */

#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)

static const uint8_t autoposstab[APTROWS][APTCOLS] = {
/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
};

#ifdef SUPPORT_UNICODE
/* This table is used to check whether auto-possessification is possible
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
left-hand (repeated) opcode is used to select the row, and the right-hand
opcode is used to select the column.
The values are as follows:

  0   Always return FALSE (never auto-possessify)
  1   Character groups are distinct (possessify if both are OP_PROP)
  2   Check character categories in the same group (general or particular)
  3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)

  4   Check left general category vs right particular category
  5   Check right general category vs left particular category

  6   Left alphanum vs right general category
  7   Left space vs right general category
  8   Left word vs right general category

  9   Right alphanum vs left general category
 10   Right space vs left general category
 11   Right word vs left general category

 12   Left alphanum vs right particular category
 13   Left space vs right particular category
 14   Left word vs right particular category

 15   Right alphanum vs left particular category
 16   Right space vs left particular category
 17   Right word vs left particular category

Both dimensions are indexed by property type (the PT_xxx value stored with the
opcode), in the order given by the column heading and the row labels.
*/

static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
};

/* This table is used to check whether auto-possessification is possible
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
specifies a general category and the other specifies a particular category.
The row is selected by the general category and the column by the particular
category. The value is 1 if the particular category is not part of the general
category. There are 7 rows (general categories C, L, M, N, P, S, Z) and 30
columns (the particular categories, in the order of the column heading). */

static const uint8_t catposstab[7][30] = {
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
};

/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
a general or particular category. The properties in each row are those
that apply to the character set in question. Duplication means that a little
unnecessary work is done when checking, but this keeps things much simpler
because they can all use the same code. For more details see the comment where
this table is used.

Note: SPACE and PXSPACE used to be different because Perl excluded VT from
"space", but from Perl 5.18 it's included, so both categories are treated the
same here.
*/ 168 169 static const uint8_t posspropstab[3][4] = { 170 { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */ 171 { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */ 172 { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */ 173 }; 174 #endif /* SUPPORT_UNICODE */ 175 176 177 178 #ifdef SUPPORT_UNICODE 179 /************************************************* 180 * Check a character and a property * 181 *************************************************/ 182 183 /* This function is called by compare_opcodes() when a property item is 184 adjacent to a fixed character. 185 186 Arguments: 187 c the character 188 ptype the property type 189 pdata the data for the type 190 negated TRUE if it's a negated property (\P or \p{^) 191 192 Returns: TRUE if auto-possessifying is OK 193 */ 194 195 static BOOL 196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, 197 BOOL negated) 198 { 199 const uint32_t *p; 200 const ucd_record *prop = GET_UCD(c); 201 202 switch(ptype) 203 { 204 case PT_LAMP: 205 return (prop->chartype == ucp_Lu || 206 prop->chartype == ucp_Ll || 207 prop->chartype == ucp_Lt) == negated; 208 209 case PT_GC: 210 return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; 211 212 case PT_PC: 213 return (pdata == prop->chartype) == negated; 214 215 case PT_SC: 216 return (pdata == prop->script) == negated; 217 218 /* These are specials */ 219 220 case PT_ALNUM: 221 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 222 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; 223 224 /* Perl space used to exclude VT, but from Perl 5.18 it is included, which 225 means that Perl space and POSIX space are now identical. PCRE was changed 226 at release 8.34. 
*/ 227 228 case PT_SPACE: /* Perl space */ 229 case PT_PXSPACE: /* POSIX space */ 230 switch(c) 231 { 232 HSPACE_CASES: 233 VSPACE_CASES: 234 return negated; 235 236 default: 237 return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; 238 } 239 break; /* Control never reaches here */ 240 241 case PT_WORD: 242 return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 243 PRIV(ucp_gentype)[prop->chartype] == ucp_N || 244 c == CHAR_UNDERSCORE) == negated; 245 246 case PT_CLIST: 247 p = PRIV(ucd_caseless_sets) + prop->caseset; 248 for (;;) 249 { 250 if (c < *p) return !negated; 251 if (c == *p++) return negated; 252 } 253 break; /* Control never reaches here */ 254 } 255 256 return FALSE; 257 } 258 #endif /* SUPPORT_UNICODE */ 259 260 261 262 /************************************************* 263 * Base opcode of repeated opcodes * 264 *************************************************/ 265 266 /* Returns the base opcode for repeated single character type opcodes. If the 267 opcode is not a repeated character type, it returns with the original value. 268 269 Arguments: c opcode 270 Returns: base opcode for the type 271 */ 272 273 static PCRE2_UCHAR 274 get_repeat_base(PCRE2_UCHAR c) 275 { 276 return (c > OP_TYPEPOSUPTO)? c : 277 (c >= OP_TYPESTAR)? OP_TYPESTAR : 278 (c >= OP_NOTSTARI)? OP_NOTSTARI : 279 (c >= OP_NOTSTAR)? OP_NOTSTAR : 280 (c >= OP_STARI)? OP_STARI : 281 OP_STAR; 282 } 283 284 285 /************************************************* 286 * Fill the character property list * 287 *************************************************/ 288 289 /* Checks whether the code points to an opcode that can take part in auto- 290 possessification, and if so, fills a list with its properties. 
291 292 Arguments: 293 code points to start of expression 294 utf TRUE if in UTF mode 295 fcc points to the case-flipping table 296 list points to output list 297 list[0] will be filled with the opcode 298 list[1] will be non-zero if this opcode 299 can match an empty character string 300 list[2..7] depends on the opcode 301 302 Returns: points to the start of the next opcode if *code is accepted 303 NULL if *code is not accepted 304 */ 305 306 static PCRE2_SPTR 307 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc, 308 uint32_t *list) 309 { 310 PCRE2_UCHAR c = *code; 311 PCRE2_UCHAR base; 312 PCRE2_SPTR end; 313 uint32_t chr; 314 315 #ifdef SUPPORT_UNICODE 316 uint32_t *clist_dest; 317 const uint32_t *clist_src; 318 #else 319 (void)utf; /* Suppress "unused parameter" compiler warning */ 320 #endif 321 322 list[0] = c; 323 list[1] = FALSE; 324 code++; 325 326 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) 327 { 328 base = get_repeat_base(c); 329 c -= (base - OP_STAR); 330 331 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) 332 code += IMM2_SIZE; 333 334 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && 335 c != OP_POSPLUS); 336 337 switch(base) 338 { 339 case OP_STAR: 340 list[0] = OP_CHAR; 341 break; 342 343 case OP_STARI: 344 list[0] = OP_CHARI; 345 break; 346 347 case OP_NOTSTAR: 348 list[0] = OP_NOT; 349 break; 350 351 case OP_NOTSTARI: 352 list[0] = OP_NOTI; 353 break; 354 355 case OP_TYPESTAR: 356 list[0] = *code; 357 code++; 358 break; 359 } 360 c = list[0]; 361 } 362 363 switch(c) 364 { 365 case OP_NOT_DIGIT: 366 case OP_DIGIT: 367 case OP_NOT_WHITESPACE: 368 case OP_WHITESPACE: 369 case OP_NOT_WORDCHAR: 370 case OP_WORDCHAR: 371 case OP_ANY: 372 case OP_ALLANY: 373 case OP_ANYNL: 374 case OP_NOT_HSPACE: 375 case OP_HSPACE: 376 case OP_NOT_VSPACE: 377 case OP_VSPACE: 378 case OP_EXTUNI: 379 case OP_EODN: 380 case OP_EOD: 381 case OP_DOLL: 382 case OP_DOLLM: 383 return code; 384 385 case OP_CHAR: 386 case 
OP_NOT: 387 GETCHARINCTEST(chr, code); 388 list[2] = chr; 389 list[3] = NOTACHAR; 390 return code; 391 392 case OP_CHARI: 393 case OP_NOTI: 394 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT; 395 GETCHARINCTEST(chr, code); 396 list[2] = chr; 397 398 #ifdef SUPPORT_UNICODE 399 if (chr < 128 || (chr < 256 && !utf)) 400 list[3] = fcc[chr]; 401 else 402 list[3] = UCD_OTHERCASE(chr); 403 #elif defined SUPPORT_WIDE_CHARS 404 list[3] = (chr < 256) ? fcc[chr] : chr; 405 #else 406 list[3] = fcc[chr]; 407 #endif 408 409 /* The othercase might be the same value. */ 410 411 if (chr == list[3]) 412 list[3] = NOTACHAR; 413 else 414 list[4] = NOTACHAR; 415 return code; 416 417 #ifdef SUPPORT_UNICODE 418 case OP_PROP: 419 case OP_NOTPROP: 420 if (code[0] != PT_CLIST) 421 { 422 list[2] = code[0]; 423 list[3] = code[1]; 424 return code + 2; 425 } 426 427 /* Convert only if we have enough space. */ 428 429 clist_src = PRIV(ucd_caseless_sets) + code[1]; 430 clist_dest = list + 2; 431 code += 2; 432 433 do { 434 if (clist_dest >= list + 8) 435 { 436 /* Early return if there is not enough space. This should never 437 happen, since all clists are shorter than 5 character now. */ 438 list[2] = code[0]; 439 list[3] = code[1]; 440 return code; 441 } 442 *clist_dest++ = *clist_src; 443 } 444 while(*clist_src++ != NOTACHAR); 445 446 /* All characters are stored. The terminating NOTACHAR is copied from the 447 clist itself. */ 448 449 list[0] = (c == OP_PROP) ? 
OP_CHAR : OP_NOT; 450 return code; 451 #endif 452 453 case OP_NCLASS: 454 case OP_CLASS: 455 #ifdef SUPPORT_WIDE_CHARS 456 case OP_XCLASS: 457 if (c == OP_XCLASS) 458 end = code + GET(code, 0) - 1; 459 else 460 #endif 461 end = code + 32 / sizeof(PCRE2_UCHAR); 462 463 switch(*end) 464 { 465 case OP_CRSTAR: 466 case OP_CRMINSTAR: 467 case OP_CRQUERY: 468 case OP_CRMINQUERY: 469 case OP_CRPOSSTAR: 470 case OP_CRPOSQUERY: 471 list[1] = TRUE; 472 end++; 473 break; 474 475 case OP_CRPLUS: 476 case OP_CRMINPLUS: 477 case OP_CRPOSPLUS: 478 end++; 479 break; 480 481 case OP_CRRANGE: 482 case OP_CRMINRANGE: 483 case OP_CRPOSRANGE: 484 list[1] = (GET2(end, 1) == 0); 485 end += 1 + 2 * IMM2_SIZE; 486 break; 487 } 488 list[2] = (uint32_t)(end - code); 489 return end; 490 } 491 return NULL; /* Opcode not accepted */ 492 } 493 494 495 496 /************************************************* 497 * Scan further character sets for match * 498 *************************************************/ 499 500 /* Checks whether the base and the current opcode have a common character, in 501 which case the base cannot be possessified. 
502 503 Arguments: 504 code points to the byte code 505 utf TRUE in UTF mode 506 cb compile data block 507 base_list the data list of the base opcode 508 base_end the end of the base opcode 509 rec_limit points to recursion depth counter 510 511 Returns: TRUE if the auto-possessification is possible 512 */ 513 514 static BOOL 515 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb, 516 const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) 517 { 518 PCRE2_UCHAR c; 519 uint32_t list[8]; 520 const uint32_t *chr_ptr; 521 const uint32_t *ochr_ptr; 522 const uint32_t *list_ptr; 523 PCRE2_SPTR next_code; 524 #ifdef SUPPORT_WIDE_CHARS 525 PCRE2_SPTR xclass_flags; 526 #endif 527 const uint8_t *class_bitset; 528 const uint8_t *set1, *set2, *set_end; 529 uint32_t chr; 530 BOOL accepted, invert_bits; 531 BOOL entered_a_group = FALSE; 532 533 if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */ 534 535 /* Note: the base_list[1] contains whether the current opcode has a greedy 536 (represented by a non-zero value) quantifier. This is a different from 537 other character type lists, which store here that the character iterator 538 matches to an empty string (also represented by a non-zero value). */ 539 540 for(;;) 541 { 542 /* All operations move the code pointer forward. 543 Therefore infinite recursions are not possible. */ 544 545 c = *code; 546 547 /* Skip over callouts */ 548 549 if (c == OP_CALLOUT) 550 { 551 code += PRIV(OP_lengths)[c]; 552 continue; 553 } 554 555 if (c == OP_CALLOUT_STR) 556 { 557 code += GET(code, 1 + 2*LINK_SIZE); 558 continue; 559 } 560 561 /* At the end of a branch, skip to the end of the group. */ 562 563 if (c == OP_ALT) 564 { 565 do code += GET(code, 1); while (*code == OP_ALT); 566 c = *code; 567 } 568 569 /* Inspect the next opcode. */ 570 571 switch(c) 572 { 573 /* We can always possessify a greedy iterator at the end of the pattern, 574 which is reached after skipping over the final OP_KET. 
A non-greedy 575 iterator must never be possessified. */ 576 577 case OP_END: 578 return base_list[1] != 0; 579 580 /* When an iterator is at the end of certain kinds of group we can inspect 581 what follows the group by skipping over the closing ket. Note that this 582 does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given 583 iteration is variable (could be another iteration or could be the next 584 item). As these two opcodes are not listed in the next switch, they will 585 end up as the next code to inspect, and return FALSE by virtue of being 586 unsupported. */ 587 588 case OP_KET: 589 case OP_KETRPOS: 590 /* The non-greedy case cannot be converted to a possessive form. */ 591 592 if (base_list[1] == 0) return FALSE; 593 594 /* If the bracket is capturing it might be referenced by an OP_RECURSE 595 so its last iterator can never be possessified if the pattern contains 596 recursions. (This could be improved by keeping a list of group numbers that 597 are called by recursion.) */ 598 599 switch(*(code - GET(code, 1))) 600 { 601 case OP_CBRA: 602 case OP_SCBRA: 603 case OP_CBRAPOS: 604 case OP_SCBRAPOS: 605 if (cb->had_recurse) return FALSE; 606 break; 607 608 /* Atomic sub-patterns and assertions can always auto-possessify their 609 last iterator. However, if the group was entered as a result of checking 610 a previous iterator, this is not possible. */ 611 612 case OP_ASSERT: 613 case OP_ASSERT_NOT: 614 case OP_ASSERTBACK: 615 case OP_ASSERTBACK_NOT: 616 case OP_ONCE: 617 618 return !entered_a_group; 619 } 620 621 /* Skip over the bracket and inspect what comes next. */ 622 623 code += PRIV(OP_lengths)[c]; 624 continue; 625 626 /* Handle cases where the next item is a group. */ 627 628 case OP_ONCE: 629 case OP_BRA: 630 case OP_CBRA: 631 next_code = code + GET(code, 1); 632 code += PRIV(OP_lengths)[c]; 633 634 /* Check each branch. We have to recurse a level for all but the last 635 branch. 
*/ 636 637 while (*next_code == OP_ALT) 638 { 639 if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit)) 640 return FALSE; 641 code = next_code + 1 + LINK_SIZE; 642 next_code += GET(next_code, 1); 643 } 644 645 entered_a_group = TRUE; 646 continue; 647 648 case OP_BRAZERO: 649 case OP_BRAMINZERO: 650 651 next_code = code + 1; 652 if (*next_code != OP_BRA && *next_code != OP_CBRA && 653 *next_code != OP_ONCE) return FALSE; 654 655 do next_code += GET(next_code, 1); while (*next_code == OP_ALT); 656 657 /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ 658 659 next_code += 1 + LINK_SIZE; 660 if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit)) 661 return FALSE; 662 663 code += PRIV(OP_lengths)[c]; 664 continue; 665 666 /* The next opcode does not need special handling; fall through and use it 667 to see if the base can be possessified. */ 668 669 default: 670 break; 671 } 672 673 /* We now have the next appropriate opcode to compare with the base. Check 674 for a supported opcode, and load its properties. */ 675 676 code = get_chr_property_list(code, utf, cb->fcc, list); 677 if (code == NULL) return FALSE; /* Unsupported */ 678 679 /* If either opcode is a small character list, set pointers for comparing 680 characters from that list with another list, or with a property. */ 681 682 if (base_list[0] == OP_CHAR) 683 { 684 chr_ptr = base_list + 2; 685 list_ptr = list; 686 } 687 else if (list[0] == OP_CHAR) 688 { 689 chr_ptr = list + 2; 690 list_ptr = base_list; 691 } 692 693 /* Character bitsets can also be compared to certain opcodes. */ 694 695 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS 696 #if PCRE2_CODE_UNIT_WIDTH == 8 697 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. 
*/ 698 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS)) 699 #endif 700 ) 701 { 702 #if PCRE2_CODE_UNIT_WIDTH == 8 703 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS)) 704 #else 705 if (base_list[0] == OP_CLASS) 706 #endif 707 { 708 set1 = (uint8_t *)(base_end - base_list[2]); 709 list_ptr = list; 710 } 711 else 712 { 713 set1 = (uint8_t *)(code - list[2]); 714 list_ptr = base_list; 715 } 716 717 invert_bits = FALSE; 718 switch(list_ptr[0]) 719 { 720 case OP_CLASS: 721 case OP_NCLASS: 722 set2 = (uint8_t *) 723 ((list_ptr == list ? code : base_end) - list_ptr[2]); 724 break; 725 726 #ifdef SUPPORT_WIDE_CHARS 727 case OP_XCLASS: 728 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; 729 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; 730 if ((*xclass_flags & XCL_MAP) == 0) 731 { 732 /* No bits are set for characters < 256. */ 733 if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; 734 /* Might be an empty repeat. */ 735 continue; 736 } 737 set2 = (uint8_t *)(xclass_flags + 1); 738 break; 739 #endif 740 741 case OP_NOT_DIGIT: 742 invert_bits = TRUE; 743 /* Fall through */ 744 case OP_DIGIT: 745 set2 = (uint8_t *)(cb->cbits + cbit_digit); 746 break; 747 748 case OP_NOT_WHITESPACE: 749 invert_bits = TRUE; 750 /* Fall through */ 751 case OP_WHITESPACE: 752 set2 = (uint8_t *)(cb->cbits + cbit_space); 753 break; 754 755 case OP_NOT_WORDCHAR: 756 invert_bits = TRUE; 757 /* Fall through */ 758 case OP_WORDCHAR: 759 set2 = (uint8_t *)(cb->cbits + cbit_word); 760 break; 761 762 default: 763 return FALSE; 764 } 765 766 /* Because the bit sets are unaligned bytes, we need to perform byte 767 comparison here. 
*/ 768 769 set_end = set1 + 32; 770 if (invert_bits) 771 { 772 do 773 { 774 if ((*set1++ & ~(*set2++)) != 0) return FALSE; 775 } 776 while (set1 < set_end); 777 } 778 else 779 { 780 do 781 { 782 if ((*set1++ & *set2++) != 0) return FALSE; 783 } 784 while (set1 < set_end); 785 } 786 787 if (list[1] == 0) return TRUE; 788 /* Might be an empty repeat. */ 789 continue; 790 } 791 792 /* Some property combinations also acceptable. Unicode property opcodes are 793 processed specially; the rest can be handled with a lookup table. */ 794 795 else 796 { 797 uint32_t leftop, rightop; 798 799 leftop = base_list[0]; 800 rightop = list[0]; 801 802 #ifdef SUPPORT_UNICODE 803 accepted = FALSE; /* Always set in non-unicode case. */ 804 if (leftop == OP_PROP || leftop == OP_NOTPROP) 805 { 806 if (rightop == OP_EOD) 807 accepted = TRUE; 808 else if (rightop == OP_PROP || rightop == OP_NOTPROP) 809 { 810 int n; 811 const uint8_t *p; 812 BOOL same = leftop == rightop; 813 BOOL lisprop = leftop == OP_PROP; 814 BOOL risprop = rightop == OP_PROP; 815 BOOL bothprop = lisprop && risprop; 816 817 /* There's a table that specifies how each combination is to be 818 processed: 819 0 Always return FALSE (never auto-possessify) 820 1 Character groups are distinct (possessify if both are OP_PROP) 821 2 Check character categories in the same group (general or particular) 822 3 Return TRUE if the two opcodes are not the same 823 ... 
see comments below 824 */ 825 826 n = propposstab[base_list[2]][list[2]]; 827 switch(n) 828 { 829 case 0: break; 830 case 1: accepted = bothprop; break; 831 case 2: accepted = (base_list[3] == list[3]) != same; break; 832 case 3: accepted = !same; break; 833 834 case 4: /* Left general category, right particular category */ 835 accepted = risprop && catposstab[base_list[3]][list[3]] == same; 836 break; 837 838 case 5: /* Right general category, left particular category */ 839 accepted = lisprop && catposstab[list[3]][base_list[3]] == same; 840 break; 841 842 /* This code is logically tricky. Think hard before fiddling with it. 843 The posspropstab table has four entries per row. Each row relates to 844 one of PCRE's special properties such as ALNUM or SPACE or WORD. 845 Only WORD actually needs all four entries, but using repeats for the 846 others means they can all use the same code below. 847 848 The first two entries in each row are Unicode general categories, and 849 apply always, because all the characters they include are part of the 850 PCRE character set. The third and fourth entries are a general and a 851 particular category, respectively, that include one or more relevant 852 characters. One or the other is used, depending on whether the check 853 is for a general or a particular category. However, in both cases the 854 category contains more characters than the specials that are defined 855 for the property being tested against. Therefore, it cannot be used 856 in a NOTPROP case. 857 858 Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po. 859 Underscore is covered by ucp_P or ucp_Po. 
*/ 860 861 case 6: /* Left alphanum vs right general category */ 862 case 7: /* Left space vs right general category */ 863 case 8: /* Left word vs right general category */ 864 p = posspropstab[n-6]; 865 accepted = risprop && lisprop == 866 (list[3] != p[0] && 867 list[3] != p[1] && 868 (list[3] != p[2] || !lisprop)); 869 break; 870 871 case 9: /* Right alphanum vs left general category */ 872 case 10: /* Right space vs left general category */ 873 case 11: /* Right word vs left general category */ 874 p = posspropstab[n-9]; 875 accepted = lisprop && risprop == 876 (base_list[3] != p[0] && 877 base_list[3] != p[1] && 878 (base_list[3] != p[2] || !risprop)); 879 break; 880 881 case 12: /* Left alphanum vs right particular category */ 882 case 13: /* Left space vs right particular category */ 883 case 14: /* Left word vs right particular category */ 884 p = posspropstab[n-12]; 885 accepted = risprop && lisprop == 886 (catposstab[p[0]][list[3]] && 887 catposstab[p[1]][list[3]] && 888 (list[3] != p[3] || !lisprop)); 889 break; 890 891 case 15: /* Right alphanum vs left particular category */ 892 case 16: /* Right space vs left particular category */ 893 case 17: /* Right word vs left particular category */ 894 p = posspropstab[n-15]; 895 accepted = lisprop && risprop == 896 (catposstab[p[0]][base_list[3]] && 897 catposstab[p[1]][base_list[3]] && 898 (base_list[3] != p[3] || !risprop)); 899 break; 900 } 901 } 902 } 903 904 else 905 #endif /* SUPPORT_UNICODE */ 906 907 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP && 908 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP && 909 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; 910 911 if (!accepted) return FALSE; 912 913 if (list[1] == 0) return TRUE; 914 /* Might be an empty repeat. */ 915 continue; 916 } 917 918 /* Control reaches here only if one of the items is a small character list. 919 All characters are checked against the other side. 
*/ 920 921 do 922 { 923 chr = *chr_ptr; 924 925 switch(list_ptr[0]) 926 { 927 case OP_CHAR: 928 ochr_ptr = list_ptr + 2; 929 do 930 { 931 if (chr == *ochr_ptr) return FALSE; 932 ochr_ptr++; 933 } 934 while(*ochr_ptr != NOTACHAR); 935 break; 936 937 case OP_NOT: 938 ochr_ptr = list_ptr + 2; 939 do 940 { 941 if (chr == *ochr_ptr) 942 break; 943 ochr_ptr++; 944 } 945 while(*ochr_ptr != NOTACHAR); 946 if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */ 947 break; 948 949 /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not* 950 set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ 951 952 case OP_DIGIT: 953 if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE; 954 break; 955 956 case OP_NOT_DIGIT: 957 if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE; 958 break; 959 960 case OP_WHITESPACE: 961 if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE; 962 break; 963 964 case OP_NOT_WHITESPACE: 965 if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE; 966 break; 967 968 case OP_WORDCHAR: 969 if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE; 970 break; 971 972 case OP_NOT_WORDCHAR: 973 if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE; 974 break; 975 976 case OP_HSPACE: 977 switch(chr) 978 { 979 HSPACE_CASES: return FALSE; 980 default: break; 981 } 982 break; 983 984 case OP_NOT_HSPACE: 985 switch(chr) 986 { 987 HSPACE_CASES: break; 988 default: return FALSE; 989 } 990 break; 991 992 case OP_ANYNL: 993 case OP_VSPACE: 994 switch(chr) 995 { 996 VSPACE_CASES: return FALSE; 997 default: break; 998 } 999 break; 1000 1001 case OP_NOT_VSPACE: 1002 switch(chr) 1003 { 1004 VSPACE_CASES: break; 1005 default: return FALSE; 1006 } 1007 break; 1008 1009 case OP_DOLL: 1010 case OP_EODN: 1011 switch (chr) 1012 { 1013 case CHAR_CR: 1014 case CHAR_LF: 1015 case CHAR_VT: 1016 case CHAR_FF: 1017 case CHAR_NEL: 1018 #ifndef EBCDIC 1019 case 0x2028: 1020 case 
0x2029: 1021 #endif /* Not EBCDIC */ 1022 return FALSE; 1023 } 1024 break; 1025 1026 case OP_EOD: /* Can always possessify before \z */ 1027 break; 1028 1029 #ifdef SUPPORT_UNICODE 1030 case OP_PROP: 1031 case OP_NOTPROP: 1032 if (!check_char_prop(chr, list_ptr[2], list_ptr[3], 1033 list_ptr[0] == OP_NOTPROP)) 1034 return FALSE; 1035 break; 1036 #endif 1037 1038 case OP_NCLASS: 1039 if (chr > 255) return FALSE; 1040 /* Fall through */ 1041 1042 case OP_CLASS: 1043 if (chr > 255) break; 1044 class_bitset = (uint8_t *) 1045 ((list_ptr == list ? code : base_end) - list_ptr[2]); 1046 if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE; 1047 break; 1048 1049 #ifdef SUPPORT_WIDE_CHARS 1050 case OP_XCLASS: 1051 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) - 1052 list_ptr[2] + LINK_SIZE, utf)) return FALSE; 1053 break; 1054 #endif 1055 1056 default: 1057 return FALSE; 1058 } 1059 1060 chr_ptr++; 1061 } 1062 while(*chr_ptr != NOTACHAR); 1063 1064 /* At least one character must be matched from this opcode. */ 1065 1066 if (list[1] == 0) return TRUE; 1067 } 1068 1069 /* Control never reaches here. There used to be a fail-save return FALSE; here, 1070 but some compilers complain about an unreachable statement. */ 1071 } 1072 1073 1074 1075 /************************************************* 1076 * Scan compiled regex for auto-possession * 1077 *************************************************/ 1078 1079 /* Replaces single character iterations with their possessive alternatives 1080 if appropriate. This function modifies the compiled opcode! Hitting a 1081 non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a 1082 bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches 1083 overly complicated or large patterns. In these cases, the check just stops, 1084 leaving the remainder of the pattern unpossessified. 
1085 1086 Arguments: 1087 code points to start of the byte code 1088 utf TRUE in UTF mode 1089 cb compile data block 1090 1091 Returns: 0 for success 1092 -1 if a non-existant opcode is encountered 1093 */ 1094 1095 int 1096 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb) 1097 { 1098 PCRE2_UCHAR c; 1099 PCRE2_SPTR end; 1100 PCRE2_UCHAR *repeat_opcode; 1101 uint32_t list[8]; 1102 int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */ 1103 1104 for (;;) 1105 { 1106 c = *code; 1107 1108 if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */ 1109 1110 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) 1111 { 1112 c -= get_repeat_base(c) - OP_STAR; 1113 end = (c <= OP_MINUPTO) ? 1114 get_chr_property_list(code, utf, cb->fcc, list) : NULL; 1115 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; 1116 1117 if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit)) 1118 { 1119 switch(c) 1120 { 1121 case OP_STAR: 1122 *code += OP_POSSTAR - OP_STAR; 1123 break; 1124 1125 case OP_MINSTAR: 1126 *code += OP_POSSTAR - OP_MINSTAR; 1127 break; 1128 1129 case OP_PLUS: 1130 *code += OP_POSPLUS - OP_PLUS; 1131 break; 1132 1133 case OP_MINPLUS: 1134 *code += OP_POSPLUS - OP_MINPLUS; 1135 break; 1136 1137 case OP_QUERY: 1138 *code += OP_POSQUERY - OP_QUERY; 1139 break; 1140 1141 case OP_MINQUERY: 1142 *code += OP_POSQUERY - OP_MINQUERY; 1143 break; 1144 1145 case OP_UPTO: 1146 *code += OP_POSUPTO - OP_UPTO; 1147 break; 1148 1149 case OP_MINUPTO: 1150 *code += OP_POSUPTO - OP_MINUPTO; 1151 break; 1152 } 1153 } 1154 c = *code; 1155 } 1156 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) 1157 { 1158 #ifdef SUPPORT_WIDE_CHARS 1159 if (c == OP_XCLASS) 1160 repeat_opcode = code + GET(code, 1); 1161 else 1162 #endif 1163 repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR)); 1164 1165 c = *repeat_opcode; 1166 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) 1167 { 1168 /* end must not be NULL. 
*/ 1169 end = get_chr_property_list(code, utf, cb->fcc, list); 1170 1171 list[1] = (c & 1) == 0; 1172 1173 if (compare_opcodes(end, utf, cb, list, end, &rec_limit)) 1174 { 1175 switch (c) 1176 { 1177 case OP_CRSTAR: 1178 case OP_CRMINSTAR: 1179 *repeat_opcode = OP_CRPOSSTAR; 1180 break; 1181 1182 case OP_CRPLUS: 1183 case OP_CRMINPLUS: 1184 *repeat_opcode = OP_CRPOSPLUS; 1185 break; 1186 1187 case OP_CRQUERY: 1188 case OP_CRMINQUERY: 1189 *repeat_opcode = OP_CRPOSQUERY; 1190 break; 1191 1192 case OP_CRRANGE: 1193 case OP_CRMINRANGE: 1194 *repeat_opcode = OP_CRPOSRANGE; 1195 break; 1196 } 1197 } 1198 } 1199 c = *code; 1200 } 1201 1202 switch(c) 1203 { 1204 case OP_END: 1205 return 0; 1206 1207 case OP_TYPESTAR: 1208 case OP_TYPEMINSTAR: 1209 case OP_TYPEPLUS: 1210 case OP_TYPEMINPLUS: 1211 case OP_TYPEQUERY: 1212 case OP_TYPEMINQUERY: 1213 case OP_TYPEPOSSTAR: 1214 case OP_TYPEPOSPLUS: 1215 case OP_TYPEPOSQUERY: 1216 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 1217 break; 1218 1219 case OP_TYPEUPTO: 1220 case OP_TYPEMINUPTO: 1221 case OP_TYPEEXACT: 1222 case OP_TYPEPOSUPTO: 1223 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 1224 code += 2; 1225 break; 1226 1227 case OP_CALLOUT_STR: 1228 code += GET(code, 1 + 2*LINK_SIZE); 1229 break; 1230 1231 #ifdef SUPPORT_WIDE_CHARS 1232 case OP_XCLASS: 1233 code += GET(code, 1); 1234 break; 1235 #endif 1236 1237 case OP_MARK: 1238 case OP_COMMIT_ARG: 1239 case OP_PRUNE_ARG: 1240 case OP_SKIP_ARG: 1241 case OP_THEN_ARG: 1242 code += code[1]; 1243 break; 1244 } 1245 1246 /* Add in the fixed length from the table */ 1247 1248 code += PRIV(OP_lengths)[c]; 1249 1250 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be 1251 followed by a multi-byte character. The length in the table is a minimum, so 1252 we have to arrange to skip the extra code units. 
*/ 1253 1254 #ifdef MAYBE_UTF_MULTI 1255 if (utf) switch(c) 1256 { 1257 case OP_CHAR: 1258 case OP_CHARI: 1259 case OP_NOT: 1260 case OP_NOTI: 1261 case OP_STAR: 1262 case OP_MINSTAR: 1263 case OP_PLUS: 1264 case OP_MINPLUS: 1265 case OP_QUERY: 1266 case OP_MINQUERY: 1267 case OP_UPTO: 1268 case OP_MINUPTO: 1269 case OP_EXACT: 1270 case OP_POSSTAR: 1271 case OP_POSPLUS: 1272 case OP_POSQUERY: 1273 case OP_POSUPTO: 1274 case OP_STARI: 1275 case OP_MINSTARI: 1276 case OP_PLUSI: 1277 case OP_MINPLUSI: 1278 case OP_QUERYI: 1279 case OP_MINQUERYI: 1280 case OP_UPTOI: 1281 case OP_MINUPTOI: 1282 case OP_EXACTI: 1283 case OP_POSSTARI: 1284 case OP_POSPLUSI: 1285 case OP_POSQUERYI: 1286 case OP_POSUPTOI: 1287 case OP_NOTSTAR: 1288 case OP_NOTMINSTAR: 1289 case OP_NOTPLUS: 1290 case OP_NOTMINPLUS: 1291 case OP_NOTQUERY: 1292 case OP_NOTMINQUERY: 1293 case OP_NOTUPTO: 1294 case OP_NOTMINUPTO: 1295 case OP_NOTEXACT: 1296 case OP_NOTPOSSTAR: 1297 case OP_NOTPOSPLUS: 1298 case OP_NOTPOSQUERY: 1299 case OP_NOTPOSUPTO: 1300 case OP_NOTSTARI: 1301 case OP_NOTMINSTARI: 1302 case OP_NOTPLUSI: 1303 case OP_NOTMINPLUSI: 1304 case OP_NOTQUERYI: 1305 case OP_NOTMINQUERYI: 1306 case OP_NOTUPTOI: 1307 case OP_NOTMINUPTOI: 1308 case OP_NOTEXACTI: 1309 case OP_NOTPOSSTARI: 1310 case OP_NOTPOSPLUSI: 1311 case OP_NOTPOSQUERYI: 1312 case OP_NOTPOSUPTOI: 1313 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 1314 break; 1315 } 1316 #else 1317 (void)(utf); /* Keep compiler happy by referencing function argument */ 1318 #endif /* SUPPORT_WIDE_CHARS */ 1319 } 1320 } 1321 1322 /* End of pcre2_auto_possess.c */ 1323