Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 /* This module contains functions that scan a compiled pattern and change
     42 repeats into possessive repeats where possible. */
     43 
     44 
     45 #ifdef HAVE_CONFIG_H
     46 #include "config.h"
     47 #endif
     48 
     49 
     50 #include "pcre2_internal.h"
     51 
     52 
     53 /*************************************************
     54 *        Tables for auto-possessification        *
     55 *************************************************/
     56 
     57 /* This table is used to check whether auto-possessification is possible
     58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
     59 used to select the row, and the right-hand opcode is use to select the column.
     60 A value of 1 means that auto-possessification is OK. For example, the second
     61 value in the first row means that \D+\d can be turned into \D++\d.
     62 
     63 The Unicode property types (\P and \p) have to be present to fill out the table
     64 because of what their opcode values are, but the table values should always be
     65 zero because property types are handled separately in the code. The last four
     66 columns apply to items that cannot be repeated, so there is no need to have
     67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
     68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
     69 
     70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
     71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
     72 
     73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
     74 /* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
     75   { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
     76   { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
     77   { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
     78   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
     79   { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
     80   { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
     81   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
     82   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
     83   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
     84   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
     85   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
     86   { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
     87   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
     88   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
     89   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
     90   { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
     91   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
     92 };
     93 
     94 #ifdef SUPPORT_UNICODE
     95 /* This table is used to check whether auto-possessification is possible
     96 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
     97 left-hand (repeated) opcode is used to select the row, and the right-hand
     98 opcode is used to select the column. The values are as follows:
     99 
    100   0   Always return FALSE (never auto-possessify)
    101   1   Character groups are distinct (possessify if both are OP_PROP)
    102   2   Check character categories in the same group (general or particular)
    103   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
    104 
    105   4   Check left general category vs right particular category
    106   5   Check right general category vs left particular category
    107 
    108   6   Left alphanum vs right general category
    109   7   Left space vs right general category
    110   8   Left word vs right general category
    111 
    112   9   Right alphanum vs left general category
    113  10   Right space vs left general category
    114  11   Right word vs left general category
    115 
    116  12   Left alphanum vs right particular category
    117  13   Left space vs right particular category
    118  14   Left word vs right particular category
    119 
    120  15   Right alphanum vs left particular category
    121  16   Right space vs left particular category
    122  17   Right word vs left particular category
    123 */
    124 
    125 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
    126 /* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
    127   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
    128   { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
    129   { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
    130   { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
    131   { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
    132   { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
    133   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
    134   { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
    135   { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
    136   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
    137   { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
    138 };
    139 
    140 /* This table is used to check whether auto-possessification is possible
    141 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
    142 specifies a general category and the other specifies a particular category. The
    143 row is selected by the general category and the column by the particular
    144 category. The value is 1 if the particular category is not part of the general
    145 category. */
    146 
    147 static const uint8_t catposstab[7][30] = {
    148 /* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
    149   { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
    150   { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
    151   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
    152   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
    153   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
    154   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
    155   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
    156 };
    157 
    158 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
    159 a general or particular category. The properties in each row are those
    160 that apply to the character set in question. Duplication means that a little
    161 unnecessary work is done when checking, but this keeps things much simpler
    162 because they can all use the same code. For more details see the comment where
    163 this table is used.
    164 
    165 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
    166 "space", but from Perl 5.18 it's included, so both categories are treated the
    167 same here. */
    168 
    169 static const uint8_t posspropstab[3][4] = {
    170   { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
    171   { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
    172   { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
    173 };
    174 #endif  /* SUPPORT_UNICODE */
    175 
    176 
    177 
    178 #ifdef SUPPORT_UNICODE
    179 /*************************************************
    180 *        Check a character and a property        *
    181 *************************************************/
    182 
    183 /* This function is called by compare_opcodes() when a property item is
    184 adjacent to a fixed character.
    185 
    186 Arguments:
    187   c            the character
    188   ptype        the property type
    189   pdata        the data for the type
    190   negated      TRUE if it's a negated property (\P or \p{^)
    191 
    192 Returns:       TRUE if auto-possessifying is OK
    193 */
    194 
    195 static BOOL
    196 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
    197   BOOL negated)
    198 {
    199 const uint32_t *p;
    200 const ucd_record *prop = GET_UCD(c);
    201 
    202 switch(ptype)
    203   {
    204   case PT_LAMP:
    205   return (prop->chartype == ucp_Lu ||
    206           prop->chartype == ucp_Ll ||
    207           prop->chartype == ucp_Lt) == negated;
    208 
    209   case PT_GC:
    210   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
    211 
    212   case PT_PC:
    213   return (pdata == prop->chartype) == negated;
    214 
    215   case PT_SC:
    216   return (pdata == prop->script) == negated;
    217 
    218   /* These are specials */
    219 
    220   case PT_ALNUM:
    221   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
    222           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
    223 
    224   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
    225   means that Perl space and POSIX space are now identical. PCRE was changed
    226   at release 8.34. */
    227 
    228   case PT_SPACE:    /* Perl space */
    229   case PT_PXSPACE:  /* POSIX space */
    230   switch(c)
    231     {
    232     HSPACE_CASES:
    233     VSPACE_CASES:
    234     return negated;
    235 
    236     default:
    237     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
    238     }
    239   break;  /* Control never reaches here */
    240 
    241   case PT_WORD:
    242   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
    243           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
    244           c == CHAR_UNDERSCORE) == negated;
    245 
    246   case PT_CLIST:
    247   p = PRIV(ucd_caseless_sets) + prop->caseset;
    248   for (;;)
    249     {
    250     if (c < *p) return !negated;
    251     if (c == *p++) return negated;
    252     }
    253   break;  /* Control never reaches here */
    254   }
    255 
    256 return FALSE;
    257 }
    258 #endif  /* SUPPORT_UNICODE */
    259 
    260 
    261 
    262 /*************************************************
    263 *        Base opcode of repeated opcodes         *
    264 *************************************************/
    265 
    266 /* Returns the base opcode for repeated single character type opcodes. If the
    267 opcode is not a repeated character type, it returns with the original value.
    268 
    269 Arguments:  c opcode
    270 Returns:    base opcode for the type
    271 */
    272 
    273 static PCRE2_UCHAR
    274 get_repeat_base(PCRE2_UCHAR c)
    275 {
    276 return (c > OP_TYPEPOSUPTO)? c :
    277        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
    278        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
    279        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
    280        (c >= OP_STARI)?      OP_STARI :
    281                              OP_STAR;
    282 }
    283 
    284 
    285 /*************************************************
    286 *        Fill the character property list        *
    287 *************************************************/
    288 
    289 /* Checks whether the code points to an opcode that can take part in auto-
    290 possessification, and if so, fills a list with its properties.
    291 
    292 Arguments:
    293   code        points to start of expression
    294   utf         TRUE if in UTF mode
    295   fcc         points to the case-flipping table
    296   list        points to output list
    297               list[0] will be filled with the opcode
    298               list[1] will be non-zero if this opcode
    299                 can match an empty character string
    300               list[2..7] depends on the opcode
    301 
    302 Returns:      points to the start of the next opcode if *code is accepted
    303               NULL if *code is not accepted
    304 */
    305 
    306 static PCRE2_SPTR
    307 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
    308   uint32_t *list)
    309 {
    310 PCRE2_UCHAR c = *code;
    311 PCRE2_UCHAR base;
    312 PCRE2_SPTR end;
    313 uint32_t chr;
    314 
    315 #ifdef SUPPORT_UNICODE
    316 uint32_t *clist_dest;
    317 const uint32_t *clist_src;
    318 #else
    319 (void)utf;    /* Suppress "unused parameter" compiler warning */
    320 #endif
    321 
    322 list[0] = c;
    323 list[1] = FALSE;
    324 code++;
    325 
    326 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
    327   {
    328   base = get_repeat_base(c);
    329   c -= (base - OP_STAR);
    330 
    331   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
    332     code += IMM2_SIZE;
    333 
    334   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
    335              c != OP_POSPLUS);
    336 
    337   switch(base)
    338     {
    339     case OP_STAR:
    340     list[0] = OP_CHAR;
    341     break;
    342 
    343     case OP_STARI:
    344     list[0] = OP_CHARI;
    345     break;
    346 
    347     case OP_NOTSTAR:
    348     list[0] = OP_NOT;
    349     break;
    350 
    351     case OP_NOTSTARI:
    352     list[0] = OP_NOTI;
    353     break;
    354 
    355     case OP_TYPESTAR:
    356     list[0] = *code;
    357     code++;
    358     break;
    359     }
    360   c = list[0];
    361   }
    362 
    363 switch(c)
    364   {
    365   case OP_NOT_DIGIT:
    366   case OP_DIGIT:
    367   case OP_NOT_WHITESPACE:
    368   case OP_WHITESPACE:
    369   case OP_NOT_WORDCHAR:
    370   case OP_WORDCHAR:
    371   case OP_ANY:
    372   case OP_ALLANY:
    373   case OP_ANYNL:
    374   case OP_NOT_HSPACE:
    375   case OP_HSPACE:
    376   case OP_NOT_VSPACE:
    377   case OP_VSPACE:
    378   case OP_EXTUNI:
    379   case OP_EODN:
    380   case OP_EOD:
    381   case OP_DOLL:
    382   case OP_DOLLM:
    383   return code;
    384 
    385   case OP_CHAR:
    386   case OP_NOT:
    387   GETCHARINCTEST(chr, code);
    388   list[2] = chr;
    389   list[3] = NOTACHAR;
    390   return code;
    391 
    392   case OP_CHARI:
    393   case OP_NOTI:
    394   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
    395   GETCHARINCTEST(chr, code);
    396   list[2] = chr;
    397 
    398 #ifdef SUPPORT_UNICODE
    399   if (chr < 128 || (chr < 256 && !utf))
    400     list[3] = fcc[chr];
    401   else
    402     list[3] = UCD_OTHERCASE(chr);
    403 #elif defined SUPPORT_WIDE_CHARS
    404   list[3] = (chr < 256) ? fcc[chr] : chr;
    405 #else
    406   list[3] = fcc[chr];
    407 #endif
    408 
    409   /* The othercase might be the same value. */
    410 
    411   if (chr == list[3])
    412     list[3] = NOTACHAR;
    413   else
    414     list[4] = NOTACHAR;
    415   return code;
    416 
    417 #ifdef SUPPORT_UNICODE
    418   case OP_PROP:
    419   case OP_NOTPROP:
    420   if (code[0] != PT_CLIST)
    421     {
    422     list[2] = code[0];
    423     list[3] = code[1];
    424     return code + 2;
    425     }
    426 
    427   /* Convert only if we have enough space. */
    428 
    429   clist_src = PRIV(ucd_caseless_sets) + code[1];
    430   clist_dest = list + 2;
    431   code += 2;
    432 
    433   do {
    434      if (clist_dest >= list + 8)
    435        {
    436        /* Early return if there is not enough space. This should never
    437        happen, since all clists are shorter than 5 character now. */
    438        list[2] = code[0];
    439        list[3] = code[1];
    440        return code;
    441        }
    442      *clist_dest++ = *clist_src;
    443      }
    444   while(*clist_src++ != NOTACHAR);
    445 
    446   /* All characters are stored. The terminating NOTACHAR is copied from the
    447   clist itself. */
    448 
    449   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
    450   return code;
    451 #endif
    452 
    453   case OP_NCLASS:
    454   case OP_CLASS:
    455 #ifdef SUPPORT_WIDE_CHARS
    456   case OP_XCLASS:
    457   if (c == OP_XCLASS)
    458     end = code + GET(code, 0) - 1;
    459   else
    460 #endif
    461     end = code + 32 / sizeof(PCRE2_UCHAR);
    462 
    463   switch(*end)
    464     {
    465     case OP_CRSTAR:
    466     case OP_CRMINSTAR:
    467     case OP_CRQUERY:
    468     case OP_CRMINQUERY:
    469     case OP_CRPOSSTAR:
    470     case OP_CRPOSQUERY:
    471     list[1] = TRUE;
    472     end++;
    473     break;
    474 
    475     case OP_CRPLUS:
    476     case OP_CRMINPLUS:
    477     case OP_CRPOSPLUS:
    478     end++;
    479     break;
    480 
    481     case OP_CRRANGE:
    482     case OP_CRMINRANGE:
    483     case OP_CRPOSRANGE:
    484     list[1] = (GET2(end, 1) == 0);
    485     end += 1 + 2 * IMM2_SIZE;
    486     break;
    487     }
    488   list[2] = (uint32_t)(end - code);
    489   return end;
    490   }
    491 return NULL;    /* Opcode not accepted */
    492 }
    493 
    494 
    495 
    496 /*************************************************
    497 *    Scan further character sets for match       *
    498 *************************************************/
    499 
    500 /* Checks whether the base and the current opcode have a common character, in
    501 which case the base cannot be possessified.
    502 
    503 Arguments:
    504   code        points to the byte code
    505   utf         TRUE in UTF mode
    506   cb          compile data block
    507   base_list   the data list of the base opcode
    508   base_end    the end of the data list
    509   rec_limit   points to recursion depth counter
    510 
    511 Returns:      TRUE if the auto-possessification is possible
    512 */
    513 
    514 static BOOL
    515 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
    516   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
    517 {
    518 PCRE2_UCHAR c;
    519 uint32_t list[8];
    520 const uint32_t *chr_ptr;
    521 const uint32_t *ochr_ptr;
    522 const uint32_t *list_ptr;
    523 PCRE2_SPTR next_code;
    524 #ifdef SUPPORT_WIDE_CHARS
    525 PCRE2_SPTR xclass_flags;
    526 #endif
    527 const uint8_t *class_bitset;
    528 const uint8_t *set1, *set2, *set_end;
    529 uint32_t chr;
    530 BOOL accepted, invert_bits;
    531 BOOL entered_a_group = FALSE;
    532 
    533 if (--(*rec_limit) <= 0) return FALSE;  /* Recursion has gone too deep */
    534 
    535 /* Note: the base_list[1] contains whether the current opcode has a greedy
    536 (represented by a non-zero value) quantifier. This is a different from
    537 other character type lists, which store here that the character iterator
    538 matches to an empty string (also represented by a non-zero value). */
    539 
    540 for(;;)
    541   {
    542   /* All operations move the code pointer forward.
    543   Therefore infinite recursions are not possible. */
    544 
    545   c = *code;
    546 
    547   /* Skip over callouts */
    548 
    549   if (c == OP_CALLOUT)
    550     {
    551     code += PRIV(OP_lengths)[c];
    552     continue;
    553     }
    554 
    555   if (c == OP_CALLOUT_STR)
    556     {
    557     code += GET(code, 1 + 2*LINK_SIZE);
    558     continue;
    559     }
    560 
    561   if (c == OP_ALT)
    562     {
    563     do code += GET(code, 1); while (*code == OP_ALT);
    564     c = *code;
    565     }
    566 
    567   switch(c)
    568     {
    569     case OP_END:
    570     case OP_KETRPOS:
    571     /* TRUE only in greedy case. The non-greedy case could be replaced by
    572     an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
    573     uses more memory, which we cannot get at this stage.) */
    574 
    575     return base_list[1] != 0;
    576 
    577     case OP_KET:
    578     /* If the bracket is capturing, and referenced by an OP_RECURSE, or
    579     it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
    580     cannot be converted to a possessive form. */
    581 
    582     if (base_list[1] == 0) return FALSE;
    583 
    584     switch(*(code - GET(code, 1)))
    585       {
    586       case OP_ASSERT:
    587       case OP_ASSERT_NOT:
    588       case OP_ASSERTBACK:
    589       case OP_ASSERTBACK_NOT:
    590       case OP_ONCE:
    591       case OP_ONCE_NC:
    592       /* Atomic sub-patterns and assertions can always auto-possessify their
    593       last iterator. However, if the group was entered as a result of checking
    594       a previous iterator, this is not possible. */
    595 
    596       return !entered_a_group;
    597       }
    598 
    599     code += PRIV(OP_lengths)[c];
    600     continue;
    601 
    602     case OP_ONCE:
    603     case OP_ONCE_NC:
    604     case OP_BRA:
    605     case OP_CBRA:
    606     next_code = code + GET(code, 1);
    607     code += PRIV(OP_lengths)[c];
    608 
    609     while (*next_code == OP_ALT)
    610       {
    611       if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
    612         return FALSE;
    613       code = next_code + 1 + LINK_SIZE;
    614       next_code += GET(next_code, 1);
    615       }
    616 
    617     entered_a_group = TRUE;
    618     continue;
    619 
    620     case OP_BRAZERO:
    621     case OP_BRAMINZERO:
    622 
    623     next_code = code + 1;
    624     if (*next_code != OP_BRA && *next_code != OP_CBRA
    625         && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
    626 
    627     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
    628 
    629     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
    630 
    631     next_code += 1 + LINK_SIZE;
    632     if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
    633       return FALSE;
    634 
    635     code += PRIV(OP_lengths)[c];
    636     continue;
    637 
    638     default:
    639     break;
    640     }
    641 
    642   /* Check for a supported opcode, and load its properties. */
    643 
    644   code = get_chr_property_list(code, utf, cb->fcc, list);
    645   if (code == NULL) return FALSE;    /* Unsupported */
    646 
    647   /* If either opcode is a small character list, set pointers for comparing
    648   characters from that list with another list, or with a property. */
    649 
    650   if (base_list[0] == OP_CHAR)
    651     {
    652     chr_ptr = base_list + 2;
    653     list_ptr = list;
    654     }
    655   else if (list[0] == OP_CHAR)
    656     {
    657     chr_ptr = list + 2;
    658     list_ptr = base_list;
    659     }
    660 
    661   /* Character bitsets can also be compared to certain opcodes. */
    662 
    663   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
    664 #if PCRE2_CODE_UNIT_WIDTH == 8
    665       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
    666       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
    667 #endif
    668       )
    669     {
    670 #if PCRE2_CODE_UNIT_WIDTH == 8
    671     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
    672 #else
    673     if (base_list[0] == OP_CLASS)
    674 #endif
    675       {
    676       set1 = (uint8_t *)(base_end - base_list[2]);
    677       list_ptr = list;
    678       }
    679     else
    680       {
    681       set1 = (uint8_t *)(code - list[2]);
    682       list_ptr = base_list;
    683       }
    684 
    685     invert_bits = FALSE;
    686     switch(list_ptr[0])
    687       {
    688       case OP_CLASS:
    689       case OP_NCLASS:
    690       set2 = (uint8_t *)
    691         ((list_ptr == list ? code : base_end) - list_ptr[2]);
    692       break;
    693 
    694 #ifdef SUPPORT_WIDE_CHARS
    695       case OP_XCLASS:
    696       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
    697       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
    698       if ((*xclass_flags & XCL_MAP) == 0)
    699         {
    700         /* No bits are set for characters < 256. */
    701         if (list[1] == 0) return TRUE;
    702         /* Might be an empty repeat. */
    703         continue;
    704         }
    705       set2 = (uint8_t *)(xclass_flags + 1);
    706       break;
    707 #endif
    708 
    709       case OP_NOT_DIGIT:
    710       invert_bits = TRUE;
    711       /* Fall through */
    712       case OP_DIGIT:
    713       set2 = (uint8_t *)(cb->cbits + cbit_digit);
    714       break;
    715 
    716       case OP_NOT_WHITESPACE:
    717       invert_bits = TRUE;
    718       /* Fall through */
    719       case OP_WHITESPACE:
    720       set2 = (uint8_t *)(cb->cbits + cbit_space);
    721       break;
    722 
    723       case OP_NOT_WORDCHAR:
    724       invert_bits = TRUE;
    725       /* Fall through */
    726       case OP_WORDCHAR:
    727       set2 = (uint8_t *)(cb->cbits + cbit_word);
    728       break;
    729 
    730       default:
    731       return FALSE;
    732       }
    733 
    734     /* Because the bit sets are unaligned bytes, we need to perform byte
    735     comparison here. */
    736 
    737     set_end = set1 + 32;
    738     if (invert_bits)
    739       {
    740       do
    741         {
    742         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
    743         }
    744       while (set1 < set_end);
    745       }
    746     else
    747       {
    748       do
    749         {
    750         if ((*set1++ & *set2++) != 0) return FALSE;
    751         }
    752       while (set1 < set_end);
    753       }
    754 
    755     if (list[1] == 0) return TRUE;
    756     /* Might be an empty repeat. */
    757     continue;
    758     }
    759 
    760   /* Some property combinations also acceptable. Unicode property opcodes are
    761   processed specially; the rest can be handled with a lookup table. */
    762 
    763   else
    764     {
    765     uint32_t leftop, rightop;
    766 
    767     leftop = base_list[0];
    768     rightop = list[0];
    769 
    770 #ifdef SUPPORT_UNICODE
    771     accepted = FALSE; /* Always set in non-unicode case. */
    772     if (leftop == OP_PROP || leftop == OP_NOTPROP)
    773       {
    774       if (rightop == OP_EOD)
    775         accepted = TRUE;
    776       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
    777         {
    778         int n;
    779         const uint8_t *p;
    780         BOOL same = leftop == rightop;
    781         BOOL lisprop = leftop == OP_PROP;
    782         BOOL risprop = rightop == OP_PROP;
    783         BOOL bothprop = lisprop && risprop;
    784 
    785         /* There's a table that specifies how each combination is to be
    786         processed:
    787           0   Always return FALSE (never auto-possessify)
    788           1   Character groups are distinct (possessify if both are OP_PROP)
    789           2   Check character categories in the same group (general or particular)
    790           3   Return TRUE if the two opcodes are not the same
    791           ... see comments below
    792         */
    793 
    794         n = propposstab[base_list[2]][list[2]];
    795         switch(n)
    796           {
    797           case 0: break;
    798           case 1: accepted = bothprop; break;
    799           case 2: accepted = (base_list[3] == list[3]) != same; break;
    800           case 3: accepted = !same; break;
    801 
    802           case 4:  /* Left general category, right particular category */
    803           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
    804           break;
    805 
    806           case 5:  /* Right general category, left particular category */
    807           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
    808           break;
    809 
    810           /* This code is logically tricky. Think hard before fiddling with it.
    811           The posspropstab table has four entries per row. Each row relates to
    812           one of PCRE's special properties such as ALNUM or SPACE or WORD.
    813           Only WORD actually needs all four entries, but using repeats for the
    814           others means they can all use the same code below.
    815 
    816           The first two entries in each row are Unicode general categories, and
    817           apply always, because all the characters they include are part of the
    818           PCRE character set. The third and fourth entries are a general and a
    819           particular category, respectively, that include one or more relevant
    820           characters. One or the other is used, depending on whether the check
    821           is for a general or a particular category. However, in both cases the
    822           category contains more characters than the specials that are defined
    823           for the property being tested against. Therefore, it cannot be used
    824           in a NOTPROP case.
    825 
    826           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
    827           Underscore is covered by ucp_P or ucp_Po. */
    828 
    829           case 6:  /* Left alphanum vs right general category */
    830           case 7:  /* Left space vs right general category */
    831           case 8:  /* Left word vs right general category */
    832           p = posspropstab[n-6];
    833           accepted = risprop && lisprop ==
    834             (list[3] != p[0] &&
    835              list[3] != p[1] &&
    836             (list[3] != p[2] || !lisprop));
    837           break;
    838 
    839           case 9:   /* Right alphanum vs left general category */
    840           case 10:  /* Right space vs left general category */
    841           case 11:  /* Right word vs left general category */
    842           p = posspropstab[n-9];
    843           accepted = lisprop && risprop ==
    844             (base_list[3] != p[0] &&
    845              base_list[3] != p[1] &&
    846             (base_list[3] != p[2] || !risprop));
    847           break;
    848 
    849           case 12:  /* Left alphanum vs right particular category */
    850           case 13:  /* Left space vs right particular category */
    851           case 14:  /* Left word vs right particular category */
    852           p = posspropstab[n-12];
    853           accepted = risprop && lisprop ==
    854             (catposstab[p[0]][list[3]] &&
    855              catposstab[p[1]][list[3]] &&
    856             (list[3] != p[3] || !lisprop));
    857           break;
    858 
    859           case 15:  /* Right alphanum vs left particular category */
    860           case 16:  /* Right space vs left particular category */
    861           case 17:  /* Right word vs left particular category */
    862           p = posspropstab[n-15];
    863           accepted = lisprop && risprop ==
    864             (catposstab[p[0]][base_list[3]] &&
    865              catposstab[p[1]][base_list[3]] &&
    866             (base_list[3] != p[3] || !risprop));
    867           break;
    868           }
    869         }
    870       }
    871 
    872     else
    873 #endif  /* SUPPORT_UNICODE */
    874 
    875     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
    876            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
    877            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
    878 
    879     if (!accepted) return FALSE;
    880 
    881     if (list[1] == 0) return TRUE;
    882     /* Might be an empty repeat. */
    883     continue;
    884     }
    885 
    886   /* Control reaches here only if one of the items is a small character list.
    887   All characters are checked against the other side. */
    888 
    889   do
    890     {
    891     chr = *chr_ptr;
    892 
    893     switch(list_ptr[0])
    894       {
    895       case OP_CHAR:
    896       ochr_ptr = list_ptr + 2;
    897       do
    898         {
    899         if (chr == *ochr_ptr) return FALSE;
    900         ochr_ptr++;
    901         }
    902       while(*ochr_ptr != NOTACHAR);
    903       break;
    904 
    905       case OP_NOT:
    906       ochr_ptr = list_ptr + 2;
    907       do
    908         {
    909         if (chr == *ochr_ptr)
    910           break;
    911         ochr_ptr++;
    912         }
    913       while(*ochr_ptr != NOTACHAR);
    914       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
    915       break;
    916 
    917       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
    918       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
    919 
    920       case OP_DIGIT:
    921       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
    922       break;
    923 
    924       case OP_NOT_DIGIT:
    925       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
    926       break;
    927 
    928       case OP_WHITESPACE:
    929       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
    930       break;
    931 
    932       case OP_NOT_WHITESPACE:
    933       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
    934       break;
    935 
    936       case OP_WORDCHAR:
    937       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
    938       break;
    939 
    940       case OP_NOT_WORDCHAR:
    941       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
    942       break;
    943 
    944       case OP_HSPACE:
    945       switch(chr)
    946         {
    947         HSPACE_CASES: return FALSE;
    948         default: break;
    949         }
    950       break;
    951 
    952       case OP_NOT_HSPACE:
    953       switch(chr)
    954         {
    955         HSPACE_CASES: break;
    956         default: return FALSE;
    957         }
    958       break;
    959 
    960       case OP_ANYNL:
    961       case OP_VSPACE:
    962       switch(chr)
    963         {
    964         VSPACE_CASES: return FALSE;
    965         default: break;
    966         }
    967       break;
    968 
    969       case OP_NOT_VSPACE:
    970       switch(chr)
    971         {
    972         VSPACE_CASES: break;
    973         default: return FALSE;
    974         }
    975       break;
    976 
    977       case OP_DOLL:
    978       case OP_EODN:
    979       switch (chr)
    980         {
    981         case CHAR_CR:
    982         case CHAR_LF:
    983         case CHAR_VT:
    984         case CHAR_FF:
    985         case CHAR_NEL:
    986 #ifndef EBCDIC
    987         case 0x2028:
    988         case 0x2029:
    989 #endif  /* Not EBCDIC */
    990         return FALSE;
    991         }
    992       break;
    993 
    994       case OP_EOD:    /* Can always possessify before \z */
    995       break;
    996 
    997 #ifdef SUPPORT_UNICODE
    998       case OP_PROP:
    999       case OP_NOTPROP:
   1000       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
   1001             list_ptr[0] == OP_NOTPROP))
   1002         return FALSE;
   1003       break;
   1004 #endif
   1005 
   1006       case OP_NCLASS:
   1007       if (chr > 255) return FALSE;
   1008       /* Fall through */
   1009 
   1010       case OP_CLASS:
   1011       if (chr > 255) break;
   1012       class_bitset = (uint8_t *)
   1013         ((list_ptr == list ? code : base_end) - list_ptr[2]);
   1014       if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
   1015       break;
   1016 
   1017 #ifdef SUPPORT_WIDE_CHARS
   1018       case OP_XCLASS:
   1019       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
   1020           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
   1021       break;
   1022 #endif
   1023 
   1024       default:
   1025       return FALSE;
   1026       }
   1027 
   1028     chr_ptr++;
   1029     }
   1030   while(*chr_ptr != NOTACHAR);
   1031 
   1032   /* At least one character must be matched from this opcode. */
   1033 
   1034   if (list[1] == 0) return TRUE;
   1035   }
   1036 
   1037 /* Control never reaches here. There used to be a fail-save return FALSE; here,
   1038 but some compilers complain about an unreachable statement. */
   1039 }
   1040 
   1041 
   1042 
   1043 /*************************************************
   1044 *    Scan compiled regex for auto-possession     *
   1045 *************************************************/
   1046 
   1047 /* Replaces single character iterations with their possessive alternatives
   1048 if appropriate. This function modifies the compiled opcode! Hitting a
   1049 non-existant opcode may indicate a bug in PCRE2, but it can also be caused if a
   1050 bad UTF string was compiled with PCRE2_NO_UTF_CHECK.
   1051 
   1052 Arguments:
   1053   code        points to start of the byte code
   1054   utf         TRUE in UTF mode
   1055   cb          compile data block
   1056 
   1057 Returns:      0 for success
   1058               -1 if a non-existant opcode is encountered
   1059 */
   1060 
   1061 int
   1062 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
   1063 {
   1064 register PCRE2_UCHAR c;
   1065 PCRE2_SPTR end;
   1066 PCRE2_UCHAR *repeat_opcode;
   1067 uint32_t list[8];
   1068 int rec_limit;
   1069 
   1070 for (;;)
   1071   {
   1072   c = *code;
   1073 
   1074   if (c > OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
   1075 
   1076   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
   1077     {
   1078     c -= get_repeat_base(c) - OP_STAR;
   1079     end = (c <= OP_MINUPTO) ?
   1080       get_chr_property_list(code, utf, cb->fcc, list) : NULL;
   1081     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
   1082 
   1083     rec_limit = 1000;
   1084     if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
   1085       {
   1086       switch(c)
   1087         {
   1088         case OP_STAR:
   1089         *code += OP_POSSTAR - OP_STAR;
   1090         break;
   1091 
   1092         case OP_MINSTAR:
   1093         *code += OP_POSSTAR - OP_MINSTAR;
   1094         break;
   1095 
   1096         case OP_PLUS:
   1097         *code += OP_POSPLUS - OP_PLUS;
   1098         break;
   1099 
   1100         case OP_MINPLUS:
   1101         *code += OP_POSPLUS - OP_MINPLUS;
   1102         break;
   1103 
   1104         case OP_QUERY:
   1105         *code += OP_POSQUERY - OP_QUERY;
   1106         break;
   1107 
   1108         case OP_MINQUERY:
   1109         *code += OP_POSQUERY - OP_MINQUERY;
   1110         break;
   1111 
   1112         case OP_UPTO:
   1113         *code += OP_POSUPTO - OP_UPTO;
   1114         break;
   1115 
   1116         case OP_MINUPTO:
   1117         *code += OP_POSUPTO - OP_MINUPTO;
   1118         break;
   1119         }
   1120       }
   1121     c = *code;
   1122     }
   1123   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
   1124     {
   1125 #ifdef SUPPORT_WIDE_CHARS
   1126     if (c == OP_XCLASS)
   1127       repeat_opcode = code + GET(code, 1);
   1128     else
   1129 #endif
   1130       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
   1131 
   1132     c = *repeat_opcode;
   1133     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
   1134       {
   1135       /* end must not be NULL. */
   1136       end = get_chr_property_list(code, utf, cb->fcc, list);
   1137 
   1138       list[1] = (c & 1) == 0;
   1139 
   1140       rec_limit = 1000;
   1141       if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
   1142         {
   1143         switch (c)
   1144           {
   1145           case OP_CRSTAR:
   1146           case OP_CRMINSTAR:
   1147           *repeat_opcode = OP_CRPOSSTAR;
   1148           break;
   1149 
   1150           case OP_CRPLUS:
   1151           case OP_CRMINPLUS:
   1152           *repeat_opcode = OP_CRPOSPLUS;
   1153           break;
   1154 
   1155           case OP_CRQUERY:
   1156           case OP_CRMINQUERY:
   1157           *repeat_opcode = OP_CRPOSQUERY;
   1158           break;
   1159 
   1160           case OP_CRRANGE:
   1161           case OP_CRMINRANGE:
   1162           *repeat_opcode = OP_CRPOSRANGE;
   1163           break;
   1164           }
   1165         }
   1166       }
   1167     c = *code;
   1168     }
   1169 
   1170   switch(c)
   1171     {
   1172     case OP_END:
   1173     return 0;
   1174 
   1175     case OP_TYPESTAR:
   1176     case OP_TYPEMINSTAR:
   1177     case OP_TYPEPLUS:
   1178     case OP_TYPEMINPLUS:
   1179     case OP_TYPEQUERY:
   1180     case OP_TYPEMINQUERY:
   1181     case OP_TYPEPOSSTAR:
   1182     case OP_TYPEPOSPLUS:
   1183     case OP_TYPEPOSQUERY:
   1184     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
   1185     break;
   1186 
   1187     case OP_TYPEUPTO:
   1188     case OP_TYPEMINUPTO:
   1189     case OP_TYPEEXACT:
   1190     case OP_TYPEPOSUPTO:
   1191     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
   1192       code += 2;
   1193     break;
   1194 
   1195     case OP_CALLOUT_STR:
   1196     code += GET(code, 1 + 2*LINK_SIZE);
   1197     break;
   1198 
   1199 #ifdef SUPPORT_WIDE_CHARS
   1200     case OP_XCLASS:
   1201     code += GET(code, 1);
   1202     break;
   1203 #endif
   1204 
   1205     case OP_MARK:
   1206     case OP_PRUNE_ARG:
   1207     case OP_SKIP_ARG:
   1208     case OP_THEN_ARG:
   1209     code += code[1];
   1210     break;
   1211     }
   1212 
   1213   /* Add in the fixed length from the table */
   1214 
   1215   code += PRIV(OP_lengths)[c];
   1216 
   1217   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
   1218   followed by a multi-byte character. The length in the table is a minimum, so
   1219   we have to arrange to skip the extra code units. */
   1220 
   1221 #ifdef MAYBE_UTF_MULTI
   1222   if (utf) switch(c)
   1223     {
   1224     case OP_CHAR:
   1225     case OP_CHARI:
   1226     case OP_NOT:
   1227     case OP_NOTI:
   1228     case OP_STAR:
   1229     case OP_MINSTAR:
   1230     case OP_PLUS:
   1231     case OP_MINPLUS:
   1232     case OP_QUERY:
   1233     case OP_MINQUERY:
   1234     case OP_UPTO:
   1235     case OP_MINUPTO:
   1236     case OP_EXACT:
   1237     case OP_POSSTAR:
   1238     case OP_POSPLUS:
   1239     case OP_POSQUERY:
   1240     case OP_POSUPTO:
   1241     case OP_STARI:
   1242     case OP_MINSTARI:
   1243     case OP_PLUSI:
   1244     case OP_MINPLUSI:
   1245     case OP_QUERYI:
   1246     case OP_MINQUERYI:
   1247     case OP_UPTOI:
   1248     case OP_MINUPTOI:
   1249     case OP_EXACTI:
   1250     case OP_POSSTARI:
   1251     case OP_POSPLUSI:
   1252     case OP_POSQUERYI:
   1253     case OP_POSUPTOI:
   1254     case OP_NOTSTAR:
   1255     case OP_NOTMINSTAR:
   1256     case OP_NOTPLUS:
   1257     case OP_NOTMINPLUS:
   1258     case OP_NOTQUERY:
   1259     case OP_NOTMINQUERY:
   1260     case OP_NOTUPTO:
   1261     case OP_NOTMINUPTO:
   1262     case OP_NOTEXACT:
   1263     case OP_NOTPOSSTAR:
   1264     case OP_NOTPOSPLUS:
   1265     case OP_NOTPOSQUERY:
   1266     case OP_NOTPOSUPTO:
   1267     case OP_NOTSTARI:
   1268     case OP_NOTMINSTARI:
   1269     case OP_NOTPLUSI:
   1270     case OP_NOTMINPLUSI:
   1271     case OP_NOTQUERYI:
   1272     case OP_NOTMINQUERYI:
   1273     case OP_NOTUPTOI:
   1274     case OP_NOTMINUPTOI:
   1275     case OP_NOTEXACTI:
   1276     case OP_NOTPOSSTARI:
   1277     case OP_NOTPOSPLUSI:
   1278     case OP_NOTPOSQUERYI:
   1279     case OP_NOTPOSUPTOI:
   1280     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
   1281     break;
   1282     }
   1283 #else
   1284   (void)(utf);  /* Keep compiler happy by referencing function argument */
   1285 #endif  /* SUPPORT_WIDE_CHARS */
   1286   }
   1287 }
   1288 
   1289 /* End of pcre2_auto_possess.c */
   1290