Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 #ifdef HAVE_CONFIG_H
     43 #include "config.h"
     44 #endif
     45 
     46 #define NLBLOCK mb             /* Block containing newline information */
     47 #define PSSTART start_subject  /* Field containing processed string start */
     48 #define PSEND   end_subject    /* Field containing processed string end */
     49 
     50 #include "pcre2_internal.h"
     51 
     52 /* Masks for identifying the public options that are permitted at match time.
     53 */
     54 
     55 #define PUBLIC_MATCH_OPTIONS \
     56   (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
     57    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
     58    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
     59 
     60 #define PUBLIC_JIT_MATCH_OPTIONS \
     61    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
     62     PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
     63 
     64 /* The mb->capture_last field uses the lower 16 bits for the last captured
     65 substring (which can never be greater than 65535) and a bit in the top half
     66 to mean "capture vector overflowed". This odd way of doing things was
     67 implemented when it was realized that preserving and restoring the overflow bit
     68 whenever the last capture number was saved/restored made for a neater
     69 interface, and doing it this way saved on (a) another variable, which would
     70 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
     71 separate set of save/restore instructions. The following defines are used in
     72 implementing this. */
     73 
     74 #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
     75 #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
     76 #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
     77 
     78 /* Bits for setting in mb->match_function_type to indicate two special types
     79 of call to match(). We do it this way to save on using another stack variable,
     80 as stack usage is to be discouraged. */
     81 
     82 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
     83 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
     84 
     85 /* Non-error returns from the match() function. Error returns are externally
     86 defined PCRE2_ERROR_xxx codes, which are all negative. */
     87 
     88 #define MATCH_MATCH        1
     89 #define MATCH_NOMATCH      0
     90 
     91 /* Special internal returns from the match() function. Make them sufficiently
     92 negative to avoid the external error codes. */
     93 
     94 #define MATCH_ACCEPT       (-999)
     95 #define MATCH_KETRPOS      (-998)
     96 #define MATCH_ONCE         (-997)
     97 /* The next 5 must be kept together and in sequence so that a test that checks
     98 for any one of them can use a range. */
     99 #define MATCH_COMMIT       (-996)
    100 #define MATCH_PRUNE        (-995)
    101 #define MATCH_SKIP         (-994)
    102 #define MATCH_SKIP_ARG     (-993)
    103 #define MATCH_THEN         (-992)
    104 #define MATCH_BACKTRACK_MAX MATCH_THEN
    105 #define MATCH_BACKTRACK_MIN MATCH_COMMIT
    106 
    107 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
    108 
    109 static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
    110 static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
    111 
    112 /* Maximum number of ovector elements that can be saved on the system stack
    113 when processing OP_RECURSE in non-HEAP_MATCH_RECURSE mode. If the ovector is
    114 bigger, malloc() is used. This value should be a multiple of 3, because the
    115 ovector length is always a multiple of 3. */
    116 
    117 #define OP_RECURSE_STACK_SAVE_MAX 45
    118 
    119 
    120 
    121 /*************************************************
    122 *          Match a back-reference                *
    123 *************************************************/
    124 
    125 /* This function is called only when it is known that the offset lies within
    126 the offsets that have so far been used in the match. Note that in caseless
    127 UTF-8 mode, the number of subject bytes matched may be different to the number
    128 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
    129 seems unlikely.)
    130 
    131 Arguments:
    132   offset      index into the offset vector
    133   offset_top  top of the used offset vector
    134   eptr        pointer into the subject
    135   mb          points to match block
    136   caseless    TRUE if caseless
    137   lengthptr   pointer for returning the length matched
    138 
    139 Returns:      = 0 sucessful match; number of code units matched is set
    140               < 0 no match
    141               > 0 partial match
    142 */
    143 
    144 static int
    145 match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
    146   match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
    147 {
    148 #if defined SUPPORT_UNICODE
    149 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
    150 #endif
    151 
    152 register PCRE2_SPTR p;
    153 PCRE2_SIZE length;
    154 PCRE2_SPTR eptr_start = eptr;
    155 
    156 /* Deal with an unset group. The default is no match, but there is an option to
    157 match an empty string. */
    158 
    159 if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
    160   {
    161   if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
    162     {
    163     *lengthptr = 0;
    164     return 0;      /* Match */
    165     }
    166   else return -1;  /* No match */
    167   }
    168 
    169 /* Separate the caseless and UTF cases for speed. */
    170 
    171 p = mb->start_subject + mb->ovector[offset];
    172 length = mb->ovector[offset+1] - mb->ovector[offset];
    173 
    174 if (caseless)
    175   {
    176 #if defined SUPPORT_UNICODE
    177   if (utf)
    178     {
    179     /* Match characters up to the end of the reference. NOTE: the number of
    180     code units matched may differ, because in UTF-8 there are some characters
    181     whose upper and lower case versions code have different numbers of bytes.
    182     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
    183     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
    184     sequence of two of the latter. It is important, therefore, to check the
    185     length along the reference, not along the subject (earlier code did this
    186     wrong). */
    187 
    188     PCRE2_SPTR endptr = p + length;
    189     while (p < endptr)
    190       {
    191       uint32_t c, d;
    192       const ucd_record *ur;
    193       if (eptr >= mb->end_subject) return 1;   /* Partial match */
    194       GETCHARINC(c, eptr);
    195       GETCHARINC(d, p);
    196       ur = GET_UCD(d);
    197       if (c != d && c != (uint32_t)((int)d + ur->other_case))
    198         {
    199         const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
    200         for (;;)
    201           {
    202           if (c < *pp) return -1;  /* No match */
    203           if (c == *pp++) break;
    204           }
    205         }
    206       }
    207     }
    208   else
    209 #endif
    210 
    211     /* Not in UTF mode */
    212 
    213     {
    214     for (; length > 0; length--)
    215       {
    216       uint32_t cc, cp;
    217       if (eptr >= mb->end_subject) return 1;   /* Partial match */
    218       cc = UCHAR21TEST(eptr);
    219       cp = UCHAR21TEST(p);
    220       if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
    221         return -1;  /* No match */
    222       p++;
    223       eptr++;
    224       }
    225     }
    226   }
    227 
    228 /* In the caseful case, we can just compare the code units, whether or not we
    229 are in UTF mode. */
    230 
    231 else
    232   {
    233   for (; length > 0; length--)
    234     {
    235     if (eptr >= mb->end_subject) return 1;   /* Partial match */
    236     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /*No match */
    237     }
    238   }
    239 
    240 *lengthptr = eptr - eptr_start;
    241 return 0;  /* Match */
    242 }
    243 
    244 
    245 
    246 /***************************************************************************
    247 ****************************************************************************
    248                    RECURSION IN THE match() FUNCTION
    249 
    250 The match() function is highly recursive, though not every recursive call
    251 increases the recursion depth. Nevertheless, some regular expressions can cause
    252 it to recurse to a great depth. I was writing for Unix, so I just let it call
    253 itself recursively. This uses the stack for saving everything that has to be
    254 saved for a recursive call. On Unix, the stack can be large, and this works
    255 fine.
    256 
    257 It turns out that on some non-Unix-like systems there are problems with
    258 programs that use a lot of stack. (This despite the fact that every last chip
    259 has oodles of memory these days, and techniques for extending the stack have
    260 been known for decades.) So....
    261 
    262 There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
    263 recursive calls by keeping local variables that need to be preserved in blocks
    264 of memory on the heap instead of on the stack. Macros are used to
    265 achieve this so that the actual code doesn't look very different to what it
    266 always used to.
    267 
    268 The original heap-recursive code used longjmp(). However, it seems that this
    269 can be very slow on some operating systems. Following a suggestion from Stan
    270 Switzer, the use of longjmp() has been abolished, at the cost of having to
    271 provide a unique number for each call to RMATCH. There is no way of generating
    272 a sequence of numbers at compile time in C. I have given them names, to make
    273 them stand out more clearly.
    274 
    275 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
    276 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
    277 tests. Furthermore, not using longjmp() means that local dynamic variables
    278 don't have indeterminate values; this has meant that the frame size can be
    279 reduced because the result can be "passed back" by straight setting of the
    280 variable instead of being passed in the frame.
    281 ****************************************************************************
    282 ***************************************************************************/
    283 
    284 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
    285 below must be updated in sync.  */
    286 
    287 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
    288        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
    289        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
    290        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
    291        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
    292        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
    293        RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
    294 
    295 /* These versions of the macros use the stack, as normal. Note that the "rw"
    296 argument of RMATCH isn't actually used in this definition. */
    297 
    298 #ifndef HEAP_MATCH_RECURSE
    299 #define REGISTER register
    300 #define RMATCH(ra,rb,rc,rd,re,rw) \
    301   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
    302 #define RRETURN(ra) return ra
    303 #else
    304 
    305 /* These versions of the macros manage a private stack on the heap. Note that
    306 the "rd" argument of RMATCH isn't actually used in this definition. It's the mb
    307 argument of match(), which never changes. */
    308 
    309 #define REGISTER
    310 
    311 #define RMATCH(ra,rb,rc,rd,re,rw)\
    312   {\
    313   heapframe *newframe = frame->Xnextframe;\
    314   if (newframe == NULL)\
    315     {\
    316     newframe = (heapframe *)(mb->stack_memctl.malloc)\
    317       (sizeof(heapframe), mb->stack_memctl.memory_data);\
    318     if (newframe == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);\
    319     newframe->Xnextframe = NULL;\
    320     frame->Xnextframe = newframe;\
    321     }\
    322   frame->Xwhere = rw;\
    323   newframe->Xeptr = ra;\
    324   newframe->Xecode = rb;\
    325   newframe->Xmstart = mstart;\
    326   newframe->Xoffset_top = rc;\
    327   newframe->Xeptrb = re;\
    328   newframe->Xrdepth = frame->Xrdepth + 1;\
    329   newframe->Xprevframe = frame;\
    330   frame = newframe;\
    331   goto HEAP_RECURSE;\
    332   L_##rw:;\
    333   }
    334 
    335 #define RRETURN(ra)\
    336   {\
    337   heapframe *oldframe = frame;\
    338   frame = oldframe->Xprevframe;\
    339   if (frame != NULL)\
    340     {\
    341     rrc = ra;\
    342     goto HEAP_RETURN;\
    343     }\
    344   return ra;\
    345   }
    346 
    347 
    348 /* Structure for remembering the local variables in a private frame. Arrange it
    349 so as to minimize the number of holes. */
    350 
    351 typedef struct heapframe {
    352   struct heapframe *Xprevframe;
    353   struct heapframe *Xnextframe;
    354 
    355 #ifdef SUPPORT_UNICODE
    356   PCRE2_SPTR Xcharptr;
    357 #endif
    358   PCRE2_SPTR Xeptr;
    359   PCRE2_SPTR Xecode;
    360   PCRE2_SPTR Xmstart;
    361   PCRE2_SPTR Xcallpat;
    362   PCRE2_SPTR Xdata;
    363   PCRE2_SPTR Xnext_ecode;
    364   PCRE2_SPTR Xpp;
    365   PCRE2_SPTR Xprev;
    366   PCRE2_SPTR Xsaved_eptr;
    367 
    368   eptrblock *Xeptrb;
    369 
    370   PCRE2_SIZE Xlength;
    371   PCRE2_SIZE Xoffset;
    372   PCRE2_SIZE Xoffset_top;
    373   PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
    374 
    375   uint32_t Xfc;
    376   uint32_t Xnumber;
    377   uint32_t Xrdepth;
    378   uint32_t Xop;
    379   uint32_t Xsave_capture_last;
    380 
    381 #ifdef SUPPORT_UNICODE
    382   uint32_t Xprop_value;
    383   int Xprop_type;
    384   int Xprop_fail_result;
    385   int Xoclength;
    386 #endif
    387 
    388   int Xcodelink;
    389   int Xctype;
    390   int Xfi;
    391   int Xmax;
    392   int Xmin;
    393   int Xwhere;    /* Where to jump back to */
    394 
    395   BOOL Xcondition;
    396   BOOL Xcur_is_word;
    397   BOOL Xprev_is_word;
    398 
    399   eptrblock Xnewptrb;
    400   recursion_info Xnew_recursive;
    401 
    402 #ifdef SUPPORT_UNICODE
    403   PCRE2_UCHAR Xocchars[6];
    404 #endif
    405 } heapframe;
    406 
    407 #endif
    408 
    409 
    410 /***************************************************************************
    411 ***************************************************************************/
    412 
    413 
    414 /* When HEAP_MATCH_RECURSE is not defined, the match() function implements
    415 backtrack points by calling itself recursively in all but one case. The one
    416 special case is when processing OP_RECURSE, which specifies recursion in the
    417 pattern. The entire ovector must be saved and restored while processing
    418 OP_RECURSE. If the ovector is small enough, instead of calling match()
    419 directly, op_recurse_ovecsave() is called. This function uses the system stack
    420 to save the ovector while calling match() to process the pattern recursion. */
    421 
    422 #ifndef HEAP_MATCH_RECURSE
    423 
    424 /* We need a prototype for match() because it is mutually recursive with
    425 op_recurse_ovecsave(). */
    426 
    427 static int
    428 match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
    429   PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
    430 
    431 
    432 /*************************************************
    433 *      Process OP_RECURSE, stacking ovector      *
    434 *************************************************/
    435 
    436 /* When this function is called, mb->recursive has already been updated to
    437 point to a new recursion data block, and all its fields other than ovec_save
    438 have been set.
    439 
    440 This function exists so that the local vector variable ovecsave is no longer
    441 defined in the match() function, as it was in PCRE1. It is used only when there
    442 is recursion in the pattern, so it wastes a lot of stack to have it defined for
    443 every call of match(). We now use this function as an indirect way of calling
    444 match() only in the case when ovecsave is needed. (David Wheeler used to say
    445 "All problems in computer science can be solved by another level of
    446 indirection.")
    447 
    448 HOWEVER: when this file is compiled by gcc in an optimizing mode, because this
    449 function is called only once, and only from within match(), gcc will "inline"
    450 it - that is, move it inside match() - and this completely negates its reason
    451 for existence. Therefore, we mark it as non-inline when gcc is in use.
    452 
    453 Arguments:
    454   eptr        pointer to current character in subject
    455   callpat     the recursion point in the pattern
    456   mstart      pointer to the current match start position (can be modified
    457                 by encountering \K)
    458   offset_top  current top pointer (highest ovector offset used + 1)
    459   mb          pointer to "static" info block for the match
    460   eptrb       pointer to chain of blocks containing eptr at start of
    461                 brackets - for testing for empty matches
    462   rdepth      the recursion depth
    463 
    464 Returns:      a match() return code
    465 */
    466 
    467 static int
    468 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
    469 __attribute__ ((noinline))
    470 #endif
    471 op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
    472   PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
    473   uint32_t rdepth)
    474 {
    475 register int rrc;
    476 BOOL cbegroup = *callpat >= OP_SBRA;
    477 recursion_info *new_recursive = mb->recursive;
    478 PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
    479 
    480 /* Save the ovector */
    481 
    482 new_recursive->ovec_save = ovecsave;
    483 memcpy(ovecsave, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE));
    484 
    485 /* Do the recursion. After processing each alternative, restore the ovector
    486 data and the last captured value. */
    487 
    488 do
    489   {
    490   if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
    491   rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
    492     mb, eptrb, rdepth + 1);
    493   memcpy(mb->ovector, new_recursive->ovec_save,
    494       mb->offset_end * sizeof(PCRE2_SIZE));
    495   mb->capture_last = new_recursive->saved_capture_last;
    496   if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
    497 
    498   /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
    499   recursion; they cause a NOMATCH for the entire recursion. These codes
    500   are defined in a range that can be tested for. */
    501 
    502   if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
    503     return MATCH_NOMATCH;
    504 
    505   /* Any return code other than NOMATCH is an error. Otherwise, advance to the
    506   next alternative or to the end of the recursing subpattern. If there were
    507   nested recursions, mb->recursive might be changed, so reset it before
    508   looping. */
    509 
    510   if (rrc != MATCH_NOMATCH) return rrc;
    511   mb->recursive = new_recursive;
    512   callpat += GET(callpat, 1);
    513   }
    514 while (*callpat == OP_ALT);  /* Loop for the alternatives */
    515 
    516 /* None of the alternatives matched. */
    517 
    518 return MATCH_NOMATCH;
    519 }
    520 #endif  /* HEAP_MATCH_RECURSE */
    521 
    522 
    523 
    524 /*************************************************
    525 *         Match from current position            *
    526 *************************************************/
    527 
    528 /* This function is called recursively in many circumstances. Whenever it
    529 returns a negative (error) response, the outer incarnation must also return the
    530 same response. */
    531 
    532 /* These macros pack up tests that are used for partial matching, and which
    533 appear several times in the code. We set the "hit end" flag if the pointer is
    534 at the end of the subject and also past the earliest inspected character (i.e.
    535 something has been matched, even if not part of the actual matched string). For
    536 hard partial matching, we then return immediately. The second one is used when
    537 we already know we are past the end of the subject. */
    538 
    539 #define CHECK_PARTIAL()\
    540   if (mb->partial != 0 && eptr >= mb->end_subject && \
    541       eptr > mb->start_used_ptr) \
    542     { \
    543     mb->hitend = TRUE; \
    544     if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL); \
    545     }
    546 
    547 #define SCHECK_PARTIAL()\
    548   if (mb->partial != 0 && eptr > mb->start_used_ptr) \
    549     { \
    550     mb->hitend = TRUE; \
    551     if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL); \
    552     }
    553 
    554 
    555 /* Performance note: It might be tempting to extract commonly used fields from
    556 the mb structure (e.g. utf, end_subject) into individual variables to improve
    557 performance. Tests using gcc on a SPARC disproved this; in the first case, it
    558 made performance worse.
    559 
    560 Arguments:
    561    eptr        pointer to current character in subject
    562    ecode       pointer to current position in compiled code
    563    mstart      pointer to the current match start position (can be modified
    564                  by encountering \K)
    565    offset_top  current top pointer (highest ovector offset used + 1)
    566    mb          pointer to "static" info block for the match
    567    eptrb       pointer to chain of blocks containing eptr at start of
    568                  brackets - for testing for empty matches
    569    rdepth      the recursion depth
    570 
    571 Returns:       MATCH_MATCH if matched            )  these values are >= 0
    572                MATCH_NOMATCH if failed to match  )
    573                a negative MATCH_xxx value for PRUNE, SKIP, etc
    574                a negative PCRE2_ERROR_xxx value if aborted by an error condition
    575                  (e.g. stopped by repeated call or recursion limit)
    576 */
    577 
    578 static int
    579 match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
    580   PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
    581 {
    582 /* These variables do not need to be preserved over recursion in this function,
    583 so they can be ordinary variables in all cases. Mark some of them with
    584 "register" because they are used a lot in loops. */
    585 
    586 register int  rrc;         /* Returns from recursive calls */
    587 register int  i;           /* Used for loops not involving calls to RMATCH() */
    588 register uint32_t c;       /* Character values not kept over RMATCH() calls */
    589 register BOOL utf;         /* Local copy of UTF flag for speed */
    590 
    591 BOOL minimize, possessive; /* Quantifier options */
    592 BOOL caseless;
    593 int condcode;
    594 
    595 /* When recursion is not being used, all "local" variables that have to be
    596 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
    597 frame on the stack here; subsequent instantiations are obtained from the heap
    598 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
    599 the top-level on the stack rather than malloc-ing them all gives a performance
    600 boost in many cases where there is not much "recursion". */
    601 
    602 #ifdef HEAP_MATCH_RECURSE
    603 heapframe *frame = (heapframe *)mb->match_frames_base;
    604 
    605 /* Copy in the original argument variables */
    606 
    607 frame->Xeptr = eptr;
    608 frame->Xecode = ecode;
    609 frame->Xmstart = mstart;
    610 frame->Xoffset_top = offset_top;
    611 frame->Xeptrb = eptrb;
    612 frame->Xrdepth = rdepth;
    613 
    614 /* This is where control jumps back to to effect "recursion" */
    615 
    616 HEAP_RECURSE:
    617 
    618 /* Macros make the argument variables come from the current frame */
    619 
    620 #define eptr               frame->Xeptr
    621 #define ecode              frame->Xecode
    622 #define mstart             frame->Xmstart
    623 #define offset_top         frame->Xoffset_top
    624 #define eptrb              frame->Xeptrb
    625 #define rdepth             frame->Xrdepth
    626 
    627 /* Ditto for the local variables */
    628 
    629 #ifdef SUPPORT_UNICODE
    630 #define charptr            frame->Xcharptr
    631 #define prop_value         frame->Xprop_value
    632 #define prop_type          frame->Xprop_type
    633 #define prop_fail_result   frame->Xprop_fail_result
    634 #define oclength           frame->Xoclength
    635 #define occhars            frame->Xocchars
    636 #endif
    637 
    638 
    639 #define callpat            frame->Xcallpat
    640 #define codelink           frame->Xcodelink
    641 #define data               frame->Xdata
    642 #define next_ecode         frame->Xnext_ecode
    643 #define pp                 frame->Xpp
    644 #define prev               frame->Xprev
    645 #define saved_eptr         frame->Xsaved_eptr
    646 
    647 #define new_recursive      frame->Xnew_recursive
    648 
    649 #define ctype              frame->Xctype
    650 #define fc                 frame->Xfc
    651 #define fi                 frame->Xfi
    652 #define length             frame->Xlength
    653 #define max                frame->Xmax
    654 #define min                frame->Xmin
    655 #define number             frame->Xnumber
    656 #define offset             frame->Xoffset
    657 #define op                 frame->Xop
    658 #define save_capture_last  frame->Xsave_capture_last
    659 #define save_offset1       frame->Xsave_offset1
    660 #define save_offset2       frame->Xsave_offset2
    661 #define save_offset3       frame->Xsave_offset3
    662 
    663 #define condition          frame->Xcondition
    664 #define cur_is_word        frame->Xcur_is_word
    665 #define prev_is_word       frame->Xprev_is_word
    666 
    667 #define newptrb            frame->Xnewptrb
    668 
    669 /* When normal stack-based recursion is being used for match(), local variables
    670 are allocated on the stack and get preserved during recursion in the usual way.
    671 In this environment, fi and i, and fc and c, can be the same variables. */
    672 
    673 #else         /* HEAP_MATCH_RECURSE not defined */
    674 #define fi i
    675 #define fc c
    676 
    677 /* Many of the following variables are used only in small blocks of the code.
    678 My normal style of coding would have declared them within each of those blocks.
    679 However, in order to accommodate the version of this code that uses an external
    680 "stack" implemented on the heap, it is easier to declare them all here, so the
    681 declarations can be cut out in a block. The only declarations within blocks
    682 below are for variables that do not have to be preserved over a recursive call
    683 to RMATCH(). */
    684 
    685 #ifdef SUPPORT_UNICODE
    686 PCRE2_SPTR charptr;
    687 #endif
    688 PCRE2_SPTR callpat;
    689 PCRE2_SPTR data;
    690 PCRE2_SPTR next_ecode;
    691 PCRE2_SPTR pp;
    692 PCRE2_SPTR prev;
    693 PCRE2_SPTR saved_eptr;
    694 
    695 PCRE2_SIZE length;
    696 PCRE2_SIZE offset;
    697 PCRE2_SIZE save_offset1, save_offset2, save_offset3;
    698 
    699 uint32_t number;
    700 uint32_t op;
    701 uint32_t save_capture_last;
    702 
    703 #ifdef SUPPORT_UNICODE
    704 uint32_t prop_value;
    705 int prop_type;
    706 int prop_fail_result;
    707 int oclength;
    708 PCRE2_UCHAR occhars[6];
    709 #endif
    710 
    711 int codelink;
    712 int ctype;
    713 int max;
    714 int min;
    715 
    716 BOOL condition;
    717 BOOL cur_is_word;
    718 BOOL prev_is_word;
    719 
    720 eptrblock newptrb;
    721 recursion_info new_recursive;
    722 #endif  /* HEAP_MATCH_RECURSE not defined */
    723 
    724 /* To save space on the stack and in the heap frame, I have doubled up on some
    725 of the local variables that are used only in localised parts of the code, but
    726 still need to be preserved over recursive calls of match(). These macros define
    727 the alternative names that are used. */
    728 
    729 #define allow_zero      cur_is_word
    730 #define cbegroup        condition
    731 #define code_offset     codelink
    732 #define condassert      condition
    733 #define foc             number
    734 #define matched_once    prev_is_word
    735 #define save_mark       data
    736 
    737 /* These statements are here to stop the compiler complaining about uninitialized
    738 variables. */
    739 
    740 #ifdef SUPPORT_UNICODE
    741 prop_value = 0;
    742 prop_fail_result = 0;
    743 #endif
    744 
    745 
    746 /* This label is used for tail recursion, which is used in a few cases even
    747 when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
    748 that is used. Thanks to Ian Taylor for noticing this possibility and sending
    749 the original patch. */
    750 
    751 TAIL_RECURSE:
    752 
    753 /* OK, now we can get on with the real code of the function. Recursive calls
    754 are specified by the macro RMATCH and RRETURN is used to return. When
    755 HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
    756 match() and a "return", respectively. However, RMATCH isn't like a function
    757 call because it's quite a complicated macro. It has to be used in one
    758 particular way. This shouldn't, however, impact performance when true recursion
    759 is being used. */
    760 
    761 #ifdef SUPPORT_UNICODE
    762 utf = (mb->poptions & PCRE2_UTF) != 0;
    763 #else
    764 utf = FALSE;
    765 #endif
    766 
    767 /* First check that we haven't called match() too many times, or that we
    768 haven't exceeded the recursive call limit. */
    769 
    770 if (mb->match_call_count++ >= mb->match_limit) RRETURN(PCRE2_ERROR_MATCHLIMIT);
    771 if (rdepth >= mb->match_limit_recursion) RRETURN(PCRE2_ERROR_RECURSIONLIMIT);
    772 
    773 /* At the start of a group with an unlimited repeat that may match an empty
    774 string, the variable mb->match_function_type contains the MATCH_CBEGROUP bit.
    775 It is done this way to save having to use another function argument, which
    776 would take up space on the stack. See also MATCH_CONDASSERT below.
    777 
    778 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
    779 such remembered pointers, to be checked when we hit the closing ket, in order
    780 to break infinite loops that match no characters. When match() is called in
    781 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
    782 NOT be used with tail recursion, because the memory block that is used is on
    783 the stack, so a new one may be required for each match(). */
    784 
    785 if ((mb->match_function_type & MATCH_CBEGROUP) != 0)
    786   {
    787   newptrb.epb_saved_eptr = eptr;
    788   newptrb.epb_prev = eptrb;
    789   eptrb = &newptrb;
    790   mb->match_function_type &= ~MATCH_CBEGROUP;
    791   }
    792 
    793 /* Now, at last, we can start processing the opcodes. */
    794 
    795 for (;;)
    796   {
    797   minimize = possessive = FALSE;
    798   op = *ecode;
    799 
    800   switch(op)
    801     {
    802     case OP_MARK:
    803     mb->nomatch_mark = ecode + 2;
    804     mb->mark = NULL;    /* In case previously set by assertion */
    805     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
    806       eptrb, RM55);
    807     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    808          mb->mark == NULL) mb->mark = ecode + 2;
    809 
    810     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
    811     argument, and we must check whether that argument matches this MARK's
    812     argument. It is passed back in mb->start_match_ptr (an overloading of that
    813     variable). If it does match, we reset that variable to the current subject
    814     position and return MATCH_SKIP. Otherwise, pass back the return code
    815     unaltered. */
    816 
    817     else if (rrc == MATCH_SKIP_ARG &&
    818         PRIV(strcmp)(ecode + 2, mb->start_match_ptr) == 0)
    819       {
    820       mb->start_match_ptr = eptr;
    821       RRETURN(MATCH_SKIP);
    822       }
    823     RRETURN(rrc);
    824 
    825     case OP_FAIL:
    826     RRETURN(MATCH_NOMATCH);
    827 
    828     case OP_COMMIT:
    829     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
    830       eptrb, RM52);
    831     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    832     RRETURN(MATCH_COMMIT);
    833 
    834     case OP_PRUNE:
    835     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
    836       eptrb, RM51);
    837     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    838     RRETURN(MATCH_PRUNE);
    839 
    840     case OP_PRUNE_ARG:
    841     mb->nomatch_mark = ecode + 2;
    842     mb->mark = NULL;    /* In case previously set by assertion */
    843     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
    844       eptrb, RM56);
    845     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    846          mb->mark == NULL) mb->mark = ecode + 2;
    847     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    848     RRETURN(MATCH_PRUNE);
    849 
    850     case OP_SKIP:
    851     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
    852       eptrb, RM53);
    853     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    854     mb->start_match_ptr = eptr;   /* Pass back current position */
    855     RRETURN(MATCH_SKIP);
    856 
    857     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
    858     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
    859     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
    860     that failed and any that precede it (either they also failed, or were not
    861     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
    862     SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
    863     set to the count of the one that failed. */
    864 
    865     case OP_SKIP_ARG:
    866     mb->skip_arg_count++;
    867     if (mb->skip_arg_count <= mb->ignore_skip_arg)
    868       {
    869       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
    870       break;
    871       }
    872     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
    873       eptrb, RM57);
    874     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    875 
    876     /* Pass back the current skip name by overloading mb->start_match_ptr and
    877     returning the special MATCH_SKIP_ARG return code. This will either be
    878     caught by a matching MARK, or get to the top, where it causes a rematch
    879     with mb->ignore_skip_arg set to the value of mb->skip_arg_count. */
    880 
    881     mb->start_match_ptr = ecode + 2;
    882     RRETURN(MATCH_SKIP_ARG);
    883 
    884     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
    885     the branch in which it occurs can be determined. Overload the start of
    886     match pointer to do this. */
    887 
    888     case OP_THEN:
    889     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
    890       eptrb, RM54);
    891     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    892     mb->start_match_ptr = ecode;
    893     RRETURN(MATCH_THEN);
    894 
    895     case OP_THEN_ARG:
    896     mb->nomatch_mark = ecode + 2;
    897     mb->mark = NULL;    /* In case previously set by assertion */
    898     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
    899       mb, eptrb, RM58);
    900     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    901          mb->mark == NULL) mb->mark = ecode + 2;
    902     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    903     mb->start_match_ptr = ecode;
    904     RRETURN(MATCH_THEN);
    905 
    906     /* Handle an atomic group that does not contain any capturing parentheses.
    907     This can be handled like an assertion. Prior to 8.13, all atomic groups
    908     were handled this way. In 8.13, the code was changed as below for ONCE, so
    909     that backups pass through the group and thereby reset captured values.
    910     However, this uses a lot more stack, so in 8.20, atomic groups that do not
    911     contain any captures generate OP_ONCE_NC, which can be handled in the old,
    912     less stack intensive way.
    913 
    914     Check the alternative branches in turn - the matching won't pass the KET
    915     for this kind of subpattern. If any one branch matches, we carry on as at
    916     the end of a normal bracket, leaving the subject pointer, but resetting
    917     the start-of-match value in case it was changed by \K. */
    918 
    919     case OP_ONCE_NC:
    920     prev = ecode;
    921     saved_eptr = eptr;
    922     save_mark = mb->mark;
    923     do
    924       {
    925       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM64);
    926       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
    927         {
    928         mstart = mb->start_match_ptr;
    929         break;
    930         }
    931       if (rrc == MATCH_THEN)
    932         {
    933         next_ecode = ecode + GET(ecode,1);
    934         if (mb->start_match_ptr < next_ecode &&
    935             (*ecode == OP_ALT || *next_ecode == OP_ALT))
    936           rrc = MATCH_NOMATCH;
    937         }
    938 
    939       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    940       ecode += GET(ecode,1);
    941       mb->mark = save_mark;
    942       }
    943     while (*ecode == OP_ALT);
    944 
    945     /* If we hit the end of the group (which could be repeated), fail */
    946 
    947     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
    948 
    949     /* Continue as from after the group, updating the offsets high water
    950     mark, since extracts may have been taken. */
    951 
    952     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
    953 
    954     offset_top = mb->end_offset_top;
    955     eptr = mb->end_match_ptr;
    956 
    957     /* For a non-repeating ket, just continue at this level. This also
    958     happens for a repeating ket if no characters were matched in the group.
    959     This is the forcible breaking of infinite loops as implemented in Perl
    960     5.005. */
    961 
    962     if (*ecode == OP_KET || eptr == saved_eptr)
    963       {
    964       ecode += 1+LINK_SIZE;
    965       break;
    966       }
    967 
    968     /* The repeating kets try the rest of the pattern or restart from the
    969     preceding bracket, in the appropriate order. The second "call" of match()
    970     uses tail recursion, to avoid using another stack frame. */
    971 
    972     if (*ecode == OP_KETRMIN)
    973       {
    974       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM65);
    975       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    976       ecode = prev;
    977       goto TAIL_RECURSE;
    978       }
    979     else  /* OP_KETRMAX */
    980       {
    981       RMATCH(eptr, prev, offset_top, mb, eptrb, RM66);
    982       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    983       ecode += 1 + LINK_SIZE;
    984       goto TAIL_RECURSE;
    985       }
    986     /* Control never gets here */
    987 
    988     /* Handle a capturing bracket, other than those that are possessive with an
    989     unlimited repeat. If there is space in the offset vector, save the current
    990     subject position in the working slot at the top of the vector. We mustn't
    991     change the current values of the data slot, because they may be set from a
    992     previous iteration of this group, and be referred to by a reference inside
    993     the group. A failure to match might occur after the group has succeeded,
    994     if something later on doesn't match. For this reason, we need to restore
    995     the working value and also the values of the final offsets, in case they
    996     were set by a previous iteration of the same bracket.
    997 
    998     If there isn't enough space in the offset vector, treat this as if it were
    999     a non-capturing bracket. Don't worry about setting the flag for the error
   1000     case here; that is handled in the code for KET. */
   1001 
   1002     case OP_CBRA:
   1003     case OP_SCBRA:
   1004     number = GET2(ecode, 1+LINK_SIZE);
   1005     offset = number << 1;
   1006 
   1007     if (offset < mb->offset_max)
   1008       {
   1009       save_offset1 = mb->ovector[offset];
   1010       save_offset2 = mb->ovector[offset+1];
   1011       save_offset3 = mb->ovector[mb->offset_end - number];
   1012       save_capture_last = mb->capture_last;
   1013       save_mark = mb->mark;
   1014 
   1015       mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
   1016 
   1017       for (;;)
   1018         {
   1019         if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
   1020         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
   1021           eptrb, RM1);
   1022         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
   1023 
   1024         /* If we backed up to a THEN, check whether it is within the current
   1025         branch by comparing the address of the THEN that is passed back with
   1026         the end of the branch. If it is within the current branch, and the
   1027         branch is one of two or more alternatives (it either starts or ends
   1028         with OP_ALT), we have reached the limit of THEN's action, so convert
   1029         the return code to NOMATCH, which will cause normal backtracking to
   1030         happen from now on. Otherwise, THEN is passed back to an outer
   1031         alternative. This implements Perl's treatment of parenthesized groups,
   1032         where a group not containing | does not affect the current alternative,
   1033         that is, (X) is NOT the same as (X|(*F)). */
   1034 
   1035         if (rrc == MATCH_THEN)
   1036           {
   1037           next_ecode = ecode + GET(ecode,1);
   1038           if (mb->start_match_ptr < next_ecode &&
   1039               (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1040             rrc = MATCH_NOMATCH;
   1041           }
   1042 
   1043         /* Anything other than NOMATCH is passed back. */
   1044 
   1045         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1046         mb->capture_last = save_capture_last;
   1047         ecode += GET(ecode, 1);
   1048         mb->mark = save_mark;
   1049         if (*ecode != OP_ALT) break;
   1050         }
   1051 
   1052       mb->ovector[offset] = save_offset1;
   1053       mb->ovector[offset+1] = save_offset2;
   1054       mb->ovector[mb->offset_end - number] = save_offset3;
   1055 
   1056       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
   1057 
   1058       RRETURN(rrc);
   1059       }
   1060 
   1061     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
   1062     as a non-capturing bracket. */
   1063 
   1064     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1065     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1066 
   1067     /* Non-capturing or atomic group, except for possessive with unlimited
   1068     repeat and ONCE group with no captures. Loop for all the alternatives.
   1069 
   1070     When we get to the final alternative within the brackets, we used to return
   1071     the result of a recursive call to match() whatever happened so it was
   1072     possible to reduce stack usage by turning this into a tail recursion,
   1073     except in the case of a possibly empty group. However, now that there is
   1074     the possibility of (*THEN) occurring in the final alternative, this
   1075     optimization is no longer always possible.
   1076 
   1077     We can optimize if we know there are no (*THEN)s in the pattern; at present
   1078     this is the best that can be done.
   1079 
   1080     MATCH_ONCE is returned when the end of an atomic group is successfully
   1081     reached, but subsequent matching fails. It passes back up the tree (causing
   1082     captured values to be reset) until the original atomic group level is
   1083     reached. This is tested by comparing mb->once_target with the start of the
   1084     group. At this point, the return is converted into MATCH_NOMATCH so that
   1085     previous backup points can be taken. */
   1086 
   1087     case OP_ONCE:
   1088     case OP_BRA:
   1089     case OP_SBRA:
   1090 
   1091     for (;;)
   1092       {
   1093       if (op >= OP_SBRA || op == OP_ONCE)
   1094         mb->match_function_type |= MATCH_CBEGROUP;
   1095 
   1096       /* If this is not a possibly empty group, and there are no (*THEN)s in
   1097       the pattern, and this is the final alternative, optimize as described
   1098       above. */
   1099 
   1100       else if (!mb->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
   1101         {
   1102         ecode += PRIV(OP_lengths)[*ecode];
   1103         goto TAIL_RECURSE;
   1104         }
   1105 
   1106       /* In all other cases, we have to make another call to match(). */
   1107 
   1108       save_mark = mb->mark;
   1109       save_capture_last = mb->capture_last;
   1110       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb, eptrb,
   1111         RM2);
   1112 
   1113       /* See comment in the code for capturing groups above about handling
   1114       THEN. */
   1115 
   1116       if (rrc == MATCH_THEN)
   1117         {
   1118         next_ecode = ecode + GET(ecode,1);
   1119         if (mb->start_match_ptr < next_ecode &&
   1120             (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1121           rrc = MATCH_NOMATCH;
   1122         }
   1123 
   1124       if (rrc != MATCH_NOMATCH)
   1125         {
   1126         if (rrc == MATCH_ONCE)
   1127           {
   1128           PCRE2_SPTR scode = ecode;
   1129           if (*scode != OP_ONCE)           /* If not at start, find it */
   1130             {
   1131             while (*scode == OP_ALT) scode += GET(scode, 1);
   1132             scode -= GET(scode, 1);
   1133             }
   1134           if (mb->once_target == scode) rrc = MATCH_NOMATCH;
   1135           }
   1136         RRETURN(rrc);
   1137         }
   1138       ecode += GET(ecode, 1);
   1139       mb->mark = save_mark;
   1140       if (*ecode != OP_ALT) break;
   1141       mb->capture_last = save_capture_last;
   1142       }
   1143 
   1144     RRETURN(MATCH_NOMATCH);
   1145 
   1146     /* Handle possessive capturing brackets with an unlimited repeat. We come
   1147     here from BRAZERO with allow_zero set TRUE. The ovector values are
   1148     handled similarly to the normal case above. However, the matching is
   1149     different. The end of these brackets will always be OP_KETRPOS, which
   1150     returns MATCH_KETRPOS without going further in the pattern. By this means
   1151     we can handle the group by iteration rather than recursion, thereby
   1152     reducing the amount of stack needed. If the ovector is too small for
   1153     capturing, treat as non-capturing. */
   1154 
   1155     case OP_CBRAPOS:
   1156     case OP_SCBRAPOS:
   1157     allow_zero = FALSE;
   1158 
   1159     POSSESSIVE_CAPTURE:
   1160     number = GET2(ecode, 1+LINK_SIZE);
   1161     offset = number << 1;
   1162     if (offset >= mb->offset_max) goto POSSESSIVE_NON_CAPTURE;
   1163 
   1164     matched_once = FALSE;
   1165     code_offset = (int)(ecode - mb->start_code);
   1166 
   1167     save_offset1 = mb->ovector[offset];
   1168     save_offset2 = mb->ovector[offset+1];
   1169     save_offset3 = mb->ovector[mb->offset_end - number];
   1170     save_capture_last = mb->capture_last;
   1171 
   1172     /* Each time round the loop, save the current subject position for use
   1173     when the group matches. For MATCH_MATCH, the group has matched, so we
   1174     restart it with a new subject starting position, remembering that we had
   1175     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
   1176     usual. If we haven't matched any alternatives in any iteration, check to
   1177     see if a previous iteration matched. If so, the group has matched;
   1178     continue from afterwards. Otherwise it has failed; restore the previous
   1179     capture values before returning NOMATCH. */
   1180 
   1181     for (;;)
   1182       {
   1183       mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
   1184       if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
   1185       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
   1186         eptrb, RM63);
   1187       if (rrc == MATCH_KETRPOS)
   1188         {
   1189         offset_top = mb->end_offset_top;
   1190         ecode = mb->start_code + code_offset;
   1191         save_capture_last = mb->capture_last;
   1192         matched_once = TRUE;
   1193         mstart = mb->start_match_ptr;    /* In case \K changed it */
   1194         if (eptr == mb->end_match_ptr)   /* Matched an empty string */
   1195           {
   1196           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1197           break;
   1198           }
   1199         eptr = mb->end_match_ptr;
   1200         continue;
   1201         }
   1202 
   1203       /* See comment in the code for capturing groups above about handling
   1204       THEN. */
   1205 
   1206       if (rrc == MATCH_THEN)
   1207         {
   1208         next_ecode = ecode + GET(ecode,1);
   1209         if (mb->start_match_ptr < next_ecode &&
   1210             (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1211           rrc = MATCH_NOMATCH;
   1212         }
   1213 
   1214       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1215       mb->capture_last = save_capture_last;
   1216       ecode += GET(ecode, 1);
   1217       if (*ecode != OP_ALT) break;
   1218       }
   1219 
   1220     if (!matched_once)
   1221       {
   1222       mb->ovector[offset] = save_offset1;
   1223       mb->ovector[offset+1] = save_offset2;
   1224       mb->ovector[mb->offset_end - number] = save_offset3;
   1225       }
   1226 
   1227     if (allow_zero || matched_once)
   1228       {
   1229       ecode += 1 + LINK_SIZE;
   1230       break;
   1231       }
   1232     RRETURN(MATCH_NOMATCH);
   1233 
   1234     /* Non-capturing possessive bracket with unlimited repeat. We come here
   1235     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
   1236     without the capturing complication. It is written out separately for speed
   1237     and cleanliness. */
   1238 
   1239     case OP_BRAPOS:
   1240     case OP_SBRAPOS:
   1241     allow_zero = FALSE;
   1242 
   1243     POSSESSIVE_NON_CAPTURE:
   1244     matched_once = FALSE;
   1245     code_offset = (int)(ecode - mb->start_code);
   1246     save_capture_last = mb->capture_last;
   1247 
   1248     for (;;)
   1249       {
   1250       if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
   1251       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
   1252         eptrb, RM48);
   1253       if (rrc == MATCH_KETRPOS)
   1254         {
   1255         offset_top = mb->end_offset_top;
   1256         ecode = mb->start_code + code_offset;
   1257         matched_once = TRUE;
   1258         mstart = mb->start_match_ptr;   /* In case \K reset it */
   1259         if (eptr == mb->end_match_ptr)  /* Matched an empty string */
   1260           {
   1261           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1262           break;
   1263           }
   1264         eptr = mb->end_match_ptr;
   1265         continue;
   1266         }
   1267 
   1268       /* See comment in the code for capturing groups above about handling
   1269       THEN. */
   1270 
   1271       if (rrc == MATCH_THEN)
   1272         {
   1273         next_ecode = ecode + GET(ecode,1);
   1274         if (mb->start_match_ptr < next_ecode &&
   1275             (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1276           rrc = MATCH_NOMATCH;
   1277         }
   1278 
   1279       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1280       ecode += GET(ecode, 1);
   1281       if (*ecode != OP_ALT) break;
   1282       mb->capture_last = save_capture_last;
   1283       }
   1284 
   1285     if (matched_once || allow_zero)
   1286       {
   1287       ecode += 1 + LINK_SIZE;
   1288       break;
   1289       }
   1290     RRETURN(MATCH_NOMATCH);
   1291 
   1292     /* Control never reaches here. */
   1293 
   1294     /* Conditional group: compilation checked that there are no more than two
   1295     branches. If the condition is false, skipping the first branch takes us
   1296     past the end of the item if there is only one branch, but that's exactly
   1297     what we want. */
   1298 
   1299     case OP_COND:
   1300     case OP_SCOND:
   1301 
   1302     /* The variable codelink will be added to ecode when the condition is
   1303     false, to get to the second branch. Setting it to the offset to the ALT
   1304     or KET, then incrementing ecode achieves this effect. We now have ecode
   1305     pointing to the condition or callout. */
   1306 
   1307     codelink = GET(ecode, 1);   /* Offset to the second branch */
   1308     ecode += 1 + LINK_SIZE;     /* From this opcode */
   1309 
   1310     /* Because of the way auto-callout works during compile, a callout item is
   1311     inserted between OP_COND and an assertion condition. */
   1312 
   1313     if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR)
   1314       {
   1315       unsigned int callout_length = (*ecode == OP_CALLOUT)
   1316           ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
   1317 
   1318       if (mb->callout != NULL)
   1319         {
   1320         pcre2_callout_block cb;
   1321         cb.version          = 1;
   1322         cb.capture_top      = offset_top/2;
   1323         cb.capture_last     = mb->capture_last & CAPLMASK;
   1324         cb.offset_vector    = mb->ovector;
   1325         cb.mark             = mb->nomatch_mark;
   1326         cb.subject          = mb->start_subject;
   1327         cb.subject_length   = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
   1328         cb.start_match      = (PCRE2_SIZE)(mstart - mb->start_subject);
   1329         cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
   1330         cb.pattern_position = GET(ecode, 1);
   1331         cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
   1332 
   1333         if (*ecode == OP_CALLOUT)
   1334           {
   1335           cb.callout_number = ecode[1 + 2*LINK_SIZE];
   1336           cb.callout_string_offset = 0;
   1337           cb.callout_string = NULL;
   1338           cb.callout_string_length = 0;
   1339           }
   1340         else
   1341           {
   1342           cb.callout_number = 0;
   1343           cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
   1344           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
   1345           cb.callout_string_length =
   1346             callout_length - (1 + 4*LINK_SIZE) - 2;
   1347           }
   1348 
   1349         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
   1350           RRETURN(MATCH_NOMATCH);
   1351         if (rrc < 0) RRETURN(rrc);
   1352         }
   1353 
   1354       /* Advance ecode past the callout, so it now points to the condition. We
   1355       must adjust codelink so that the value of ecode+codelink is unchanged. */
   1356 
   1357       ecode += callout_length;
   1358       codelink -= callout_length;
   1359       }
   1360 
   1361     /* Test the various possible conditions */
   1362 
   1363     condition = FALSE;
   1364     switch(condcode = *ecode)
   1365       {
   1366       case OP_RREF:                  /* Numbered group recursion test */
   1367       if (mb->recursive != NULL)     /* Not recursing => FALSE */
   1368         {
   1369         uint32_t recno = GET2(ecode, 1);   /* Recursion group number*/
   1370         condition = (recno == RREF_ANY || recno == mb->recursive->group_num);
   1371         }
   1372       break;
   1373 
   1374       case OP_DNRREF:       /* Duplicate named group recursion test */
   1375       if (mb->recursive != NULL)
   1376         {
   1377         int count = GET2(ecode, 1 + IMM2_SIZE);
   1378         PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
   1379         while (count-- > 0)
   1380           {
   1381           uint32_t recno = GET2(slot, 0);
   1382           condition = recno == mb->recursive->group_num;
   1383           if (condition) break;
   1384           slot += mb->name_entry_size;
   1385           }
   1386         }
   1387       break;
   1388 
   1389       case OP_CREF:                  /* Numbered group used test */
   1390       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
   1391       condition = offset < offset_top &&
   1392         mb->ovector[offset] != PCRE2_UNSET;
   1393       break;
   1394 
   1395       case OP_DNCREF:      /* Duplicate named group used test */
   1396         {
   1397         int count = GET2(ecode, 1 + IMM2_SIZE);
   1398         PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
   1399         while (count-- > 0)
   1400           {
   1401           offset = GET2(slot, 0) << 1;
   1402           condition = offset < offset_top &&
   1403             mb->ovector[offset] != PCRE2_UNSET;
   1404           if (condition) break;
   1405           slot += mb->name_entry_size;
   1406           }
   1407         }
   1408       break;
   1409 
   1410       case OP_FALSE:
   1411       case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
   1412       break;
   1413 
   1414       case OP_TRUE:
   1415       condition = TRUE;
   1416       break;
   1417 
   1418       /* The condition is an assertion. Call match() to evaluate it - setting
   1419       the MATCH_CONDASSERT bit in mb->match_function_type causes it to stop at
   1420       the end of an assertion. */
   1421 
   1422       default:
   1423       mb->match_function_type |= MATCH_CONDASSERT;
   1424       RMATCH(eptr, ecode, offset_top, mb, NULL, RM3);
   1425       if (rrc == MATCH_MATCH)
   1426         {
   1427         if (mb->end_offset_top > offset_top)
   1428           offset_top = mb->end_offset_top;  /* Captures may have happened */
   1429         condition = TRUE;
   1430 
   1431         /* Advance ecode past the assertion to the start of the first branch,
   1432         but adjust it so that the general choosing code below works. If the
   1433         assertion has a quantifier that allows zero repeats we must skip over
   1434         the BRAZERO. This is a lunatic thing to do, but somebody did! */
   1435 
   1436         if (*ecode == OP_BRAZERO) ecode++;
   1437         ecode += GET(ecode, 1);
   1438         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
   1439         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
   1440         }
   1441 
   1442       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
   1443       assertion; it is therefore treated as NOMATCH. Any other return is an
   1444       error. */
   1445 
   1446       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
   1447         {
   1448         RRETURN(rrc);         /* Need braces because of following else */
   1449         }
   1450       break;
   1451       }
   1452 
   1453     /* Choose branch according to the condition */
   1454 
   1455     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
   1456 
   1457     /* We are now at the branch that is to be obeyed. As there is only one, we
   1458     can use tail recursion to avoid using another stack frame, except when
   1459     there is unlimited repeat of a possibly empty group. In the latter case, a
   1460     recursive call to match() is always required, unless the second alternative
   1461     doesn't exist, in which case we can just plough on. Note that, for
   1462     compatibility with Perl, the | in a conditional group is NOT treated as
   1463     creating two alternatives. If a THEN is encountered in the branch, it
   1464     propagates out to the enclosing alternative (unless nested in a deeper set
   1465     of alternatives, of course). */
   1466 
   1467     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
   1468       {
   1469       if (op != OP_SCOND)
   1470         {
   1471         goto TAIL_RECURSE;
   1472         }
   1473 
   1474       mb->match_function_type |= MATCH_CBEGROUP;
   1475       RMATCH(eptr, ecode, offset_top, mb, eptrb, RM49);
   1476       RRETURN(rrc);
   1477       }
   1478 
   1479      /* Condition false & no alternative; continue after the group. */
   1480 
   1481     else
   1482       {
   1483       }
   1484     break;
   1485 
   1486 
   1487     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
   1488     to close any currently open capturing brackets. */
   1489 
   1490     case OP_CLOSE:
   1491     number = GET2(ecode, 1);   /* Must be less than 65536 */
   1492     offset = number << 1;
   1493     mb->capture_last = (mb->capture_last & OVFLMASK) | number;
   1494     if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
   1495       {
   1496       mb->ovector[offset] =
   1497         mb->ovector[mb->offset_end - number];
   1498       mb->ovector[offset+1] = eptr - mb->start_subject;
   1499 
   1500       /* If this group is at or above the current highwater mark, ensure that
   1501       any groups between the current high water mark and this group are marked
   1502       unset and then update the high water mark. */
   1503 
   1504       if (offset >= offset_top)
   1505         {
   1506         register PCRE2_SIZE *iptr = mb->ovector + offset_top;
   1507         register PCRE2_SIZE *iend = mb->ovector + offset;
   1508         while (iptr < iend) *iptr++ = PCRE2_UNSET;
   1509         offset_top = offset + 2;
   1510         }
   1511       }
   1512     ecode += 1 + IMM2_SIZE;
   1513     break;
   1514 
   1515 
   1516     /* End of the pattern, either real or forced. In an assertion ACCEPT,
   1517     update the last used pointer. */
   1518 
   1519     case OP_ASSERT_ACCEPT:
   1520     if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
   1521 
   1522     case OP_ACCEPT:
   1523     case OP_END:
   1524 
   1525     /* If we have matched an empty string, fail if not in an assertion and not
   1526     in a recursion if either PCRE2_NOTEMPTY is set, or if PCRE2_NOTEMPTY_ATSTART
   1527     is set and we have matched at the start of the subject. In both cases,
   1528     backtracking will then try other alternatives, if any. */
   1529 
   1530     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
   1531          mb->recursive == NULL &&
   1532          ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
   1533            ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
   1534              mstart == mb->start_subject + mb->start_offset)))
   1535       RRETURN(MATCH_NOMATCH);
   1536 
   1537     /* Otherwise, we have a match. */
   1538 
   1539     mb->end_match_ptr = eptr;           /* Record where we ended */
   1540     mb->end_offset_top = offset_top;    /* and how many extracts were taken */
   1541     mb->start_match_ptr = mstart;       /* and the start (\K can modify) */
   1542 
   1543     /* For some reason, the macros don't work properly if an expression is
   1544     given as the argument to RRETURN when the heap is in use. */
   1545 
   1546     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
   1547     RRETURN(rrc);
   1548 
   1549     /* Assertion brackets. Check the alternative branches in turn - the
   1550     matching won't pass the KET for an assertion. If any one branch matches,
   1551     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
   1552     start of each branch to move the current point backwards, so the code at
   1553     this level is identical to the lookahead case. When the assertion is part
   1554     of a condition, we want to return immediately afterwards. The caller of
   1555     this incarnation of the match() function will have set MATCH_CONDASSERT in
   1556     mb->match_function_type, and one of these opcodes will be the first opcode
   1557     that is processed. We use a local variable that is preserved over calls to
   1558     match() to remember this case. */
   1559 
   1560     case OP_ASSERT:
   1561     case OP_ASSERTBACK:
   1562     save_mark = mb->mark;
   1563     if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
   1564       {
   1565       condassert = TRUE;
   1566       mb->match_function_type &= ~MATCH_CONDASSERT;
   1567       }
   1568     else condassert = FALSE;
   1569 
   1570     /* Loop for each branch */
   1571 
   1572     do
   1573       {
   1574       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM4);
   1575 
   1576       /* A match means that the assertion is true; break out of the loop
   1577       that matches its alternatives. */
   1578 
   1579       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1580         {
   1581         mstart = mb->start_match_ptr;   /* In case \K reset it */
   1582         break;
   1583         }
   1584 
   1585       /* If not matched, restore the previous mark setting. */
   1586 
   1587       mb->mark = save_mark;
   1588 
   1589       /* See comment in the code for capturing groups above about handling
   1590       THEN. */
   1591 
   1592       if (rrc == MATCH_THEN)
   1593         {
   1594         next_ecode = ecode + GET(ecode,1);
   1595         if (mb->start_match_ptr < next_ecode &&
   1596             (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1597           rrc = MATCH_NOMATCH;
   1598         }
   1599 
   1600       /* Anything other than NOMATCH causes the entire assertion to fail,
   1601       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
   1602       uncaptured THEN, which means they take their normal effect. This
   1603       consistent approach does not always have exactly the same effect as in
   1604       Perl. */
   1605 
   1606       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1607       ecode += GET(ecode, 1);
   1608       }
   1609     while (*ecode == OP_ALT);   /* Continue for next alternative */
   1610 
   1611     /* If we have tried all the alternative branches, the assertion has
   1612     failed. If not, we broke out after a match. */
   1613 
   1614     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
   1615 
   1616     /* If checking an assertion for a condition, return MATCH_MATCH. */
   1617 
   1618     if (condassert) RRETURN(MATCH_MATCH);
   1619 
   1620     /* Continue from after a successful assertion, updating the offsets high
   1621     water mark, since extracts may have been taken during the assertion. */
   1622 
   1623     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1624     ecode += 1 + LINK_SIZE;
   1625     offset_top = mb->end_offset_top;
   1626     continue;
   1627 
   1628     /* Negative assertion: all branches must fail to match for the assertion to
   1629     succeed. */
   1630 
   1631     case OP_ASSERT_NOT:
   1632     case OP_ASSERTBACK_NOT:
   1633     save_mark = mb->mark;
   1634     if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
   1635       {
   1636       condassert = TRUE;
   1637       mb->match_function_type &= ~MATCH_CONDASSERT;
   1638       }
   1639     else condassert = FALSE;
   1640 
   1641     /* Loop for each alternative branch. */
   1642 
   1643     do
   1644       {
   1645       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM5);
   1646       mb->mark = save_mark;   /* Always restore the mark setting */
   1647 
   1648       switch(rrc)
   1649         {
   1650         case MATCH_MATCH:            /* A successful match means */
   1651         case MATCH_ACCEPT:           /* the assertion has failed. */
   1652         RRETURN(MATCH_NOMATCH);
   1653 
   1654         case MATCH_NOMATCH:          /* Carry on with next branch */
   1655         break;
   1656 
   1657         /* See comment in the code for capturing groups above about handling
   1658         THEN. */
   1659 
   1660         case MATCH_THEN:
   1661         next_ecode = ecode + GET(ecode,1);
   1662         if (mb->start_match_ptr < next_ecode &&
   1663             (*ecode == OP_ALT || *next_ecode == OP_ALT))
   1664           {
   1665           rrc = MATCH_NOMATCH;
   1666           break;
   1667           }
   1668         /* Otherwise fall through. */
   1669 
   1670         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
   1671         assertion to fail to match, without considering any more alternatives.
   1672         Failing to match means the assertion is true. This is a consistent
   1673         approach, but does not always have the same effect as in Perl. */
   1674 
   1675         case MATCH_COMMIT:
   1676         case MATCH_SKIP:
   1677         case MATCH_SKIP_ARG:
   1678         case MATCH_PRUNE:
   1679         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1680         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
   1681 
   1682         /* Anything else is an error */
   1683 
   1684         default:
   1685         RRETURN(rrc);
   1686         }
   1687 
   1688       /* Continue with next branch */
   1689 
   1690       ecode += GET(ecode,1);
   1691       }
   1692     while (*ecode == OP_ALT);
   1693 
   1694     /* All branches in the assertion failed to match. */
   1695 
   1696     NEG_ASSERT_TRUE:
   1697     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
   1698     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
   1699     continue;
   1700 
   1701     /* Move the subject pointer back. This occurs only at the start of
   1702     each branch of a lookbehind assertion. If we are too close to the start to
   1703     move back, this match function fails. When working with UTF-8 we move
   1704     back a number of characters, not bytes. */
   1705 
   1706     case OP_REVERSE:
   1707     i = GET(ecode, 1);
   1708 #ifdef SUPPORT_UNICODE
   1709     if (utf)
   1710       {
   1711       while (i-- > 0)
   1712         {
   1713         if (eptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
   1714         eptr--;
   1715         BACKCHAR(eptr);
   1716         }
   1717       }
   1718     else
   1719 #endif
   1720 
   1721     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
   1722 
   1723       {
   1724       if (i > eptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
   1725       eptr -= i;
   1726       }
   1727 
   1728     /* Save the earliest consulted character, then skip to next op code */
   1729 
   1730     if (eptr < mb->start_used_ptr) mb->start_used_ptr = eptr;
   1731     ecode += 1 + LINK_SIZE;
   1732     break;
   1733 
   1734     /* The callout item calls an external function, if one is provided, passing
   1735     details of the match so far. This is mainly for debugging, though the
   1736     function is able to force a failure. */
   1737 
   1738     case OP_CALLOUT:
   1739     case OP_CALLOUT_STR:
   1740       {
   1741       unsigned int callout_length = (*ecode == OP_CALLOUT)
   1742           ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
   1743 
   1744       if (mb->callout != NULL)
   1745         {
   1746         pcre2_callout_block cb;
   1747         cb.version          = 1;
   1748         cb.callout_number   = ecode[LINK_SIZE + 1];
   1749         cb.capture_top      = offset_top/2;
   1750         cb.capture_last     = mb->capture_last & CAPLMASK;
   1751         cb.offset_vector    = mb->ovector;
   1752         cb.mark             = mb->nomatch_mark;
   1753         cb.subject          = mb->start_subject;
   1754         cb.subject_length   = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
   1755         cb.start_match      = (PCRE2_SIZE)(mstart - mb->start_subject);
   1756         cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
   1757         cb.pattern_position = GET(ecode, 1);
   1758         cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
   1759 
   1760         if (*ecode == OP_CALLOUT)
   1761           {
   1762           cb.callout_number = ecode[1 + 2*LINK_SIZE];
   1763           cb.callout_string_offset = 0;
   1764           cb.callout_string = NULL;
   1765           cb.callout_string_length = 0;
   1766           }
   1767         else
   1768           {
   1769           cb.callout_number = 0;
   1770           cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
   1771           cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
   1772           cb.callout_string_length =
   1773             callout_length - (1 + 4*LINK_SIZE) - 2;
   1774           }
   1775 
   1776         if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
   1777           RRETURN(MATCH_NOMATCH);
   1778         if (rrc < 0) RRETURN(rrc);
   1779         }
   1780       ecode += callout_length;
   1781       }
   1782     break;
   1783 
   1784     /* Recursion either matches the current regex, or some subexpression. The
   1785     offset data is the offset to the starting bracket from the start of the
   1786     whole pattern. (This is so that it works from duplicated subpatterns.)
   1787 
   1788     The state of the capturing groups is preserved over recursion, and
   1789     re-instated afterwards. We don't know how many are started and not yet
   1790     finished (offset_top records the completed total) so we just have to save
   1791     all the potential data. There may be up to 65535 such values, which is too
   1792     large to put on the stack, but using malloc for small numbers seems
   1793     expensive. As a compromise, the stack is used when there are no more than
   1794     OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
   1795 
   1796     There are also other values that have to be saved. We use a chained
   1797     sequence of blocks that actually live on the stack. Thanks to Robin Houston
   1798     for the original version of this logic. It has, however, been hacked around
   1799     a lot, so he is not to blame for the current way it works. */
   1800 
   1801     case OP_RECURSE:
   1802       {
   1803       ovecsave_frame *fr;
   1804       recursion_info *ri;
   1805       uint32_t recno;
   1806 
   1807       callpat = mb->start_code + GET(ecode, 1);
   1808       recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
   1809 
   1810       /* Check for repeating a pattern recursion without advancing the subject
   1811       pointer. This should catch convoluted mutual recursions. (Some simple
   1812       cases are caught at compile time.) */
   1813 
   1814       for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
   1815         if (recno == ri->group_num && eptr == ri->subject_position)
   1816           RRETURN(PCRE2_ERROR_RECURSELOOP);
   1817 
   1818       /* Add to "recursing stack" */
   1819 
   1820       new_recursive.group_num = recno;
   1821       new_recursive.saved_capture_last = mb->capture_last;
   1822       new_recursive.subject_position = eptr;
   1823       new_recursive.prevrec = mb->recursive;
   1824       mb->recursive = &new_recursive;
   1825 
   1826       /* Where to continue from afterwards */
   1827 
   1828       ecode += 1 + LINK_SIZE;
   1829 
   1830       /* When we are using the system stack for match() recursion we can call a
   1831       function that uses the system stack for preserving the ovector while
   1832       processing the pattern recursion, but only if the ovector is small
   1833       enough. */
   1834 
   1835 #ifndef HEAP_MATCH_RECURSE
   1836       if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
   1837         {
   1838         rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
   1839           eptrb, rdepth);
   1840         mb->recursive = new_recursive.prevrec;
   1841         if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
   1842 
   1843         /* Set where we got to in the subject, and reset the start, in case
   1844         it was changed by \K. This *is* propagated back out of a recursion,
   1845         for Perl compatibility. */
   1846 
   1847         eptr = mb->end_match_ptr;
   1848         mstart = mb->start_match_ptr;
   1849         break;   /* End of processing OP_RECURSE */
   1850         }
   1851 #endif
   1852       /* If the ovector is too big, or if we are using the heap for match()
   1853       recursion, we have to use the heap for saving the ovector. Used ovecsave
   1854       frames are kept on a chain and re-used. This makes a small improvement in
   1855       execution time on Linux. */
   1856 
   1857       if (mb->ovecsave_chain != NULL)
   1858         {
   1859         new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
   1860         mb->ovecsave_chain = mb->ovecsave_chain->next;
   1861         }
   1862       else
   1863         {
   1864         fr = (ovecsave_frame *)(mb->memctl.malloc(sizeof(ovecsave_frame *) +
   1865           mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
   1866         if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
   1867         new_recursive.ovec_save = fr->saved_ovec;
   1868         }
   1869 
   1870       memcpy(new_recursive.ovec_save, mb->ovector,
   1871         mb->offset_end * sizeof(PCRE2_SIZE));
   1872 
   1873       /* Do the recursion. After processing each alternative, restore the
   1874       ovector data and the last captured value. This code has the same overall
   1875       logic as the code in the op_recurse_ovecsave() function, but is adapted
   1876       to use RMATCH/RRETURN and to release the heap block containing the saved
   1877       ovector. */
   1878 
   1879       cbegroup = (*callpat >= OP_SBRA);
   1880       do
   1881         {
   1882         if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
   1883         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
   1884           mb, eptrb, RM6);
   1885         memcpy(mb->ovector, new_recursive.ovec_save,
   1886             mb->offset_end * sizeof(PCRE2_SIZE));
   1887         mb->capture_last = new_recursive.saved_capture_last;
   1888         mb->recursive = new_recursive.prevrec;
   1889 
   1890         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1891           {
   1892           fr = (ovecsave_frame *)
   1893             ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
   1894           fr->next = mb->ovecsave_chain;
   1895           mb->ovecsave_chain = fr;
   1896 
   1897           /* Set where we got to in the subject, and reset the start, in case
   1898           it was changed by \K. This *is* propagated back out of a recursion,
   1899           for Perl compatibility. */
   1900 
   1901           eptr = mb->end_match_ptr;
   1902           mstart = mb->start_match_ptr;
   1903           goto RECURSION_MATCHED;        /* Exit loop; end processing */
   1904           }
   1905 
   1906         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
   1907         recursion; they cause a NOMATCH for the entire recursion. These codes
   1908         are defined in a range that can be tested for. */
   1909 
   1910         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
   1911           {
   1912           rrc = MATCH_NOMATCH;
   1913           goto RECURSION_RETURN;
   1914           }
   1915 
   1916         /* Any return code other than NOMATCH is an error. */
   1917 
   1918         if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
   1919         mb->recursive = &new_recursive;
   1920         callpat += GET(callpat, 1);
   1921         }
   1922       while (*callpat == OP_ALT);
   1923 
   1924       RECURSION_RETURN:
   1925       mb->recursive = new_recursive.prevrec;
   1926       fr = (ovecsave_frame *)
   1927         ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
   1928       fr->next = mb->ovecsave_chain;
   1929       mb->ovecsave_chain = fr;
   1930       RRETURN(rrc);
   1931       }
   1932 
   1933     RECURSION_MATCHED:
   1934     break;
   1935 
   1936     /* An alternation is the end of a branch; scan along to find the end of the
   1937     bracketed group and go to there. */
   1938 
   1939     case OP_ALT:
   1940     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1941     break;
   1942 
   1943     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
   1944     indicating that it may occur zero times. It may repeat infinitely, or not
   1945     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
   1946     with fixed upper repeat limits are compiled as a number of copies, with the
   1947     optional ones preceded by BRAZERO or BRAMINZERO. */
   1948 
   1949     case OP_BRAZERO:
   1950     next_ecode = ecode + 1;
   1951     RMATCH(eptr, next_ecode, offset_top, mb, eptrb, RM10);
   1952     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1953     do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
   1954     ecode = next_ecode + 1 + LINK_SIZE;
   1955     break;
   1956 
   1957     case OP_BRAMINZERO:
   1958     next_ecode = ecode + 1;
   1959     do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
   1960     RMATCH(eptr, next_ecode + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
   1961     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1962     ecode++;
   1963     break;
   1964 
   1965     case OP_SKIPZERO:
   1966     next_ecode = ecode+1;
   1967     do next_ecode += GET(next_ecode,1); while (*next_ecode == OP_ALT);
   1968     ecode = next_ecode + 1 + LINK_SIZE;
   1969     break;
   1970 
   1971     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
   1972     here; just jump to the group, with allow_zero set TRUE. */
   1973 
   1974     case OP_BRAPOSZERO:
   1975     op = *(++ecode);
   1976     allow_zero = TRUE;
   1977     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
   1978       goto POSSESSIVE_NON_CAPTURE;
   1979 
   1980     /* End of a group, repeated or non-repeating. */
   1981 
   1982     case OP_KET:
   1983     case OP_KETRMIN:
   1984     case OP_KETRMAX:
   1985     case OP_KETRPOS:
   1986     prev = ecode - GET(ecode, 1);
   1987 
   1988     /* If this was a group that remembered the subject start, in order to break
   1989     infinite repeats of empty string matches, retrieve the subject start from
   1990     the chain. Otherwise, set it NULL. */
   1991 
   1992     if (*prev >= OP_SBRA || *prev == OP_ONCE)
   1993       {
   1994       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
   1995       eptrb = eptrb->epb_prev;              /* Backup to previous group */
   1996       }
   1997     else saved_eptr = NULL;
   1998 
   1999     /* If we are at the end of an assertion group or a non-capturing atomic
   2000     group, stop matching and return MATCH_MATCH, but record the current high
   2001     water mark for use by positive assertions. We also need to record the match
   2002     start in case it was changed by \K. */
   2003 
   2004     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
   2005          *prev == OP_ONCE_NC)
   2006       {
   2007       mb->end_match_ptr = eptr;      /* For ONCE_NC */
   2008       mb->end_offset_top = offset_top;
   2009       mb->start_match_ptr = mstart;
   2010       if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
   2011       RRETURN(MATCH_MATCH);         /* Sets mb->mark */
   2012       }
   2013 
   2014     /* For capturing groups we have to check the group number back at the start
   2015     and if necessary complete handling an extraction by setting the offsets and
   2016     bumping the high water mark. Whole-pattern recursion is coded as a recurse
   2017     into group 0, so it won't be picked up here. Instead, we catch it when the
   2018     OP_END is reached. Other recursion is handled here. We just have to record
   2019     the current subject position and start match pointer and give a MATCH
   2020     return. */
   2021 
   2022     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
   2023         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
   2024       {
   2025       number = GET2(prev, 1+LINK_SIZE);
   2026       offset = number << 1;
   2027 
   2028       /* Handle a recursively called group. */
   2029 
   2030       if (mb->recursive != NULL && mb->recursive->group_num == number)
   2031         {
   2032         mb->end_match_ptr = eptr;
   2033         mb->start_match_ptr = mstart;
   2034         if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
   2035         RRETURN(MATCH_MATCH);
   2036         }
   2037 
   2038       /* Deal with capturing */
   2039 
   2040       mb->capture_last = (mb->capture_last & OVFLMASK) | number;
   2041       if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
   2042         {
   2043         /* If offset is greater than offset_top, it means that we are
   2044         "skipping" a capturing group, and that group's offsets must be marked
   2045         unset. In earlier versions of PCRE, all the offsets were unset at the
   2046         start of matching, but this doesn't work because atomic groups and
   2047         assertions can cause a value to be set that should later be unset.
   2048         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
   2049         part of the atomic group, but this is not on the final matching path,
   2050         so must be unset when 2 is set. (If there is no group 2, there is no
   2051         problem, because offset_top will then be 2, indicating no capture.) */
   2052 
   2053         if (offset > offset_top)
   2054           {
   2055           register PCRE2_SIZE *iptr = mb->ovector + offset_top;
   2056           register PCRE2_SIZE *iend = mb->ovector + offset;
   2057           while (iptr < iend) *iptr++ = PCRE2_UNSET;
   2058           }
   2059 
   2060         /* Now make the extraction */
   2061 
   2062         mb->ovector[offset] = mb->ovector[mb->offset_end - number];
   2063         mb->ovector[offset+1] = eptr - mb->start_subject;
   2064         if (offset_top <= offset) offset_top = offset + 2;
   2065         }
   2066       }
   2067 
   2068     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
   2069     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
   2070     at a time from the outer level, thus saving stack. This must precede the
   2071     empty string test - in this case that test is done at the outer level. */
   2072 
   2073     if (*ecode == OP_KETRPOS)
   2074       {
   2075       mb->start_match_ptr = mstart;    /* In case \K reset it */
   2076       mb->end_match_ptr = eptr;
   2077       mb->end_offset_top = offset_top;
   2078       if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
   2079       RRETURN(MATCH_KETRPOS);
   2080       }
   2081 
   2082     /* For an ordinary non-repeating ket, just continue at this level. This
   2083     also happens for a repeating ket if no characters were matched in the
   2084     group. This is the forcible breaking of infinite loops as implemented in
   2085     Perl 5.005. For a non-repeating atomic group that includes captures,
   2086     establish a backup point by processing the rest of the pattern at a lower
   2087     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
   2088     original OP_ONCE level, thereby bypassing intermediate backup points, but
   2089     resetting any captures that happened along the way. */
   2090 
   2091     if (*ecode == OP_KET || eptr == saved_eptr)
   2092       {
   2093       if (*prev == OP_ONCE)
   2094         {
   2095         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM12);
   2096         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2097         mb->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2098         RRETURN(MATCH_ONCE);
   2099         }
   2100       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
   2101       break;
   2102       }
   2103 
   2104     /* The normal repeating kets try the rest of the pattern or restart from
   2105     the preceding bracket, in the appropriate order. In the second case, we can
   2106     use tail recursion to avoid using another stack frame, unless we have an
   2107     atomic group or an unlimited repeat of a group that can match an empty
   2108     string. */
   2109 
   2110     if (*ecode == OP_KETRMIN)
   2111       {
   2112       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM7);
   2113       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2114       if (*prev == OP_ONCE)
   2115         {
   2116         RMATCH(eptr, prev, offset_top, mb, eptrb, RM8);
   2117         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2118         mb->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2119         RRETURN(MATCH_ONCE);
   2120         }
   2121       if (*prev >= OP_SBRA)    /* Could match an empty string */
   2122         {
   2123         RMATCH(eptr, prev, offset_top, mb, eptrb, RM50);
   2124         RRETURN(rrc);
   2125         }
   2126       ecode = prev;
   2127       goto TAIL_RECURSE;
   2128       }
   2129     else  /* OP_KETRMAX */
   2130       {
   2131       RMATCH(eptr, prev, offset_top, mb, eptrb, RM13);
   2132       if (rrc == MATCH_ONCE && mb->once_target == prev) rrc = MATCH_NOMATCH;
   2133       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2134       if (*prev == OP_ONCE)
   2135         {
   2136         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM9);
   2137         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2138         mb->once_target = prev;
   2139         RRETURN(MATCH_ONCE);
   2140         }
   2141       ecode += 1 + LINK_SIZE;
   2142       goto TAIL_RECURSE;
   2143       }
   2144     /* Control never gets here */
   2145 
   2146     /* Not multiline mode: start of subject assertion, unless notbol. */
   2147 
   2148     case OP_CIRC:
   2149     if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
   2150       RRETURN(MATCH_NOMATCH);
   2151 
   2152     /* Start of subject assertion */
   2153 
   2154     case OP_SOD:
   2155     if (eptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
   2156     ecode++;
   2157     break;
   2158 
   2159     /* Multiline mode: start of subject unless notbol, or after any newline
   2160     except for one at the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
   2161 
   2162     case OP_CIRCM:
   2163     if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
   2164       RRETURN(MATCH_NOMATCH);
   2165     if (eptr != mb->start_subject &&
   2166         ((eptr == mb->end_subject &&
   2167            (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
   2168          !WAS_NEWLINE(eptr)))
   2169       RRETURN(MATCH_NOMATCH);
   2170     ecode++;
   2171     break;
   2172 
   2173     /* Start of match assertion */
   2174 
   2175     case OP_SOM:
   2176     if (eptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
   2177     ecode++;
   2178     break;
   2179 
   2180     /* Reset the start of match point */
   2181 
   2182     case OP_SET_SOM:
   2183     mstart = eptr;
   2184     ecode++;
   2185     break;
   2186 
   2187     /* Multiline mode: assert before any newline, or before end of subject
   2188     unless noteol is set. */
   2189 
   2190     case OP_DOLLM:
   2191     if (eptr < mb->end_subject)
   2192       {
   2193       if (!IS_NEWLINE(eptr))
   2194         {
   2195         if (mb->partial != 0 &&
   2196             eptr + 1 >= mb->end_subject &&
   2197             NLBLOCK->nltype == NLTYPE_FIXED &&
   2198             NLBLOCK->nllen == 2 &&
   2199             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2200           {
   2201           mb->hitend = TRUE;
   2202           if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   2203           }
   2204         RRETURN(MATCH_NOMATCH);
   2205         }
   2206       }
   2207     else
   2208       {
   2209       if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
   2210       SCHECK_PARTIAL();
   2211       }
   2212     ecode++;
   2213     break;
   2214 
   2215     /* Not multiline mode: assert before a terminating newline or before end of
   2216     subject unless noteol is set. */
   2217 
   2218     case OP_DOLL:
   2219     if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
   2220     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
   2221 
   2222     /* ... else fall through for endonly */
   2223 
   2224     /* End of subject assertion (\z) */
   2225 
   2226     case OP_EOD:
   2227     if (eptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
   2228     SCHECK_PARTIAL();
   2229     ecode++;
   2230     break;
   2231 
   2232     /* End of subject or ending \n assertion (\Z) */
   2233 
   2234     case OP_EODN:
   2235     ASSERT_NL_OR_EOS:
   2236     if (eptr < mb->end_subject &&
   2237         (!IS_NEWLINE(eptr) || eptr != mb->end_subject - mb->nllen))
   2238       {
   2239       if (mb->partial != 0 &&
   2240           eptr + 1 >= mb->end_subject &&
   2241           NLBLOCK->nltype == NLTYPE_FIXED &&
   2242           NLBLOCK->nllen == 2 &&
   2243           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2244         {
   2245         mb->hitend = TRUE;
   2246         if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   2247         }
   2248       RRETURN(MATCH_NOMATCH);
   2249       }
   2250 
   2251     /* Either at end of string or \n before end. */
   2252 
   2253     SCHECK_PARTIAL();
   2254     ecode++;
   2255     break;
   2256 
   2257     /* Word boundary assertions */
   2258 
   2259     case OP_NOT_WORD_BOUNDARY:
   2260     case OP_WORD_BOUNDARY:
   2261       {
   2262 
   2263       /* Find out if the previous and current characters are "word" characters.
   2264       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
   2265       be "non-word" characters. Remember the earliest consulted character for
   2266       partial matching. */
   2267 
   2268 #ifdef SUPPORT_UNICODE
   2269       if (utf)
   2270         {
   2271         /* Get status of previous character */
   2272 
   2273         if (eptr == mb->start_subject) prev_is_word = FALSE; else
   2274           {
   2275           PCRE2_SPTR lastptr = eptr - 1;
   2276           BACKCHAR(lastptr);
   2277           if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
   2278           GETCHAR(c, lastptr);
   2279           if ((mb->poptions & PCRE2_UCP) != 0)
   2280             {
   2281             if (c == '_') prev_is_word = TRUE; else
   2282               {
   2283               int cat = UCD_CATEGORY(c);
   2284               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2285               }
   2286             }
   2287           else
   2288           prev_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
   2289           }
   2290 
   2291         /* Get status of next character */
   2292 
   2293         if (eptr >= mb->end_subject)
   2294           {
   2295           SCHECK_PARTIAL();
   2296           cur_is_word = FALSE;
   2297           }
   2298         else
   2299           {
   2300           PCRE2_SPTR nextptr = eptr + 1;
   2301           FORWARDCHARTEST(nextptr, mb->end_subject);
   2302           if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
   2303           GETCHAR(c, eptr);
   2304           if ((mb->poptions & PCRE2_UCP) != 0)
   2305             {
   2306             if (c == '_') cur_is_word = TRUE; else
   2307               {
   2308               int cat = UCD_CATEGORY(c);
   2309               cur_is_word = (cat == ucp_L || cat == ucp_N);
   2310               }
   2311             }
   2312           else
   2313           cur_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
   2314           }
   2315         }
   2316       else
   2317 #endif  /* SUPPORT_UNICODE */
   2318 
   2319       /* Not in UTF-8 mode, but we may still have PCRE2_UCP set, and for
   2320       consistency with the behaviour of \w we do use it in this case. */
   2321 
   2322         {
   2323         /* Get status of previous character */
   2324 
   2325         if (eptr == mb->start_subject) prev_is_word = FALSE; else
   2326           {
   2327           if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
   2328 #ifdef SUPPORT_UNICODE
   2329           if ((mb->poptions & PCRE2_UCP) != 0)
   2330             {
   2331             c = eptr[-1];
   2332             if (c == '_') prev_is_word = TRUE; else
   2333               {
   2334               int cat = UCD_CATEGORY(c);
   2335               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2336               }
   2337             }
   2338           else
   2339 #endif
   2340           prev_is_word = MAX_255(eptr[-1])
   2341             && ((mb->ctypes[eptr[-1]] & ctype_word) != 0);
   2342           }
   2343 
   2344         /* Get status of next character */
   2345 
   2346         if (eptr >= mb->end_subject)
   2347           {
   2348           SCHECK_PARTIAL();
   2349           cur_is_word = FALSE;
   2350           }
   2351         else
   2352           {
   2353           if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
   2354 #ifdef SUPPORT_UNICODE
   2355           if ((mb->poptions & PCRE2_UCP) != 0)
   2356             {
   2357             c = *eptr;
   2358             if (c == '_') cur_is_word = TRUE; else
   2359               {
   2360               int cat = UCD_CATEGORY(c);
   2361               cur_is_word = (cat == ucp_L || cat == ucp_N);
   2362               }
   2363             }
   2364           else
   2365 #endif
   2366           cur_is_word = MAX_255(*eptr)
   2367             && ((mb->ctypes[*eptr] & ctype_word) != 0);
   2368           }
   2369         }
   2370 
   2371       /* Now see if the situation is what we want */
   2372 
   2373       if ((*ecode++ == OP_WORD_BOUNDARY)?
   2374            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
   2375         RRETURN(MATCH_NOMATCH);
   2376       }
   2377     break;
   2378 
   2379     /* Match any single character type except newline; have to take care with
   2380     CRLF newlines and partial matching. */
   2381 
   2382     case OP_ANY:
   2383     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   2384     if (mb->partial != 0 &&
   2385         eptr + 1 >= mb->end_subject &&
   2386         NLBLOCK->nltype == NLTYPE_FIXED &&
   2387         NLBLOCK->nllen == 2 &&
   2388         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2389       {
   2390       mb->hitend = TRUE;
   2391       if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   2392       }
   2393 
   2394     /* Fall through */
   2395 
   2396     /* Match any single character whatsoever. */
   2397 
   2398     case OP_ALLANY:
   2399     if (eptr >= mb->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2400       {                            /* not be updated before SCHECK_PARTIAL. */
   2401       SCHECK_PARTIAL();
   2402       RRETURN(MATCH_NOMATCH);
   2403       }
   2404     eptr++;
   2405 #ifdef SUPPORT_UNICODE
   2406     if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   2407 #endif
   2408     ecode++;
   2409     break;
   2410 
   2411     /* Match a single code unit, even in UTF-8 mode. This opcode really does
   2412     match any code unit, even newline. (It really should be called ANYCODEUNIT,
   2413     of course - the byte name is from pre-16 bit days.) */
   2414 
   2415     case OP_ANYBYTE:
   2416     if (eptr >= mb->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2417       {                            /* not be updated before SCHECK_PARTIAL. */
   2418       SCHECK_PARTIAL();
   2419       RRETURN(MATCH_NOMATCH);
   2420       }
   2421     eptr++;
   2422     ecode++;
   2423     break;
   2424 
   2425     case OP_NOT_DIGIT:
   2426     if (eptr >= mb->end_subject)
   2427       {
   2428       SCHECK_PARTIAL();
   2429       RRETURN(MATCH_NOMATCH);
   2430       }
   2431     GETCHARINCTEST(c, eptr);
   2432     if (
   2433 #ifdef SUPPORT_WIDE_CHARS
   2434        c < 256 &&
   2435 #endif
   2436        (mb->ctypes[c] & ctype_digit) != 0
   2437        )
   2438       RRETURN(MATCH_NOMATCH);
   2439     ecode++;
   2440     break;
   2441 
   2442     case OP_DIGIT:
   2443     if (eptr >= mb->end_subject)
   2444       {
   2445       SCHECK_PARTIAL();
   2446       RRETURN(MATCH_NOMATCH);
   2447       }
   2448     GETCHARINCTEST(c, eptr);
   2449     if (
   2450 #ifdef SUPPORT_WIDE_CHARS
   2451        c > 255 ||
   2452 #endif
   2453        (mb->ctypes[c] & ctype_digit) == 0
   2454        )
   2455       RRETURN(MATCH_NOMATCH);
   2456     ecode++;
   2457     break;
   2458 
   2459     case OP_NOT_WHITESPACE:
   2460     if (eptr >= mb->end_subject)
   2461       {
   2462       SCHECK_PARTIAL();
   2463       RRETURN(MATCH_NOMATCH);
   2464       }
   2465     GETCHARINCTEST(c, eptr);
   2466     if (
   2467 #ifdef SUPPORT_WIDE_CHARS
   2468        c < 256 &&
   2469 #endif
   2470        (mb->ctypes[c] & ctype_space) != 0
   2471        )
   2472       RRETURN(MATCH_NOMATCH);
   2473     ecode++;
   2474     break;
   2475 
   2476     case OP_WHITESPACE:
   2477     if (eptr >= mb->end_subject)
   2478       {
   2479       SCHECK_PARTIAL();
   2480       RRETURN(MATCH_NOMATCH);
   2481       }
   2482     GETCHARINCTEST(c, eptr);
   2483     if (
   2484 #ifdef SUPPORT_WIDE_CHARS
   2485        c > 255 ||
   2486 #endif
   2487        (mb->ctypes[c] & ctype_space) == 0
   2488        )
   2489       RRETURN(MATCH_NOMATCH);
   2490     ecode++;
   2491     break;
   2492 
   2493     case OP_NOT_WORDCHAR:
   2494     if (eptr >= mb->end_subject)
   2495       {
   2496       SCHECK_PARTIAL();
   2497       RRETURN(MATCH_NOMATCH);
   2498       }
   2499     GETCHARINCTEST(c, eptr);
   2500     if (
   2501 #ifdef SUPPORT_WIDE_CHARS
   2502        c < 256 &&
   2503 #endif
   2504        (mb->ctypes[c] & ctype_word) != 0
   2505        )
   2506       RRETURN(MATCH_NOMATCH);
   2507     ecode++;
   2508     break;
   2509 
   2510     case OP_WORDCHAR:
   2511     if (eptr >= mb->end_subject)
   2512       {
   2513       SCHECK_PARTIAL();
   2514       RRETURN(MATCH_NOMATCH);
   2515       }
   2516     GETCHARINCTEST(c, eptr);
   2517     if (
   2518 #ifdef SUPPORT_WIDE_CHARS
   2519        c > 255 ||
   2520 #endif
   2521        (mb->ctypes[c] & ctype_word) == 0
   2522        )
   2523       RRETURN(MATCH_NOMATCH);
   2524     ecode++;
   2525     break;
   2526 
   2527     case OP_ANYNL:
   2528     if (eptr >= mb->end_subject)
   2529       {
   2530       SCHECK_PARTIAL();
   2531       RRETURN(MATCH_NOMATCH);
   2532       }
   2533     GETCHARINCTEST(c, eptr);
   2534     switch(c)
   2535       {
   2536       default: RRETURN(MATCH_NOMATCH);
   2537 
   2538       case CHAR_CR:
   2539       if (eptr >= mb->end_subject)
   2540         {
   2541         SCHECK_PARTIAL();
   2542         }
   2543       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
   2544       break;
   2545 
   2546       case CHAR_LF:
   2547       break;
   2548 
   2549       case CHAR_VT:
   2550       case CHAR_FF:
   2551       case CHAR_NEL:
   2552 #ifndef EBCDIC
   2553       case 0x2028:
   2554       case 0x2029:
   2555 #endif  /* Not EBCDIC */
   2556       if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
   2557       break;
   2558       }
   2559     ecode++;
   2560     break;
   2561 
   2562     case OP_NOT_HSPACE:
   2563     if (eptr >= mb->end_subject)
   2564       {
   2565       SCHECK_PARTIAL();
   2566       RRETURN(MATCH_NOMATCH);
   2567       }
   2568     GETCHARINCTEST(c, eptr);
   2569     switch(c)
   2570       {
   2571       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   2572       default: break;
   2573       }
   2574     ecode++;
   2575     break;
   2576 
   2577     case OP_HSPACE:
   2578     if (eptr >= mb->end_subject)
   2579       {
   2580       SCHECK_PARTIAL();
   2581       RRETURN(MATCH_NOMATCH);
   2582       }
   2583     GETCHARINCTEST(c, eptr);
   2584     switch(c)
   2585       {
   2586       HSPACE_CASES: break;  /* Byte and multibyte cases */
   2587       default: RRETURN(MATCH_NOMATCH);
   2588       }
   2589     ecode++;
   2590     break;
   2591 
   2592     case OP_NOT_VSPACE:
   2593     if (eptr >= mb->end_subject)
   2594       {
   2595       SCHECK_PARTIAL();
   2596       RRETURN(MATCH_NOMATCH);
   2597       }
   2598     GETCHARINCTEST(c, eptr);
   2599     switch(c)
   2600       {
   2601       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   2602       default: break;
   2603       }
   2604     ecode++;
   2605     break;
   2606 
   2607     case OP_VSPACE:
   2608     if (eptr >= mb->end_subject)
   2609       {
   2610       SCHECK_PARTIAL();
   2611       RRETURN(MATCH_NOMATCH);
   2612       }
   2613     GETCHARINCTEST(c, eptr);
   2614     switch(c)
   2615       {
   2616       VSPACE_CASES: break;
   2617       default: RRETURN(MATCH_NOMATCH);
   2618       }
   2619     ecode++;
   2620     break;
   2621 
   2622 #ifdef SUPPORT_UNICODE
   2623     /* Check the next character by Unicode property. We will get here only
   2624     if the support is in the binary; otherwise a compile-time error occurs. */
   2625 
   2626     case OP_PROP:
   2627     case OP_NOTPROP:
   2628     if (eptr >= mb->end_subject)
   2629       {
   2630       SCHECK_PARTIAL();
   2631       RRETURN(MATCH_NOMATCH);
   2632       }
   2633     GETCHARINCTEST(c, eptr);
   2634       {
   2635       const uint32_t *cp;
   2636       const ucd_record *prop = GET_UCD(c);
   2637 
   2638       switch(ecode[1])
   2639         {
   2640         case PT_ANY:
   2641         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2642         break;
   2643 
   2644         case PT_LAMP:
   2645         if ((prop->chartype == ucp_Lu ||
   2646              prop->chartype == ucp_Ll ||
   2647              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
   2648           RRETURN(MATCH_NOMATCH);
   2649         break;
   2650 
   2651         case PT_GC:
   2652         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
   2653           RRETURN(MATCH_NOMATCH);
   2654         break;
   2655 
   2656         case PT_PC:
   2657         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
   2658           RRETURN(MATCH_NOMATCH);
   2659         break;
   2660 
   2661         case PT_SC:
   2662         if ((ecode[2] != prop->script) == (op == OP_PROP))
   2663           RRETURN(MATCH_NOMATCH);
   2664         break;
   2665 
   2666         /* These are specials */
   2667 
   2668         case PT_ALNUM:
   2669         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2670              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
   2671           RRETURN(MATCH_NOMATCH);
   2672         break;
   2673 
   2674         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   2675         which means that Perl space and POSIX space are now identical. PCRE
   2676         was changed at release 8.34. */
   2677 
   2678         case PT_SPACE:    /* Perl space */
   2679         case PT_PXSPACE:  /* POSIX space */
   2680         switch(c)
   2681           {
   2682           HSPACE_CASES:
   2683           VSPACE_CASES:
   2684           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2685           break;
   2686 
   2687           default:
   2688           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
   2689             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
   2690           break;
   2691           }
   2692         break;
   2693 
   2694         case PT_WORD:
   2695         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2696              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   2697              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
   2698           RRETURN(MATCH_NOMATCH);
   2699         break;
   2700 
   2701         case PT_CLIST:
   2702         cp = PRIV(ucd_caseless_sets) + ecode[2];
   2703         for (;;)
   2704           {
   2705           if (c < *cp)
   2706             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
   2707           if (c == *cp++)
   2708             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
   2709           }
   2710         break;
   2711 
   2712         case PT_UCNC:
   2713         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   2714              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   2715              c >= 0xe000) == (op == OP_NOTPROP))
   2716           RRETURN(MATCH_NOMATCH);
   2717         break;
   2718 
   2719         /* This should never occur */
   2720 
   2721         default:
   2722         RRETURN(PCRE2_ERROR_INTERNAL);
   2723         }
   2724 
   2725       ecode += 3;
   2726       }
   2727     break;
   2728 
   2729     /* Match an extended Unicode sequence. We will get here only if the support
   2730     is in the binary; otherwise a compile-time error occurs. */
   2731 
   2732     case OP_EXTUNI:
   2733     if (eptr >= mb->end_subject)
   2734       {
   2735       SCHECK_PARTIAL();
   2736       RRETURN(MATCH_NOMATCH);
   2737       }
   2738     else
   2739       {
   2740       int lgb, rgb;
   2741       GETCHARINCTEST(c, eptr);
   2742       lgb = UCD_GRAPHBREAK(c);
   2743       while (eptr < mb->end_subject)
   2744         {
   2745         int len = 1;
   2746         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   2747         rgb = UCD_GRAPHBREAK(c);
   2748         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   2749         lgb = rgb;
   2750         eptr += len;
   2751         }
   2752       }
   2753     CHECK_PARTIAL();
   2754     ecode++;
   2755     break;
   2756 #endif  /* SUPPORT_UNICODE */
   2757 
   2758 
   2759     /* Match a back reference, possibly repeatedly. Look past the end of the
   2760     item to see if there is repeat information following.
   2761 
   2762     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
   2763     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
   2764     and OP_DNREFI are used. In this case we must scan the list of groups to
   2765     which the name refers, and use the first one that is set. */
   2766 
   2767     case OP_DNREF:
   2768     case OP_DNREFI:
   2769     caseless = op == OP_DNREFI;
   2770       {
   2771       int count = GET2(ecode, 1+IMM2_SIZE);
   2772       PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
   2773       ecode += 1 + 2*IMM2_SIZE;
   2774 
   2775       /* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
   2776       code. */
   2777 
   2778       offset = 0;
   2779       while (count-- > 0)
   2780         {
   2781         offset = GET2(slot, 0) << 1;
   2782         if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) break;
   2783         slot += mb->name_entry_size;
   2784         }
   2785       }
   2786     goto REF_REPEAT;
   2787 
   2788     case OP_REF:
   2789     case OP_REFI:
   2790     caseless = op == OP_REFI;
   2791     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
   2792     ecode += 1 + IMM2_SIZE;
   2793 
   2794     /* Set up for repetition, or handle the non-repeated case */
   2795 
   2796     REF_REPEAT:
   2797     switch (*ecode)
   2798       {
   2799       case OP_CRSTAR:
   2800       case OP_CRMINSTAR:
   2801       case OP_CRPLUS:
   2802       case OP_CRMINPLUS:
   2803       case OP_CRQUERY:
   2804       case OP_CRMINQUERY:
   2805       c = *ecode++ - OP_CRSTAR;
   2806       minimize = (c & 1) != 0;
   2807       min = rep_min[c];                 /* Pick up values from tables; */
   2808       max = rep_max[c];                 /* zero for max => infinity */
   2809       if (max == 0) max = INT_MAX;
   2810       break;
   2811 
   2812       case OP_CRRANGE:
   2813       case OP_CRMINRANGE:
   2814       minimize = (*ecode == OP_CRMINRANGE);
   2815       min = GET2(ecode, 1);
   2816       max = GET2(ecode, 1 + IMM2_SIZE);
   2817       if (max == 0) max = INT_MAX;
   2818       ecode += 1 + 2 * IMM2_SIZE;
   2819       break;
   2820 
   2821       default:                  /* No repeat follows */
   2822         {
   2823         int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
   2824         if (rc != 0)
   2825           {
   2826           if (rc > 0) eptr = mb->end_subject;   /* Partial match */
   2827           CHECK_PARTIAL();
   2828           RRETURN(MATCH_NOMATCH);
   2829           }
   2830         }
   2831       eptr += length;
   2832       continue;              /* With the main loop */
   2833       }
   2834 
   2835     /* Handle repeated back references. If a set group has length zero, just
   2836     continue with the main loop, because it matches however many times. For an
   2837     unset reference, if the minimum is zero, we can also just continue. We can
   2838     also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
   2839     group behave as a zero-length group. For any other unset cases, carrying
   2840     on will result in NOMATCH. */
   2841 
   2842     if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
   2843       {
   2844       if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
   2845       }
   2846     else  /* Group is not set */
   2847       {
   2848       if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
   2849         continue;
   2850       }
   2851 
   2852     /* First, ensure the minimum number of matches are present. We get back
   2853     the length of the reference string explicitly rather than passing the
   2854     address of eptr, so that eptr can be a register variable. */
   2855 
   2856     for (i = 1; i <= min; i++)
   2857       {
   2858       PCRE2_SIZE slength;
   2859       int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
   2860       if (rc != 0)
   2861         {
   2862         if (rc > 0) eptr = mb->end_subject;   /* Partial match */
   2863         CHECK_PARTIAL();
   2864         RRETURN(MATCH_NOMATCH);
   2865         }
   2866       eptr += slength;
   2867       }
   2868 
   2869     /* If min = max, continue at the same level without recursion.
   2870     They are not both allowed to be zero. */
   2871 
   2872     if (min == max) continue;
   2873 
   2874     /* If minimizing, keep trying and advancing the pointer */
   2875 
   2876     if (minimize)
   2877       {
   2878       for (fi = min;; fi++)
   2879         {
   2880         int rc;
   2881         PCRE2_SIZE slength;
   2882         RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
   2883         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2884         if (fi >= max) RRETURN(MATCH_NOMATCH);
   2885         rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
   2886         if (rc != 0)
   2887           {
   2888           if (rc > 0) eptr = mb->end_subject;   /* Partial match */
   2889           CHECK_PARTIAL();
   2890           RRETURN(MATCH_NOMATCH);
   2891           }
   2892         eptr += slength;
   2893         }
   2894       /* Control never gets here */
   2895       }
   2896 
   2897     /* If maximizing, find the longest string and work backwards, as long as
   2898     the matched lengths for each iteration are the same. */
   2899 
   2900     else
   2901       {
   2902       BOOL samelengths = TRUE;
   2903       pp = eptr;
   2904       length = mb->ovector[offset+1] - mb->ovector[offset];
   2905 
   2906       for (i = min; i < max; i++)
   2907         {
   2908         PCRE2_SIZE slength;
   2909         int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
   2910 
   2911         if (rc != 0)
   2912           {
   2913           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
   2914           the soft partial matching case. */
   2915 
   2916           if (rc > 0 && mb->partial != 0 &&
   2917               mb->end_subject > mb->start_used_ptr)
   2918             {
   2919             mb->hitend = TRUE;
   2920             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   2921             }
   2922           break;
   2923           }
   2924 
   2925         if (slength != length) samelengths = FALSE;
   2926         eptr += slength;
   2927         }
   2928 
   2929       /* If the length matched for each repetition is the same as the length of
   2930       the captured group, we can easily work backwards. This is the normal
   2931       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
   2932       characters whose lengths (in terms of code units) differ. Because this
   2933       is very rare, we handle it by re-matching fewer and fewer times. */
   2934 
   2935       if (samelengths)
   2936         {
   2937         while (eptr >= pp)
   2938           {
   2939           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
   2940           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2941           eptr -= length;
   2942           }
   2943         }
   2944 
   2945       /* The rare case of non-matching lengths. Re-scan the repetition for each
   2946       iteration. We know that match_ref() will succeed every time. */
   2947 
   2948       else
   2949         {
   2950         max = i;
   2951         for (;;)
   2952           {
   2953           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
   2954           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2955           if (eptr == pp) break;  /* Failed after minimal repetition */
   2956           eptr = pp;
   2957           max--;
   2958           for (i = min; i < max; i++)
   2959             {
   2960             PCRE2_SIZE slength;
   2961             (void)match_ref(offset, offset_top, eptr, mb, caseless, &slength);
   2962             eptr += slength;
   2963             }
   2964           }
   2965         }
   2966 
   2967       RRETURN(MATCH_NOMATCH);
   2968       }
   2969     /* Control never gets here */
   2970 
   2971     /* Match a bit-mapped character class, possibly repeatedly. This op code is
   2972     used when all the characters in the class have values in the range 0-255,
   2973     and either the matching is caseful, or the characters are in the range
   2974     0-127 when UTF-8 processing is enabled. The only difference between
   2975     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
   2976     encountered.
   2977 
   2978     First, look past the end of the item to see if there is repeat information
   2979     following. Then obey similar code to character type repeats - written out
   2980     again for speed. */
   2981 
   2982     case OP_NCLASS:
   2983     case OP_CLASS:
   2984       {
   2985       /* The data variable is saved across frames, so the byte map needs to
   2986       be stored there. */
   2987 #define BYTE_MAP ((uint8_t *)data)
   2988       data = ecode + 1;                /* Save for matching */
   2989       ecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
   2990 
   2991       switch (*ecode)
   2992         {
   2993         case OP_CRSTAR:
   2994         case OP_CRMINSTAR:
   2995         case OP_CRPLUS:
   2996         case OP_CRMINPLUS:
   2997         case OP_CRQUERY:
   2998         case OP_CRMINQUERY:
   2999         case OP_CRPOSSTAR:
   3000         case OP_CRPOSPLUS:
   3001         case OP_CRPOSQUERY:
   3002         c = *ecode++ - OP_CRSTAR;
   3003         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   3004         else possessive = TRUE;
   3005         min = rep_min[c];                 /* Pick up values from tables; */
   3006         max = rep_max[c];                 /* zero for max => infinity */
   3007         if (max == 0) max = INT_MAX;
   3008         break;
   3009 
   3010         case OP_CRRANGE:
   3011         case OP_CRMINRANGE:
   3012         case OP_CRPOSRANGE:
   3013         minimize = (*ecode == OP_CRMINRANGE);
   3014         possessive = (*ecode == OP_CRPOSRANGE);
   3015         min = GET2(ecode, 1);
   3016         max = GET2(ecode, 1 + IMM2_SIZE);
   3017         if (max == 0) max = INT_MAX;
   3018         ecode += 1 + 2 * IMM2_SIZE;
   3019         break;
   3020 
   3021         default:               /* No repeat follows */
   3022         min = max = 1;
   3023         break;
   3024         }
   3025 
   3026       /* First, ensure the minimum number of matches are present. */
   3027 
   3028 #ifdef SUPPORT_UNICODE
   3029       if (utf)
   3030         {
   3031         for (i = 1; i <= min; i++)
   3032           {
   3033           if (eptr >= mb->end_subject)
   3034             {
   3035             SCHECK_PARTIAL();
   3036             RRETURN(MATCH_NOMATCH);
   3037             }
   3038           GETCHARINC(c, eptr);
   3039           if (c > 255)
   3040             {
   3041             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3042             }
   3043           else
   3044             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3045           }
   3046         }
   3047       else
   3048 #endif
   3049       /* Not UTF mode */
   3050         {
   3051         for (i = 1; i <= min; i++)
   3052           {
   3053           if (eptr >= mb->end_subject)
   3054             {
   3055             SCHECK_PARTIAL();
   3056             RRETURN(MATCH_NOMATCH);
   3057             }
   3058           c = *eptr++;
   3059 #if PCRE2_CODE_UNIT_WIDTH != 8
   3060           if (c > 255)
   3061             {
   3062             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3063             }
   3064           else
   3065 #endif
   3066             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3067           }
   3068         }
   3069 
   3070       /* If max == min we can continue with the main loop without the
   3071       need to recurse. */
   3072 
   3073       if (min == max) continue;
   3074 
   3075       /* If minimizing, keep testing the rest of the expression and advancing
   3076       the pointer while it matches the class. */
   3077 
   3078       if (minimize)
   3079         {
   3080 #ifdef SUPPORT_UNICODE
   3081         if (utf)
   3082           {
   3083           for (fi = min;; fi++)
   3084             {
   3085             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM16);
   3086             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3087             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3088             if (eptr >= mb->end_subject)
   3089               {
   3090               SCHECK_PARTIAL();
   3091               RRETURN(MATCH_NOMATCH);
   3092               }
   3093             GETCHARINC(c, eptr);
   3094             if (c > 255)
   3095               {
   3096               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3097               }
   3098             else
   3099               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3100             }
   3101           }
   3102         else
   3103 #endif
   3104         /* Not UTF mode */
   3105           {
   3106           for (fi = min;; fi++)
   3107             {
   3108             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM17);
   3109             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3110             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3111             if (eptr >= mb->end_subject)
   3112               {
   3113               SCHECK_PARTIAL();
   3114               RRETURN(MATCH_NOMATCH);
   3115               }
   3116             c = *eptr++;
   3117 #if PCRE2_CODE_UNIT_WIDTH != 8
   3118             if (c > 255)
   3119               {
   3120               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3121               }
   3122             else
   3123 #endif
   3124               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3125             }
   3126           }
   3127         /* Control never gets here */
   3128         }
   3129 
   3130       /* If maximizing, find the longest possible run, then work backwards. */
   3131 
   3132       else
   3133         {
   3134         pp = eptr;
   3135 
   3136 #ifdef SUPPORT_UNICODE
   3137         if (utf)
   3138           {
   3139           for (i = min; i < max; i++)
   3140             {
   3141             int len = 1;
   3142             if (eptr >= mb->end_subject)
   3143               {
   3144               SCHECK_PARTIAL();
   3145               break;
   3146               }
   3147             GETCHARLEN(c, eptr, len);
   3148             if (c > 255)
   3149               {
   3150               if (op == OP_CLASS) break;
   3151               }
   3152             else
   3153               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3154             eptr += len;
   3155             }
   3156 
   3157           if (possessive) continue;    /* No backtracking */
   3158 
   3159           for (;;)
   3160             {
   3161             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM18);
   3162             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3163             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3164             BACKCHAR(eptr);
   3165             }
   3166           }
   3167         else
   3168 #endif
   3169           /* Not UTF mode */
   3170           {
   3171           for (i = min; i < max; i++)
   3172             {
   3173             if (eptr >= mb->end_subject)
   3174               {
   3175               SCHECK_PARTIAL();
   3176               break;
   3177               }
   3178             c = *eptr;
   3179 #if PCRE2_CODE_UNIT_WIDTH != 8
   3180             if (c > 255)
   3181               {
   3182               if (op == OP_CLASS) break;
   3183               }
   3184             else
   3185 #endif
   3186               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3187             eptr++;
   3188             }
   3189 
   3190           if (possessive) continue;    /* No backtracking */
   3191 
   3192           while (eptr >= pp)
   3193             {
   3194             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM19);
   3195             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3196             eptr--;
   3197             }
   3198           }
   3199 
   3200         RRETURN(MATCH_NOMATCH);
   3201         }
   3202 #undef BYTE_MAP
   3203       }
   3204     /* Control never gets here */
   3205 
   3206 
   3207     /* Match an extended character class. In the 8-bit library, this opcode is
   3208     encountered only when UTF-8 mode is supported. In the 16-bit and
   3209     32-bit libraries, codepoints greater than 255 may be encountered even when
   3210     UTF is not supported. */
   3211 
   3212 #ifdef SUPPORT_WIDE_CHARS
   3213     case OP_XCLASS:
   3214       {
   3215       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
   3216       ecode += GET(ecode, 1);                      /* Advance past the item */
   3217 
   3218       switch (*ecode)
   3219         {
   3220         case OP_CRSTAR:
   3221         case OP_CRMINSTAR:
   3222         case OP_CRPLUS:
   3223         case OP_CRMINPLUS:
   3224         case OP_CRQUERY:
   3225         case OP_CRMINQUERY:
   3226         case OP_CRPOSSTAR:
   3227         case OP_CRPOSPLUS:
   3228         case OP_CRPOSQUERY:
   3229         c = *ecode++ - OP_CRSTAR;
   3230         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   3231         else possessive = TRUE;
   3232         min = rep_min[c];                 /* Pick up values from tables; */
   3233         max = rep_max[c];                 /* zero for max => infinity */
   3234         if (max == 0) max = INT_MAX;
   3235         break;
   3236 
   3237         case OP_CRRANGE:
   3238         case OP_CRMINRANGE:
   3239         case OP_CRPOSRANGE:
   3240         minimize = (*ecode == OP_CRMINRANGE);
   3241         possessive = (*ecode == OP_CRPOSRANGE);
   3242         min = GET2(ecode, 1);
   3243         max = GET2(ecode, 1 + IMM2_SIZE);
   3244         if (max == 0) max = INT_MAX;
   3245         ecode += 1 + 2 * IMM2_SIZE;
   3246         break;
   3247 
   3248         default:               /* No repeat follows */
   3249         min = max = 1;
   3250         break;
   3251         }
   3252 
   3253       /* First, ensure the minimum number of matches are present. */
   3254 
   3255       for (i = 1; i <= min; i++)
   3256         {
   3257         if (eptr >= mb->end_subject)
   3258           {
   3259           SCHECK_PARTIAL();
   3260           RRETURN(MATCH_NOMATCH);
   3261           }
   3262         GETCHARINCTEST(c, eptr);
   3263         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3264         }
   3265 
   3266       /* If max == min we can continue with the main loop without the
   3267       need to recurse. */
   3268 
   3269       if (min == max) continue;
   3270 
   3271       /* If minimizing, keep testing the rest of the expression and advancing
   3272       the pointer while it matches the class. */
   3273 
   3274       if (minimize)
   3275         {
   3276         for (fi = min;; fi++)
   3277           {
   3278           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM20);
   3279           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3280           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3281           if (eptr >= mb->end_subject)
   3282             {
   3283             SCHECK_PARTIAL();
   3284             RRETURN(MATCH_NOMATCH);
   3285             }
   3286           GETCHARINCTEST(c, eptr);
   3287           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3288           }
   3289         /* Control never gets here */
   3290         }
   3291 
   3292       /* If maximizing, find the longest possible run, then work backwards. */
   3293 
   3294       else
   3295         {
   3296         pp = eptr;
   3297         for (i = min; i < max; i++)
   3298           {
   3299           int len = 1;
   3300           if (eptr >= mb->end_subject)
   3301             {
   3302             SCHECK_PARTIAL();
   3303             break;
   3304             }
   3305 #ifdef SUPPORT_UNICODE
   3306           GETCHARLENTEST(c, eptr, len);
   3307 #else
   3308           c = *eptr;
   3309 #endif
   3310           if (!PRIV(xclass)(c, data, utf)) break;
   3311           eptr += len;
   3312           }
   3313 
   3314         if (possessive) continue;    /* No backtracking */
   3315 
   3316         for(;;)
   3317           {
   3318           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
   3319           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3320           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3321 #ifdef SUPPORT_UNICODE
   3322           if (utf) BACKCHAR(eptr);
   3323 #endif
   3324           }
   3325         RRETURN(MATCH_NOMATCH);
   3326         }
   3327 
   3328       /* Control never gets here */
   3329       }
   3330 #endif    /* End of XCLASS */
   3331 
   3332     /* Match a single character, casefully */
   3333 
   3334     case OP_CHAR:
   3335 #ifdef SUPPORT_UNICODE
   3336     if (utf)
   3337       {
   3338       length = 1;
   3339       ecode++;
   3340       GETCHARLEN(fc, ecode, length);
   3341       if (length > (PCRE2_SIZE)(mb->end_subject - eptr))
   3342         {
   3343         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
   3344         RRETURN(MATCH_NOMATCH);
   3345         }
   3346       for (; length > 0; length--)
   3347         {
   3348         if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
   3349         }
   3350       }
   3351     else
   3352 #endif
   3353     /* Not UTF mode */
   3354       {
   3355       if (mb->end_subject - eptr < 1)
   3356         {
   3357         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
   3358         RRETURN(MATCH_NOMATCH);
   3359         }
   3360       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
   3361       ecode += 2;
   3362       }
   3363     break;
   3364 
   3365     /* Match a single character, caselessly. If we are at the end of the
   3366     subject, give up immediately. */
   3367 
   3368     case OP_CHARI:
   3369     if (eptr >= mb->end_subject)
   3370       {
   3371       SCHECK_PARTIAL();
   3372       RRETURN(MATCH_NOMATCH);
   3373       }
   3374 
   3375 #ifdef SUPPORT_UNICODE
   3376     if (utf)
   3377       {
   3378       length = 1;
   3379       ecode++;
   3380       GETCHARLEN(fc, ecode, length);
   3381 
   3382       /* If the pattern character's value is < 128, we have only one byte, and
   3383       we know that its other case must also be one byte long, so we can use the
   3384       fast lookup table. We know that there is at least one byte left in the
   3385       subject. */
   3386 
   3387       if (fc < 128)
   3388         {
   3389         uint32_t cc = UCHAR21(eptr);
   3390         if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
   3391         ecode++;
   3392         eptr++;
   3393         }
   3394 
   3395       /* Otherwise we must pick up the subject character. Note that we cannot
   3396       use the value of "length" to check for sufficient bytes left, because the
   3397       other case of the character may have more or fewer bytes.  */
   3398 
   3399       else
   3400         {
   3401         uint32_t dc;
   3402         GETCHARINC(dc, eptr);
   3403         ecode += length;
   3404 
   3405         /* If we have Unicode property support, we can use it to test the other
   3406         case of the character, if there is one. */
   3407 
   3408         if (fc != dc)
   3409           {
   3410 #ifdef SUPPORT_UNICODE
   3411           if (dc != UCD_OTHERCASE(fc))
   3412 #endif
   3413             RRETURN(MATCH_NOMATCH);
   3414           }
   3415         }
   3416       }
   3417     else
   3418 #endif   /* SUPPORT_UNICODE */
   3419 
   3420     /* Not UTF mode */
   3421       {
   3422       if (TABLE_GET(ecode[1], mb->lcc, ecode[1])
   3423           != TABLE_GET(*eptr, mb->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
   3424       eptr++;
   3425       ecode += 2;
   3426       }
   3427     break;
   3428 
   3429     /* Match a single character repeatedly. */
   3430 
   3431     case OP_EXACT:
   3432     case OP_EXACTI:
   3433     min = max = GET2(ecode, 1);
   3434     ecode += 1 + IMM2_SIZE;
   3435     goto REPEATCHAR;
   3436 
   3437     case OP_POSUPTO:
   3438     case OP_POSUPTOI:
   3439     possessive = TRUE;
   3440     /* Fall through */
   3441 
   3442     case OP_UPTO:
   3443     case OP_UPTOI:
   3444     case OP_MINUPTO:
   3445     case OP_MINUPTOI:
   3446     min = 0;
   3447     max = GET2(ecode, 1);
   3448     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
   3449     ecode += 1 + IMM2_SIZE;
   3450     goto REPEATCHAR;
   3451 
   3452     case OP_POSSTAR:
   3453     case OP_POSSTARI:
   3454     possessive = TRUE;
   3455     min = 0;
   3456     max = INT_MAX;
   3457     ecode++;
   3458     goto REPEATCHAR;
   3459 
   3460     case OP_POSPLUS:
   3461     case OP_POSPLUSI:
   3462     possessive = TRUE;
   3463     min = 1;
   3464     max = INT_MAX;
   3465     ecode++;
   3466     goto REPEATCHAR;
   3467 
   3468     case OP_POSQUERY:
   3469     case OP_POSQUERYI:
   3470     possessive = TRUE;
   3471     min = 0;
   3472     max = 1;
   3473     ecode++;
   3474     goto REPEATCHAR;
   3475 
   3476     case OP_STAR:
   3477     case OP_STARI:
   3478     case OP_MINSTAR:
   3479     case OP_MINSTARI:
   3480     case OP_PLUS:
   3481     case OP_PLUSI:
   3482     case OP_MINPLUS:
   3483     case OP_MINPLUSI:
   3484     case OP_QUERY:
   3485     case OP_QUERYI:
   3486     case OP_MINQUERY:
   3487     case OP_MINQUERYI:
   3488     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
   3489     minimize = (c & 1) != 0;
   3490     min = rep_min[c];                 /* Pick up values from tables; */
   3491     max = rep_max[c];                 /* zero for max => infinity */
   3492     if (max == 0) max = INT_MAX;
   3493 
   3494     /* Common code for all repeated single-character matches. We first check
   3495     for the minimum number of characters. If the minimum equals the maximum, we
   3496     are done. Otherwise, if minimizing, check the rest of the pattern for a
   3497     match; if there isn't one, advance up to the maximum, one character at a
   3498     time.
   3499 
   3500     If maximizing, advance up to the maximum number of matching characters,
   3501     until eptr is past the end of the maximum run. If possessive, we are
   3502     then done (no backing up). Otherwise, match at this position; anything
   3503     other than no match is immediately returned. For nomatch, back up one
   3504     character, unless we are matching \R and the last thing matched was
   3505     \r\n, in which case, back up two bytes. When we reach the first optional
   3506     character position, we can save stack by doing a tail recurse.
   3507 
   3508     The various UTF/non-UTF and caseful/caseless cases are handled separately,
   3509     for speed. */
   3510 
   3511     REPEATCHAR:
   3512 #ifdef SUPPORT_UNICODE
   3513     if (utf)
   3514       {
   3515       length = 1;
   3516       charptr = ecode;
   3517       GETCHARLEN(fc, ecode, length);
   3518       ecode += length;
   3519 
   3520       /* Handle multibyte character matching specially here. There is
   3521       support for caseless matching if UCP support is present. */
   3522 
   3523       if (length > 1)
   3524         {
   3525         uint32_t othercase;
   3526         if (op >= OP_STARI &&     /* Caseless */
   3527             (othercase = UCD_OTHERCASE(fc)) != fc)
   3528           oclength = PRIV(ord2utf)(othercase, occhars);
   3529         else oclength = 0;
   3530 
   3531         for (i = 1; i <= min; i++)
   3532           {
   3533           if (eptr <= mb->end_subject - length &&
   3534             memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
   3535           else if (oclength > 0 &&
   3536                    eptr <= mb->end_subject - oclength &&
   3537                    memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
   3538           else
   3539             {
   3540             CHECK_PARTIAL();
   3541             RRETURN(MATCH_NOMATCH);
   3542             }
   3543           }
   3544 
   3545         if (min == max) continue;
   3546 
   3547         if (minimize)
   3548           {
   3549           for (fi = min;; fi++)
   3550             {
   3551             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM22);
   3552             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3553             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3554             if (eptr <= mb->end_subject - length &&
   3555               memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
   3556             else if (oclength > 0 &&
   3557                      eptr <= mb->end_subject - oclength &&
   3558                      memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
   3559             else
   3560               {
   3561               CHECK_PARTIAL();
   3562               RRETURN(MATCH_NOMATCH);
   3563               }
   3564             }
   3565           /* Control never gets here */
   3566           }
   3567 
   3568         else  /* Maximize */
   3569           {
   3570           pp = eptr;
   3571           for (i = min; i < max; i++)
   3572             {
   3573             if (eptr <= mb->end_subject - length &&
   3574                 memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
   3575             else if (oclength > 0 &&
   3576                      eptr <= mb->end_subject - oclength &&
   3577                      memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
   3578             else
   3579               {
   3580               CHECK_PARTIAL();
   3581               break;
   3582               }
   3583             }
   3584 
   3585           if (possessive) continue;    /* No backtracking */
   3586 
   3587           /* After \C in UTF mode, pp might be in the middle of a Unicode
   3588           character. Use <= pp to ensure backtracking doesn't go too far. */
   3589 
   3590           for(;;)
   3591             {
   3592             if (eptr <= pp) goto TAIL_RECURSE;
   3593             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23);
   3594             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3595             eptr--;
   3596             BACKCHAR(eptr);
   3597             }
   3598           }
   3599         /* Control never gets here */
   3600         }
   3601 
   3602       /* If the length of a UTF-8 character is 1, we fall through here, and
   3603       obey the code as for non-UTF-8 characters below, though in this case the
   3604       value of fc will always be < 128. */
   3605       }
   3606     else
   3607 #endif  /* SUPPORT_UNICODE */
   3608 
   3609       /* When not in UTF-8 mode, load a single-byte character. */
   3610       fc = *ecode++;
   3611 
   3612     /* The value of fc at this point is always one character, though we may
   3613     or may not be in UTF mode. The code is duplicated for the caseless and
   3614     caseful cases, for speed, since matching characters is likely to be quite
   3615     common. First, ensure the minimum number of matches are present. If min =
   3616     max, continue at the same level without recursing. Otherwise, if
   3617     minimizing, keep trying the rest of the expression and advancing one
   3618     matching character if failing, up to the maximum. Alternatively, if
   3619     maximizing, find the maximum number of characters and work backwards. */
   3620 
   3621     if (op >= OP_STARI)  /* Caseless */
   3622       {
   3623 #if PCRE2_CODE_UNIT_WIDTH == 8
   3624       /* fc must be < 128 if UTF is enabled. */
   3625       foc = mb->fcc[fc];
   3626 #else
   3627 #ifdef SUPPORT_UNICODE
   3628       if (utf && fc > 127)
   3629         foc = UCD_OTHERCASE(fc);
   3630       else
   3631 #endif /* SUPPORT_UNICODE */
   3632         foc = TABLE_GET(fc, mb->fcc, fc);
   3633 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
   3634 
   3635       for (i = 1; i <= min; i++)
   3636         {
   3637         uint32_t cc;                 /* Faster than PCRE2_UCHAR */
   3638         if (eptr >= mb->end_subject)
   3639           {
   3640           SCHECK_PARTIAL();
   3641           RRETURN(MATCH_NOMATCH);
   3642           }
   3643         cc = UCHAR21TEST(eptr);
   3644         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3645         eptr++;
   3646         }
   3647       if (min == max) continue;
   3648       if (minimize)
   3649         {
   3650         for (fi = min;; fi++)
   3651           {
   3652           uint32_t cc;               /* Faster than PCRE2_UCHAR */
   3653           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM24);
   3654           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3655           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3656           if (eptr >= mb->end_subject)
   3657             {
   3658             SCHECK_PARTIAL();
   3659             RRETURN(MATCH_NOMATCH);
   3660             }
   3661           cc = UCHAR21TEST(eptr);
   3662           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3663           eptr++;
   3664           }
   3665         /* Control never gets here */
   3666         }
   3667       else  /* Maximize */
   3668         {
   3669         pp = eptr;
   3670         for (i = min; i < max; i++)
   3671           {
   3672           uint32_t cc;               /* Faster than PCRE2_UCHAR */
   3673           if (eptr >= mb->end_subject)
   3674             {
   3675             SCHECK_PARTIAL();
   3676             break;
   3677             }
   3678           cc = UCHAR21TEST(eptr);
   3679           if (fc != cc && foc != cc) break;
   3680           eptr++;
   3681           }
   3682         if (possessive) continue;       /* No backtracking */
   3683         for (;;)
   3684           {
   3685           if (eptr == pp) goto TAIL_RECURSE;
   3686           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM25);
   3687           eptr--;
   3688           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3689           }
   3690         /* Control never gets here */
   3691         }
   3692       }
   3693 
   3694     /* Caseful comparisons (includes all multi-byte characters) */
   3695 
   3696     else
   3697       {
   3698       for (i = 1; i <= min; i++)
   3699         {
   3700         if (eptr >= mb->end_subject)
   3701           {
   3702           SCHECK_PARTIAL();
   3703           RRETURN(MATCH_NOMATCH);
   3704           }
   3705         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3706         }
   3707 
   3708       if (min == max) continue;
   3709 
   3710       if (minimize)
   3711         {
   3712         for (fi = min;; fi++)
   3713           {
   3714           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM26);
   3715           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3716           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3717           if (eptr >= mb->end_subject)
   3718             {
   3719             SCHECK_PARTIAL();
   3720             RRETURN(MATCH_NOMATCH);
   3721             }
   3722           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3723           }
   3724         /* Control never gets here */
   3725         }
   3726       else  /* Maximize */
   3727         {
   3728         pp = eptr;
   3729         for (i = min; i < max; i++)
   3730           {
   3731           if (eptr >= mb->end_subject)
   3732             {
   3733             SCHECK_PARTIAL();
   3734             break;
   3735             }
   3736           if (fc != UCHAR21TEST(eptr)) break;
   3737           eptr++;
   3738           }
   3739         if (possessive) continue;    /* No backtracking */
   3740         for (;;)
   3741           {
   3742           if (eptr == pp) goto TAIL_RECURSE;
   3743           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM27);
   3744           eptr--;
   3745           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3746           }
   3747         /* Control never gets here */
   3748         }
   3749       }
   3750     /* Control never gets here */
   3751 
   3752     /* Match a negated single one-byte character. The character we are
   3753     checking can be multibyte. */
   3754 
   3755     case OP_NOT:
   3756     case OP_NOTI:
   3757     if (eptr >= mb->end_subject)
   3758       {
   3759       SCHECK_PARTIAL();
   3760       RRETURN(MATCH_NOMATCH);
   3761       }
   3762 #ifdef SUPPORT_UNICODE
   3763     if (utf)
   3764       {
   3765       register uint32_t ch, och;
   3766 
   3767       ecode++;
   3768       GETCHARINC(ch, ecode);
   3769       GETCHARINC(c, eptr);
   3770 
   3771       if (op == OP_NOT)
   3772         {
   3773         if (ch == c) RRETURN(MATCH_NOMATCH);
   3774         }
   3775       else
   3776         {
   3777         if (ch > 127)
   3778           och = UCD_OTHERCASE(ch);
   3779         else
   3780           och = TABLE_GET(ch, mb->fcc, ch);
   3781         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
   3782         }
   3783       }
   3784     else
   3785 #endif  /* SUPPORT_UNICODE */
   3786       {
   3787       register uint32_t ch = ecode[1];
   3788       c = *eptr++;
   3789       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == c))
   3790         RRETURN(MATCH_NOMATCH);
   3791       ecode += 2;
   3792       }
   3793     break;
   3794 
   3795     /* Match a negated single one-byte character repeatedly. This is almost a
   3796     repeat of the code for a repeated single character, but I haven't found a
   3797     nice way of commoning these up that doesn't require a test of the
   3798     positive/negative option for each character match. Maybe that wouldn't add
   3799     very much to the time taken, but character matching *is* what this is all
   3800     about... */
   3801 
   3802     case OP_NOTEXACT:
   3803     case OP_NOTEXACTI:
   3804     min = max = GET2(ecode, 1);
   3805     ecode += 1 + IMM2_SIZE;
   3806     goto REPEATNOTCHAR;
   3807 
   3808     case OP_NOTUPTO:
   3809     case OP_NOTUPTOI:
   3810     case OP_NOTMINUPTO:
   3811     case OP_NOTMINUPTOI:
   3812     min = 0;
   3813     max = GET2(ecode, 1);
   3814     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
   3815     ecode += 1 + IMM2_SIZE;
   3816     goto REPEATNOTCHAR;
   3817 
   3818     case OP_NOTPOSSTAR:
   3819     case OP_NOTPOSSTARI:
   3820     possessive = TRUE;
   3821     min = 0;
   3822     max = INT_MAX;
   3823     ecode++;
   3824     goto REPEATNOTCHAR;
   3825 
   3826     case OP_NOTPOSPLUS:
   3827     case OP_NOTPOSPLUSI:
   3828     possessive = TRUE;
   3829     min = 1;
   3830     max = INT_MAX;
   3831     ecode++;
   3832     goto REPEATNOTCHAR;
   3833 
   3834     case OP_NOTPOSQUERY:
   3835     case OP_NOTPOSQUERYI:
   3836     possessive = TRUE;
   3837     min = 0;
   3838     max = 1;
   3839     ecode++;
   3840     goto REPEATNOTCHAR;
   3841 
   3842     case OP_NOTPOSUPTO:
   3843     case OP_NOTPOSUPTOI:
   3844     possessive = TRUE;
   3845     min = 0;
   3846     max = GET2(ecode, 1);
   3847     ecode += 1 + IMM2_SIZE;
   3848     goto REPEATNOTCHAR;
   3849 
   3850     case OP_NOTSTAR:
   3851     case OP_NOTSTARI:
   3852     case OP_NOTMINSTAR:
   3853     case OP_NOTMINSTARI:
   3854     case OP_NOTPLUS:
   3855     case OP_NOTPLUSI:
   3856     case OP_NOTMINPLUS:
   3857     case OP_NOTMINPLUSI:
   3858     case OP_NOTQUERY:
   3859     case OP_NOTQUERYI:
   3860     case OP_NOTMINQUERY:
   3861     case OP_NOTMINQUERYI:
   3862     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
   3863     minimize = (c & 1) != 0;
   3864     min = rep_min[c];                 /* Pick up values from tables; */
   3865     max = rep_max[c];                 /* zero for max => infinity */
   3866     if (max == 0) max = INT_MAX;
   3867 
   3868     /* Common code for all repeated single-byte matches. */
   3869 
   3870     REPEATNOTCHAR:
   3871     GETCHARINCTEST(fc, ecode);
   3872 
   3873     /* The code is duplicated for the caseless and caseful cases, for speed,
   3874     since matching characters is likely to be quite common. First, ensure the
   3875     minimum number of matches are present. If min = max, continue at the same
   3876     level without recursing. Otherwise, if minimizing, keep trying the rest of
   3877     the expression and advancing one matching character if failing, up to the
   3878     maximum. Alternatively, if maximizing, find the maximum number of
   3879     characters and work backwards. */
   3880 
   3881     if (op >= OP_NOTSTARI)     /* Caseless */
   3882       {
   3883 #ifdef SUPPORT_UNICODE
   3884       if (utf && fc > 127)
   3885         foc = UCD_OTHERCASE(fc);
   3886       else
   3887 #endif /* SUPPORT_UNICODE */
   3888         foc = TABLE_GET(fc, mb->fcc, fc);
   3889 
   3890 #ifdef SUPPORT_UNICODE
   3891       if (utf)
   3892         {
   3893         register uint32_t d;
   3894         for (i = 1; i <= min; i++)
   3895           {
   3896           if (eptr >= mb->end_subject)
   3897             {
   3898             SCHECK_PARTIAL();
   3899             RRETURN(MATCH_NOMATCH);
   3900             }
   3901           GETCHARINC(d, eptr);
   3902           if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
   3903           }
   3904         }
   3905       else
   3906 #endif  /* SUPPORT_UNICODE */
   3907       /* Not UTF mode */
   3908         {
   3909         for (i = 1; i <= min; i++)
   3910           {
   3911           if (eptr >= mb->end_subject)
   3912             {
   3913             SCHECK_PARTIAL();
   3914             RRETURN(MATCH_NOMATCH);
   3915             }
   3916           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3917           eptr++;
   3918           }
   3919         }
   3920 
   3921       if (min == max) continue;
   3922 
   3923       if (minimize)
   3924         {
   3925 #ifdef SUPPORT_UNICODE
   3926         if (utf)
   3927           {
   3928           register uint32_t d;
   3929           for (fi = min;; fi++)
   3930             {
   3931             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM28);
   3932             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3933             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3934             if (eptr >= mb->end_subject)
   3935               {
   3936               SCHECK_PARTIAL();
   3937               RRETURN(MATCH_NOMATCH);
   3938               }
   3939             GETCHARINC(d, eptr);
   3940             if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
   3941             }
   3942           }
   3943         else
   3944 #endif  /*SUPPORT_UNICODE */
   3945         /* Not UTF mode */
   3946           {
   3947           for (fi = min;; fi++)
   3948             {
   3949             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM29);
   3950             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3951             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3952             if (eptr >= mb->end_subject)
   3953               {
   3954               SCHECK_PARTIAL();
   3955               RRETURN(MATCH_NOMATCH);
   3956               }
   3957             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3958             eptr++;
   3959             }
   3960           }
   3961         /* Control never gets here */
   3962         }
   3963 
   3964       /* Maximize case */
   3965 
   3966       else
   3967         {
   3968         pp = eptr;
   3969 
   3970 #ifdef SUPPORT_UNICODE
   3971         if (utf)
   3972           {
   3973           register uint32_t d;
   3974           for (i = min; i < max; i++)
   3975             {
   3976             int len = 1;
   3977             if (eptr >= mb->end_subject)
   3978               {
   3979               SCHECK_PARTIAL();
   3980               break;
   3981               }
   3982             GETCHARLEN(d, eptr, len);
   3983             if (fc == d || (uint32_t)foc == d) break;
   3984             eptr += len;
   3985             }
   3986           if (possessive) continue;    /* No backtracking */
   3987 
   3988           /* After \C in UTF mode, pp might be in the middle of a Unicode
   3989           character. Use <= pp to ensure backtracking doesn't go too far. */
   3990 
   3991           for(;;)
   3992             {
   3993             if (eptr <= pp) goto TAIL_RECURSE;
   3994             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30);
   3995             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3996             eptr--;
   3997             BACKCHAR(eptr);
   3998             }
   3999           }
   4000         else
   4001 #endif  /* SUPPORT_UNICODE */
   4002         /* Not UTF mode */
   4003           {
   4004           for (i = min; i < max; i++)
   4005             {
   4006             if (eptr >= mb->end_subject)
   4007               {
   4008               SCHECK_PARTIAL();
   4009               break;
   4010               }
   4011             if (fc == *eptr || foc == *eptr) break;
   4012             eptr++;
   4013             }
   4014           if (possessive) continue;    /* No backtracking */
   4015           for (;;)
   4016             {
   4017             if (eptr == pp) goto TAIL_RECURSE;
   4018             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM31);
   4019             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4020             eptr--;
   4021             }
   4022           }
   4023         /* Control never gets here */
   4024         }
   4025       }
   4026 
   4027     /* Caseful comparisons */
   4028 
   4029     else
   4030       {
   4031 #ifdef SUPPORT_UNICODE
   4032       if (utf)
   4033         {
   4034         register uint32_t d;
   4035         for (i = 1; i <= min; i++)
   4036           {
   4037           if (eptr >= mb->end_subject)
   4038             {
   4039             SCHECK_PARTIAL();
   4040             RRETURN(MATCH_NOMATCH);
   4041             }
   4042           GETCHARINC(d, eptr);
   4043           if (fc == d) RRETURN(MATCH_NOMATCH);
   4044           }
   4045         }
   4046       else
   4047 #endif
   4048       /* Not UTF mode */
   4049         {
   4050         for (i = 1; i <= min; i++)
   4051           {
   4052           if (eptr >= mb->end_subject)
   4053             {
   4054             SCHECK_PARTIAL();
   4055             RRETURN(MATCH_NOMATCH);
   4056             }
   4057           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   4058           }
   4059         }
   4060 
   4061       if (min == max) continue;
   4062 
   4063       if (minimize)
   4064         {
   4065 #ifdef SUPPORT_UNICODE
   4066         if (utf)
   4067           {
   4068           register uint32_t d;
   4069           for (fi = min;; fi++)
   4070             {
   4071             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM32);
   4072             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4073             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4074             if (eptr >= mb->end_subject)
   4075               {
   4076               SCHECK_PARTIAL();
   4077               RRETURN(MATCH_NOMATCH);
   4078               }
   4079             GETCHARINC(d, eptr);
   4080             if (fc == d) RRETURN(MATCH_NOMATCH);
   4081             }
   4082           }
   4083         else
   4084 #endif
   4085         /* Not UTF mode */
   4086           {
   4087           for (fi = min;; fi++)
   4088             {
   4089             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM33);
   4090             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4091             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4092             if (eptr >= mb->end_subject)
   4093               {
   4094               SCHECK_PARTIAL();
   4095               RRETURN(MATCH_NOMATCH);
   4096               }
   4097             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   4098             }
   4099           }
   4100         /* Control never gets here */
   4101         }
   4102 
   4103       /* Maximize case */
   4104 
   4105       else
   4106         {
   4107         pp = eptr;
   4108 
   4109 #ifdef SUPPORT_UNICODE
   4110         if (utf)
   4111           {
   4112           register uint32_t d;
   4113           for (i = min; i < max; i++)
   4114             {
   4115             int len = 1;
   4116             if (eptr >= mb->end_subject)
   4117               {
   4118               SCHECK_PARTIAL();
   4119               break;
   4120               }
   4121             GETCHARLEN(d, eptr, len);
   4122             if (fc == d) break;
   4123             eptr += len;
   4124             }
   4125           if (possessive) continue;    /* No backtracking */
   4126 
   4127           /* After \C in UTF mode, pp might be in the middle of a Unicode
   4128           character. Use <= pp to ensure backtracking doesn't go too far. */
   4129 
   4130           for(;;)
   4131             {
   4132             if (eptr <= pp) goto TAIL_RECURSE;
   4133             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34);
   4134             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4135             eptr--;
   4136             BACKCHAR(eptr);
   4137             }
   4138           }
   4139         else
   4140 #endif
   4141         /* Not UTF mode */
   4142           {
   4143           for (i = min; i < max; i++)
   4144             {
   4145             if (eptr >= mb->end_subject)
   4146               {
   4147               SCHECK_PARTIAL();
   4148               break;
   4149               }
   4150             if (fc == *eptr) break;
   4151             eptr++;
   4152             }
   4153           if (possessive) continue;    /* No backtracking */
   4154           for (;;)
   4155             {
   4156             if (eptr == pp) goto TAIL_RECURSE;
   4157             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM35);
   4158             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4159             eptr--;
   4160             }
   4161           }
   4162         /* Control never gets here */
   4163         }
   4164       }
   4165     /* Control never gets here */
   4166 
   4167     /* Match a single character type repeatedly; several different opcodes
   4168     share code. This is very similar to the code for single characters, but we
   4169     repeat it in the interests of efficiency. */
   4170 
   4171     case OP_TYPEEXACT:
   4172     min = max = GET2(ecode, 1);
   4173     minimize = TRUE;
   4174     ecode += 1 + IMM2_SIZE;
   4175     goto REPEATTYPE;
   4176 
   4177     case OP_TYPEUPTO:
   4178     case OP_TYPEMINUPTO:
   4179     min = 0;
   4180     max = GET2(ecode, 1);
   4181     minimize = *ecode == OP_TYPEMINUPTO;
   4182     ecode += 1 + IMM2_SIZE;
   4183     goto REPEATTYPE;
   4184 
   4185     case OP_TYPEPOSSTAR:
   4186     possessive = TRUE;
   4187     min = 0;
   4188     max = INT_MAX;
   4189     ecode++;
   4190     goto REPEATTYPE;
   4191 
   4192     case OP_TYPEPOSPLUS:
   4193     possessive = TRUE;
   4194     min = 1;
   4195     max = INT_MAX;
   4196     ecode++;
   4197     goto REPEATTYPE;
   4198 
   4199     case OP_TYPEPOSQUERY:
   4200     possessive = TRUE;
   4201     min = 0;
   4202     max = 1;
   4203     ecode++;
   4204     goto REPEATTYPE;
   4205 
   4206     case OP_TYPEPOSUPTO:
   4207     possessive = TRUE;
   4208     min = 0;
   4209     max = GET2(ecode, 1);
   4210     ecode += 1 + IMM2_SIZE;
   4211     goto REPEATTYPE;
   4212 
   4213     case OP_TYPESTAR:
   4214     case OP_TYPEMINSTAR:
   4215     case OP_TYPEPLUS:
   4216     case OP_TYPEMINPLUS:
   4217     case OP_TYPEQUERY:
   4218     case OP_TYPEMINQUERY:
   4219     c = *ecode++ - OP_TYPESTAR;
   4220     minimize = (c & 1) != 0;
   4221     min = rep_min[c];                 /* Pick up values from tables; */
   4222     max = rep_max[c];                 /* zero for max => infinity */
   4223     if (max == 0) max = INT_MAX;
   4224 
   4225     /* Common code for all repeated single character type matches. Note that
   4226     in UTF-8 mode, '.' matches a character of any length, but for the other
   4227     character types, the valid characters are all one-byte long. */
   4228 
   4229     REPEATTYPE:
   4230     ctype = *ecode++;      /* Code for the character type */
   4231 
   4232 #ifdef SUPPORT_UNICODE
   4233     if (ctype == OP_PROP || ctype == OP_NOTPROP)
   4234       {
   4235       prop_fail_result = ctype == OP_NOTPROP;
   4236       prop_type = *ecode++;
   4237       prop_value = *ecode++;
   4238       }
   4239     else prop_type = -1;
   4240 #endif
   4241 
   4242     /* First, ensure the minimum number of matches are present. Use inline
   4243     code for maximizing the speed, and do the type test once at the start
   4244     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
   4245     is tidier. Also separate the UCP code, which can be the same for both UTF-8
   4246     and single-bytes. */
   4247 
   4248     if (min > 0)
   4249       {
   4250 #ifdef SUPPORT_UNICODE
   4251       if (prop_type >= 0)
   4252         {
   4253         switch(prop_type)
   4254           {
   4255           case PT_ANY:
   4256           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4257           for (i = 1; i <= min; i++)
   4258             {
   4259             if (eptr >= mb->end_subject)
   4260               {
   4261               SCHECK_PARTIAL();
   4262               RRETURN(MATCH_NOMATCH);
   4263               }
   4264             GETCHARINCTEST(c, eptr);
   4265             }
   4266           break;
   4267 
   4268           case PT_LAMP:
   4269           for (i = 1; i <= min; i++)
   4270             {
   4271             int chartype;
   4272             if (eptr >= mb->end_subject)
   4273               {
   4274               SCHECK_PARTIAL();
   4275               RRETURN(MATCH_NOMATCH);
   4276               }
   4277             GETCHARINCTEST(c, eptr);
   4278             chartype = UCD_CHARTYPE(c);
   4279             if ((chartype == ucp_Lu ||
   4280                  chartype == ucp_Ll ||
   4281                  chartype == ucp_Lt) == prop_fail_result)
   4282               RRETURN(MATCH_NOMATCH);
   4283             }
   4284           break;
   4285 
   4286           case PT_GC:
   4287           for (i = 1; i <= min; i++)
   4288             {
   4289             if (eptr >= mb->end_subject)
   4290               {
   4291               SCHECK_PARTIAL();
   4292               RRETURN(MATCH_NOMATCH);
   4293               }
   4294             GETCHARINCTEST(c, eptr);
   4295             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   4296               RRETURN(MATCH_NOMATCH);
   4297             }
   4298           break;
   4299 
   4300           case PT_PC:
   4301           for (i = 1; i <= min; i++)
   4302             {
   4303             if (eptr >= mb->end_subject)
   4304               {
   4305               SCHECK_PARTIAL();
   4306               RRETURN(MATCH_NOMATCH);
   4307               }
   4308             GETCHARINCTEST(c, eptr);
   4309             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   4310               RRETURN(MATCH_NOMATCH);
   4311             }
   4312           break;
   4313 
   4314           case PT_SC:
   4315           for (i = 1; i <= min; i++)
   4316             {
   4317             if (eptr >= mb->end_subject)
   4318               {
   4319               SCHECK_PARTIAL();
   4320               RRETURN(MATCH_NOMATCH);
   4321               }
   4322             GETCHARINCTEST(c, eptr);
   4323             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   4324               RRETURN(MATCH_NOMATCH);
   4325             }
   4326           break;
   4327 
   4328           case PT_ALNUM:
   4329           for (i = 1; i <= min; i++)
   4330             {
   4331             int category;
   4332             if (eptr >= mb->end_subject)
   4333               {
   4334               SCHECK_PARTIAL();
   4335               RRETURN(MATCH_NOMATCH);
   4336               }
   4337             GETCHARINCTEST(c, eptr);
   4338             category = UCD_CATEGORY(c);
   4339             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   4340               RRETURN(MATCH_NOMATCH);
   4341             }
   4342           break;
   4343 
   4344           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   4345           which means that Perl space and POSIX space are now identical. PCRE
   4346           was changed at release 8.34. */
   4347 
   4348           case PT_SPACE:    /* Perl space */
   4349           case PT_PXSPACE:  /* POSIX space */
   4350           for (i = 1; i <= min; i++)
   4351             {
   4352             if (eptr >= mb->end_subject)
   4353               {
   4354               SCHECK_PARTIAL();
   4355               RRETURN(MATCH_NOMATCH);
   4356               }
   4357             GETCHARINCTEST(c, eptr);
   4358             switch(c)
   4359               {
   4360               HSPACE_CASES:
   4361               VSPACE_CASES:
   4362               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4363               break;
   4364 
   4365               default:
   4366               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   4367                 RRETURN(MATCH_NOMATCH);
   4368               break;
   4369               }
   4370             }
   4371           break;
   4372 
   4373           case PT_WORD:
   4374           for (i = 1; i <= min; i++)
   4375             {
   4376             int category;
   4377             if (eptr >= mb->end_subject)
   4378               {
   4379               SCHECK_PARTIAL();
   4380               RRETURN(MATCH_NOMATCH);
   4381               }
   4382             GETCHARINCTEST(c, eptr);
   4383             category = UCD_CATEGORY(c);
   4384             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
   4385                    == prop_fail_result)
   4386               RRETURN(MATCH_NOMATCH);
   4387             }
   4388           break;
   4389 
   4390           case PT_CLIST:
   4391           for (i = 1; i <= min; i++)
   4392             {
   4393             const uint32_t *cp;
   4394             if (eptr >= mb->end_subject)
   4395               {
   4396               SCHECK_PARTIAL();
   4397               RRETURN(MATCH_NOMATCH);
   4398               }
   4399             GETCHARINCTEST(c, eptr);
   4400             cp = PRIV(ucd_caseless_sets) + prop_value;
   4401             for (;;)
   4402               {
   4403               if (c < *cp)
   4404                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   4405               if (c == *cp++)
   4406                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   4407               }
   4408             }
   4409           break;
   4410 
   4411           case PT_UCNC:
   4412           for (i = 1; i <= min; i++)
   4413             {
   4414             if (eptr >= mb->end_subject)
   4415               {
   4416               SCHECK_PARTIAL();
   4417               RRETURN(MATCH_NOMATCH);
   4418               }
   4419             GETCHARINCTEST(c, eptr);
   4420             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   4421                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   4422                  c >= 0xe000) == prop_fail_result)
   4423               RRETURN(MATCH_NOMATCH);
   4424             }
   4425           break;
   4426 
   4427           /* This should not occur */
   4428 
   4429           default:
   4430           RRETURN(PCRE2_ERROR_INTERNAL);
   4431           }
   4432         }
   4433 
   4434       /* Match extended Unicode sequences. We will get here only if the
   4435       support is in the binary; otherwise a compile-time error occurs. */
   4436 
   4437       else if (ctype == OP_EXTUNI)
   4438         {
   4439         for (i = 1; i <= min; i++)
   4440           {
   4441           if (eptr >= mb->end_subject)
   4442             {
   4443             SCHECK_PARTIAL();
   4444             RRETURN(MATCH_NOMATCH);
   4445             }
   4446           else
   4447             {
   4448             int lgb, rgb;
   4449             GETCHARINCTEST(c, eptr);
   4450             lgb = UCD_GRAPHBREAK(c);
   4451            while (eptr < mb->end_subject)
   4452               {
   4453               int len = 1;
   4454               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   4455               rgb = UCD_GRAPHBREAK(c);
   4456               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   4457               lgb = rgb;
   4458               eptr += len;
   4459               }
   4460             }
   4461           CHECK_PARTIAL();
   4462           }
   4463         }
   4464 
   4465       else
   4466 #endif     /* SUPPORT_UNICODE */
   4467 
   4468 /* Handle all other cases when the coding is UTF-8 */
   4469 
   4470 #ifdef SUPPORT_UNICODE
   4471       if (utf) switch(ctype)
   4472         {
   4473         case OP_ANY:
   4474         for (i = 1; i <= min; i++)
   4475           {
   4476           if (eptr >= mb->end_subject)
   4477             {
   4478             SCHECK_PARTIAL();
   4479             RRETURN(MATCH_NOMATCH);
   4480             }
   4481           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4482           if (mb->partial != 0 &&
   4483               eptr + 1 >= mb->end_subject &&
   4484               NLBLOCK->nltype == NLTYPE_FIXED &&
   4485               NLBLOCK->nllen == 2 &&
   4486               UCHAR21(eptr) == NLBLOCK->nl[0])
   4487             {
   4488             mb->hitend = TRUE;
   4489             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   4490             }
   4491           eptr++;
   4492           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   4493           }
   4494         break;
   4495 
   4496         case OP_ALLANY:
   4497         for (i = 1; i <= min; i++)
   4498           {
   4499           if (eptr >= mb->end_subject)
   4500             {
   4501             SCHECK_PARTIAL();
   4502             RRETURN(MATCH_NOMATCH);
   4503             }
   4504           eptr++;
   4505           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   4506           }
   4507         break;
   4508 
   4509         case OP_ANYBYTE:
   4510         if (eptr > mb->end_subject - min) RRETURN(MATCH_NOMATCH);
   4511         eptr += min;
   4512         break;
   4513 
   4514         case OP_ANYNL:
   4515         for (i = 1; i <= min; i++)
   4516           {
   4517           if (eptr >= mb->end_subject)
   4518             {
   4519             SCHECK_PARTIAL();
   4520             RRETURN(MATCH_NOMATCH);
   4521             }
   4522           GETCHARINC(c, eptr);
   4523           switch(c)
   4524             {
   4525             default: RRETURN(MATCH_NOMATCH);
   4526 
   4527             case CHAR_CR:
   4528             if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   4529             break;
   4530 
   4531             case CHAR_LF:
   4532             break;
   4533 
   4534             case CHAR_VT:
   4535             case CHAR_FF:
   4536             case CHAR_NEL:
   4537 #ifndef EBCDIC
   4538             case 0x2028:
   4539             case 0x2029:
   4540 #endif  /* Not EBCDIC */
   4541             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
   4542             break;
   4543             }
   4544           }
   4545         break;
   4546 
   4547         case OP_NOT_HSPACE:
   4548         for (i = 1; i <= min; i++)
   4549           {
   4550           if (eptr >= mb->end_subject)
   4551             {
   4552             SCHECK_PARTIAL();
   4553             RRETURN(MATCH_NOMATCH);
   4554             }
   4555           GETCHARINC(c, eptr);
   4556           switch(c)
   4557             {
   4558             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   4559             default: break;
   4560             }
   4561           }
   4562         break;
   4563 
   4564         case OP_HSPACE:
   4565         for (i = 1; i <= min; i++)
   4566           {
   4567           if (eptr >= mb->end_subject)
   4568             {
   4569             SCHECK_PARTIAL();
   4570             RRETURN(MATCH_NOMATCH);
   4571             }
   4572           GETCHARINC(c, eptr);
   4573           switch(c)
   4574             {
   4575             HSPACE_CASES: break;  /* Byte and multibyte cases */
   4576             default: RRETURN(MATCH_NOMATCH);
   4577             }
   4578           }
   4579         break;
   4580 
   4581         case OP_NOT_VSPACE:
   4582         for (i = 1; i <= min; i++)
   4583           {
   4584           if (eptr >= mb->end_subject)
   4585             {
   4586             SCHECK_PARTIAL();
   4587             RRETURN(MATCH_NOMATCH);
   4588             }
   4589           GETCHARINC(c, eptr);
   4590           switch(c)
   4591             {
   4592             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   4593             default: break;
   4594             }
   4595           }
   4596         break;
   4597 
   4598         case OP_VSPACE:
   4599         for (i = 1; i <= min; i++)
   4600           {
   4601           if (eptr >= mb->end_subject)
   4602             {
   4603             SCHECK_PARTIAL();
   4604             RRETURN(MATCH_NOMATCH);
   4605             }
   4606           GETCHARINC(c, eptr);
   4607           switch(c)
   4608             {
   4609             VSPACE_CASES: break;
   4610             default: RRETURN(MATCH_NOMATCH);
   4611             }
   4612           }
   4613         break;
   4614 
   4615         case OP_NOT_DIGIT:
   4616         for (i = 1; i <= min; i++)
   4617           {
   4618           if (eptr >= mb->end_subject)
   4619             {
   4620             SCHECK_PARTIAL();
   4621             RRETURN(MATCH_NOMATCH);
   4622             }
   4623           GETCHARINC(c, eptr);
   4624           if (c < 128 && (mb->ctypes[c] & ctype_digit) != 0)
   4625             RRETURN(MATCH_NOMATCH);
   4626           }
   4627         break;
   4628 
   4629         case OP_DIGIT:
   4630         for (i = 1; i <= min; i++)
   4631           {
   4632           uint32_t cc;
   4633           if (eptr >= mb->end_subject)
   4634             {
   4635             SCHECK_PARTIAL();
   4636             RRETURN(MATCH_NOMATCH);
   4637             }
   4638           cc = UCHAR21(eptr);
   4639           if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
   4640             RRETURN(MATCH_NOMATCH);
   4641           eptr++;
   4642           /* No need to skip more bytes - we know it's a 1-byte character */
   4643           }
   4644         break;
   4645 
   4646         case OP_NOT_WHITESPACE:
   4647         for (i = 1; i <= min; i++)
   4648           {
   4649           uint32_t cc;
   4650           if (eptr >= mb->end_subject)
   4651             {
   4652             SCHECK_PARTIAL();
   4653             RRETURN(MATCH_NOMATCH);
   4654             }
   4655           cc = UCHAR21(eptr);
   4656           if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
   4657             RRETURN(MATCH_NOMATCH);
   4658           eptr++;
   4659           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   4660           }
   4661         break;
   4662 
   4663         case OP_WHITESPACE:
   4664         for (i = 1; i <= min; i++)
   4665           {
   4666           uint32_t cc;
   4667           if (eptr >= mb->end_subject)
   4668             {
   4669             SCHECK_PARTIAL();
   4670             RRETURN(MATCH_NOMATCH);
   4671             }
   4672           cc = UCHAR21(eptr);
   4673           if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
   4674             RRETURN(MATCH_NOMATCH);
   4675           eptr++;
   4676           /* No need to skip more bytes - we know it's a 1-byte character */
   4677           }
   4678         break;
   4679 
   4680         case OP_NOT_WORDCHAR:
   4681         for (i = 1; i <= min; i++)
   4682           {
   4683           uint32_t cc;
   4684           if (eptr >= mb->end_subject)
   4685             {
   4686             SCHECK_PARTIAL();
   4687             RRETURN(MATCH_NOMATCH);
   4688             }
   4689           cc = UCHAR21(eptr);
   4690           if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
   4691             RRETURN(MATCH_NOMATCH);
   4692           eptr++;
   4693           ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   4694           }
   4695         break;
   4696 
   4697         case OP_WORDCHAR:
   4698         for (i = 1; i <= min; i++)
   4699           {
   4700           uint32_t cc;
   4701           if (eptr >= mb->end_subject)
   4702             {
   4703             SCHECK_PARTIAL();
   4704             RRETURN(MATCH_NOMATCH);
   4705             }
   4706           cc = UCHAR21(eptr);
   4707           if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
   4708             RRETURN(MATCH_NOMATCH);
   4709           eptr++;
   4710           /* No need to skip more bytes - we know it's a 1-byte character */
   4711           }
   4712         break;
   4713 
   4714         default:
   4715         RRETURN(PCRE2_ERROR_INTERNAL);
   4716         }  /* End switch(ctype) */
   4717 
   4718       else
   4719 #endif     /* SUPPORT_UNICODE */
   4720 
   4721       /* Code for the non-UTF-8 case for minimum matching of operators other
   4722       than OP_PROP and OP_NOTPROP. */
   4723 
   4724       switch(ctype)
   4725         {
   4726         case OP_ANY:
   4727         for (i = 1; i <= min; i++)
   4728           {
   4729           if (eptr >= mb->end_subject)
   4730             {
   4731             SCHECK_PARTIAL();
   4732             RRETURN(MATCH_NOMATCH);
   4733             }
   4734           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4735           if (mb->partial != 0 &&
   4736               eptr + 1 >= mb->end_subject &&
   4737               NLBLOCK->nltype == NLTYPE_FIXED &&
   4738               NLBLOCK->nllen == 2 &&
   4739               *eptr == NLBLOCK->nl[0])
   4740             {
   4741             mb->hitend = TRUE;
   4742             if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   4743             }
   4744           eptr++;
   4745           }
   4746         break;
   4747 
   4748         case OP_ALLANY:
   4749         if (eptr > mb->end_subject - min)
   4750           {
   4751           SCHECK_PARTIAL();
   4752           RRETURN(MATCH_NOMATCH);
   4753           }
   4754         eptr += min;
   4755         break;
   4756 
   4757         case OP_ANYBYTE:
   4758         if (eptr > mb->end_subject - min)
   4759           {
   4760           SCHECK_PARTIAL();
   4761           RRETURN(MATCH_NOMATCH);
   4762           }
   4763         eptr += min;
   4764         break;
   4765 
   4766         case OP_ANYNL:
   4767         for (i = 1; i <= min; i++)
   4768           {
   4769           if (eptr >= mb->end_subject)
   4770             {
   4771             SCHECK_PARTIAL();
   4772             RRETURN(MATCH_NOMATCH);
   4773             }
   4774           switch(*eptr++)
   4775             {
   4776             default: RRETURN(MATCH_NOMATCH);
   4777 
   4778             case CHAR_CR:
   4779             if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
   4780             break;
   4781 
   4782             case CHAR_LF:
   4783             break;
   4784 
   4785             case CHAR_VT:
   4786             case CHAR_FF:
   4787             case CHAR_NEL:
   4788 #if PCRE2_CODE_UNIT_WIDTH != 8
   4789             case 0x2028:
   4790             case 0x2029:
   4791 #endif
   4792             if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
   4793             break;
   4794             }
   4795           }
   4796         break;
   4797 
   4798         case OP_NOT_HSPACE:
   4799         for (i = 1; i <= min; i++)
   4800           {
   4801           if (eptr >= mb->end_subject)
   4802             {
   4803             SCHECK_PARTIAL();
   4804             RRETURN(MATCH_NOMATCH);
   4805             }
   4806           switch(*eptr++)
   4807             {
   4808             default: break;
   4809             HSPACE_BYTE_CASES:
   4810 #if PCRE2_CODE_UNIT_WIDTH != 8
   4811             HSPACE_MULTIBYTE_CASES:
   4812 #endif
   4813             RRETURN(MATCH_NOMATCH);
   4814             }
   4815           }
   4816         break;
   4817 
   4818         case OP_HSPACE:
   4819         for (i = 1; i <= min; i++)
   4820           {
   4821           if (eptr >= mb->end_subject)
   4822             {
   4823             SCHECK_PARTIAL();
   4824             RRETURN(MATCH_NOMATCH);
   4825             }
   4826           switch(*eptr++)
   4827             {
   4828             default: RRETURN(MATCH_NOMATCH);
   4829             HSPACE_BYTE_CASES:
   4830 #if PCRE2_CODE_UNIT_WIDTH != 8
   4831             HSPACE_MULTIBYTE_CASES:
   4832 #endif
   4833             break;
   4834             }
   4835           }
   4836         break;
   4837 
   4838         case OP_NOT_VSPACE:
   4839         for (i = 1; i <= min; i++)
   4840           {
   4841           if (eptr >= mb->end_subject)
   4842             {
   4843             SCHECK_PARTIAL();
   4844             RRETURN(MATCH_NOMATCH);
   4845             }
   4846           switch(*eptr++)
   4847             {
   4848             VSPACE_BYTE_CASES:
   4849 #if PCRE2_CODE_UNIT_WIDTH != 8
   4850             VSPACE_MULTIBYTE_CASES:
   4851 #endif
   4852             RRETURN(MATCH_NOMATCH);
   4853             default: break;
   4854             }
   4855           }
   4856         break;
   4857 
   4858         case OP_VSPACE:
   4859         for (i = 1; i <= min; i++)
   4860           {
   4861           if (eptr >= mb->end_subject)
   4862             {
   4863             SCHECK_PARTIAL();
   4864             RRETURN(MATCH_NOMATCH);
   4865             }
   4866           switch(*eptr++)
   4867             {
   4868             default: RRETURN(MATCH_NOMATCH);
   4869             VSPACE_BYTE_CASES:
   4870 #if PCRE2_CODE_UNIT_WIDTH != 8
   4871             VSPACE_MULTIBYTE_CASES:
   4872 #endif
   4873             break;
   4874             }
   4875           }
   4876         break;
   4877 
   4878         case OP_NOT_DIGIT:
   4879         for (i = 1; i <= min; i++)
   4880           {
   4881           if (eptr >= mb->end_subject)
   4882             {
   4883             SCHECK_PARTIAL();
   4884             RRETURN(MATCH_NOMATCH);
   4885             }
   4886           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0)
   4887             RRETURN(MATCH_NOMATCH);
   4888           eptr++;
   4889           }
   4890         break;
   4891 
   4892         case OP_DIGIT:
   4893         for (i = 1; i <= min; i++)
   4894           {
   4895           if (eptr >= mb->end_subject)
   4896             {
   4897             SCHECK_PARTIAL();
   4898             RRETURN(MATCH_NOMATCH);
   4899             }
   4900           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0)
   4901             RRETURN(MATCH_NOMATCH);
   4902           eptr++;
   4903           }
   4904         break;
   4905 
   4906         case OP_NOT_WHITESPACE:
   4907         for (i = 1; i <= min; i++)
   4908           {
   4909           if (eptr >= mb->end_subject)
   4910             {
   4911             SCHECK_PARTIAL();
   4912             RRETURN(MATCH_NOMATCH);
   4913             }
   4914           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0)
   4915             RRETURN(MATCH_NOMATCH);
   4916           eptr++;
   4917           }
   4918         break;
   4919 
   4920         case OP_WHITESPACE:
   4921         for (i = 1; i <= min; i++)
   4922           {
   4923           if (eptr >= mb->end_subject)
   4924             {
   4925             SCHECK_PARTIAL();
   4926             RRETURN(MATCH_NOMATCH);
   4927             }
   4928           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0)
   4929             RRETURN(MATCH_NOMATCH);
   4930           eptr++;
   4931           }
   4932         break;
   4933 
   4934         case OP_NOT_WORDCHAR:
   4935         for (i = 1; i <= min; i++)
   4936           {
   4937           if (eptr >= mb->end_subject)
   4938             {
   4939             SCHECK_PARTIAL();
   4940             RRETURN(MATCH_NOMATCH);
   4941             }
   4942           if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0)
   4943             RRETURN(MATCH_NOMATCH);
   4944           eptr++;
   4945           }
   4946         break;
   4947 
   4948         case OP_WORDCHAR:
   4949         for (i = 1; i <= min; i++)
   4950           {
   4951           if (eptr >= mb->end_subject)
   4952             {
   4953             SCHECK_PARTIAL();
   4954             RRETURN(MATCH_NOMATCH);
   4955             }
   4956           if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0)
   4957             RRETURN(MATCH_NOMATCH);
   4958           eptr++;
   4959           }
   4960         break;
   4961 
   4962         default:
   4963         RRETURN(PCRE2_ERROR_INTERNAL);
   4964         }
   4965       }
   4966 
   4967     /* If min = max, continue at the same level without recursing */
   4968 
   4969     if (min == max) continue;
   4970 
   4971     /* If minimizing, we have to test the rest of the pattern before each
   4972     subsequent match. Again, separate the UTF case for speed, and also
   4973     separate the UCP cases. */
   4974 
   4975     if (minimize)
   4976       {
   4977 #ifdef SUPPORT_UNICODE
   4978       if (prop_type >= 0)
   4979         {
   4980         switch(prop_type)
   4981           {
   4982           case PT_ANY:
   4983           for (fi = min;; fi++)
   4984             {
   4985             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM36);
   4986             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4987             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4988             if (eptr >= mb->end_subject)
   4989               {
   4990               SCHECK_PARTIAL();
   4991               RRETURN(MATCH_NOMATCH);
   4992               }
   4993             GETCHARINCTEST(c, eptr);
   4994             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4995             }
   4996           /* Control never gets here */
   4997 
   4998           case PT_LAMP:
   4999           for (fi = min;; fi++)
   5000             {
   5001             int chartype;
   5002             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM37);
   5003             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5004             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5005             if (eptr >= mb->end_subject)
   5006               {
   5007               SCHECK_PARTIAL();
   5008               RRETURN(MATCH_NOMATCH);
   5009               }
   5010             GETCHARINCTEST(c, eptr);
   5011             chartype = UCD_CHARTYPE(c);
   5012             if ((chartype == ucp_Lu ||
   5013                  chartype == ucp_Ll ||
   5014                  chartype == ucp_Lt) == prop_fail_result)
   5015               RRETURN(MATCH_NOMATCH);
   5016             }
   5017           /* Control never gets here */
   5018 
   5019           case PT_GC:
   5020           for (fi = min;; fi++)
   5021             {
   5022             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM38);
   5023             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5024             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5025             if (eptr >= mb->end_subject)
   5026               {
   5027               SCHECK_PARTIAL();
   5028               RRETURN(MATCH_NOMATCH);
   5029               }
   5030             GETCHARINCTEST(c, eptr);
   5031             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   5032               RRETURN(MATCH_NOMATCH);
   5033             }
   5034           /* Control never gets here */
   5035 
   5036           case PT_PC:
   5037           for (fi = min;; fi++)
   5038             {
   5039             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM39);
   5040             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5041             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5042             if (eptr >= mb->end_subject)
   5043               {
   5044               SCHECK_PARTIAL();
   5045               RRETURN(MATCH_NOMATCH);
   5046               }
   5047             GETCHARINCTEST(c, eptr);
   5048             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   5049               RRETURN(MATCH_NOMATCH);
   5050             }
   5051           /* Control never gets here */
   5052 
   5053           case PT_SC:
   5054           for (fi = min;; fi++)
   5055             {
   5056             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM40);
   5057             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5058             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5059             if (eptr >= mb->end_subject)
   5060               {
   5061               SCHECK_PARTIAL();
   5062               RRETURN(MATCH_NOMATCH);
   5063               }
   5064             GETCHARINCTEST(c, eptr);
   5065             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   5066               RRETURN(MATCH_NOMATCH);
   5067             }
   5068           /* Control never gets here */
   5069 
   5070           case PT_ALNUM:
   5071           for (fi = min;; fi++)
   5072             {
   5073             int category;
   5074             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM59);
   5075             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5076             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5077             if (eptr >= mb->end_subject)
   5078               {
   5079               SCHECK_PARTIAL();
   5080               RRETURN(MATCH_NOMATCH);
   5081               }
   5082             GETCHARINCTEST(c, eptr);
   5083             category = UCD_CATEGORY(c);
   5084             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   5085               RRETURN(MATCH_NOMATCH);
   5086             }
   5087           /* Control never gets here */
   5088 
   5089           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   5090           which means that Perl space and POSIX space are now identical. PCRE
   5091           was changed at release 8.34. */
   5092 
   5093           case PT_SPACE:    /* Perl space */
   5094           case PT_PXSPACE:  /* POSIX space */
   5095           for (fi = min;; fi++)
   5096             {
   5097             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM61);
   5098             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5099             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5100             if (eptr >= mb->end_subject)
   5101               {
   5102               SCHECK_PARTIAL();
   5103               RRETURN(MATCH_NOMATCH);
   5104               }
   5105             GETCHARINCTEST(c, eptr);
   5106             switch(c)
   5107               {
   5108               HSPACE_CASES:
   5109               VSPACE_CASES:
   5110               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   5111               break;
   5112 
   5113               default:
   5114               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5115                 RRETURN(MATCH_NOMATCH);
   5116               break;
   5117               }
   5118             }
   5119           /* Control never gets here */
   5120 
   5121           case PT_WORD:
   5122           for (fi = min;; fi++)
   5123             {
   5124             int category;
   5125             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM62);
   5126             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5127             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5128             if (eptr >= mb->end_subject)
   5129               {
   5130               SCHECK_PARTIAL();
   5131               RRETURN(MATCH_NOMATCH);
   5132               }
   5133             GETCHARINCTEST(c, eptr);
   5134             category = UCD_CATEGORY(c);
   5135             if ((category == ucp_L ||
   5136                  category == ucp_N ||
   5137                  c == CHAR_UNDERSCORE)
   5138                    == prop_fail_result)
   5139               RRETURN(MATCH_NOMATCH);
   5140             }
   5141           /* Control never gets here */
   5142 
   5143           case PT_CLIST:
   5144           for (fi = min;; fi++)
   5145             {
   5146             const uint32_t *cp;
   5147             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM67);
   5148             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5149             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5150             if (eptr >= mb->end_subject)
   5151               {
   5152               SCHECK_PARTIAL();
   5153               RRETURN(MATCH_NOMATCH);
   5154               }
   5155             GETCHARINCTEST(c, eptr);
   5156             cp = PRIV(ucd_caseless_sets) + prop_value;
   5157             for (;;)
   5158               {
   5159               if (c < *cp)
   5160                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   5161               if (c == *cp++)
   5162                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   5163               }
   5164             }
   5165           /* Control never gets here */
   5166 
   5167           case PT_UCNC:
   5168           for (fi = min;; fi++)
   5169             {
   5170             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM60);
   5171             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5172             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5173             if (eptr >= mb->end_subject)
   5174               {
   5175               SCHECK_PARTIAL();
   5176               RRETURN(MATCH_NOMATCH);
   5177               }
   5178             GETCHARINCTEST(c, eptr);
   5179             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5180                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5181                  c >= 0xe000) == prop_fail_result)
   5182               RRETURN(MATCH_NOMATCH);
   5183             }
   5184           /* Control never gets here */
   5185 
   5186           /* This should never occur */
   5187           default:
   5188           RRETURN(PCRE2_ERROR_INTERNAL);
   5189           }
   5190         }
   5191 
   5192       /* Match extended Unicode sequences. We will get here only if the
   5193       support is in the binary; otherwise a compile-time error occurs. */
   5194 
   5195       else if (ctype == OP_EXTUNI)
   5196         {
   5197         for (fi = min;; fi++)
   5198           {
   5199           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM41);
   5200           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5201           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5202           if (eptr >= mb->end_subject)
   5203             {
   5204             SCHECK_PARTIAL();
   5205             RRETURN(MATCH_NOMATCH);
   5206             }
   5207           else
   5208             {
   5209             int lgb, rgb;
   5210             GETCHARINCTEST(c, eptr);
   5211             lgb = UCD_GRAPHBREAK(c);
   5212             while (eptr < mb->end_subject)
   5213               {
   5214               int len = 1;
   5215               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5216               rgb = UCD_GRAPHBREAK(c);
   5217               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5218               lgb = rgb;
   5219               eptr += len;
   5220               }
   5221             }
   5222           CHECK_PARTIAL();
   5223           }
   5224         }
   5225       else
   5226 #endif     /* SUPPORT_UNICODE */
   5227 
   5228 #ifdef SUPPORT_UNICODE
   5229       if (utf)
   5230         {
   5231         for (fi = min;; fi++)
   5232           {
   5233           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM42);
   5234           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5235           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5236           if (eptr >= mb->end_subject)
   5237             {
   5238             SCHECK_PARTIAL();
   5239             RRETURN(MATCH_NOMATCH);
   5240             }
   5241           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5242             RRETURN(MATCH_NOMATCH);
   5243           GETCHARINC(c, eptr);
   5244           switch(ctype)
   5245             {
   5246             case OP_ANY:               /* This is the non-NL case */
   5247             if (mb->partial != 0 &&    /* Take care with CRLF partial */
   5248                 eptr >= mb->end_subject &&
   5249                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5250                 NLBLOCK->nllen == 2 &&
   5251                 c == NLBLOCK->nl[0])
   5252               {
   5253               mb->hitend = TRUE;
   5254               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   5255               }
   5256             break;
   5257 
   5258             case OP_ALLANY:
   5259             case OP_ANYBYTE:
   5260             break;
   5261 
   5262             case OP_ANYNL:
   5263             switch(c)
   5264               {
   5265               default: RRETURN(MATCH_NOMATCH);
   5266               case CHAR_CR:
   5267               if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   5268               break;
   5269 
   5270               case CHAR_LF:
   5271               break;
   5272 
   5273               case CHAR_VT:
   5274               case CHAR_FF:
   5275               case CHAR_NEL:
   5276 #ifndef EBCDIC
   5277               case 0x2028:
   5278               case 0x2029:
   5279 #endif  /* Not EBCDIC */
   5280               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
   5281               break;
   5282               }
   5283             break;
   5284 
   5285             case OP_NOT_HSPACE:
   5286             switch(c)
   5287               {
   5288               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5289               default: break;
   5290               }
   5291             break;
   5292 
   5293             case OP_HSPACE:
   5294             switch(c)
   5295               {
   5296               HSPACE_CASES: break;
   5297               default: RRETURN(MATCH_NOMATCH);
   5298               }
   5299             break;
   5300 
   5301             case OP_NOT_VSPACE:
   5302             switch(c)
   5303               {
   5304               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5305               default: break;
   5306               }
   5307             break;
   5308 
   5309             case OP_VSPACE:
   5310             switch(c)
   5311               {
   5312               VSPACE_CASES: break;
   5313               default: RRETURN(MATCH_NOMATCH);
   5314               }
   5315             break;
   5316 
   5317             case OP_NOT_DIGIT:
   5318             if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0)
   5319               RRETURN(MATCH_NOMATCH);
   5320             break;
   5321 
   5322             case OP_DIGIT:
   5323             if (c >= 256 || (mb->ctypes[c] & ctype_digit) == 0)
   5324               RRETURN(MATCH_NOMATCH);
   5325             break;
   5326 
   5327             case OP_NOT_WHITESPACE:
   5328             if (c < 256 && (mb->ctypes[c] & ctype_space) != 0)
   5329               RRETURN(MATCH_NOMATCH);
   5330             break;
   5331 
   5332             case OP_WHITESPACE:
   5333             if (c >= 256 || (mb->ctypes[c] & ctype_space) == 0)
   5334               RRETURN(MATCH_NOMATCH);
   5335             break;
   5336 
   5337             case OP_NOT_WORDCHAR:
   5338             if (c < 256 && (mb->ctypes[c] & ctype_word) != 0)
   5339               RRETURN(MATCH_NOMATCH);
   5340             break;
   5341 
   5342             case OP_WORDCHAR:
   5343             if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0)
   5344               RRETURN(MATCH_NOMATCH);
   5345             break;
   5346 
   5347             default:
   5348             RRETURN(PCRE2_ERROR_INTERNAL);
   5349             }
   5350           }
   5351         }
   5352       else
   5353 #endif
   5354       /* Not UTF mode */
   5355         {
   5356         for (fi = min;; fi++)
   5357           {
   5358           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM43);
   5359           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5360           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5361           if (eptr >= mb->end_subject)
   5362             {
   5363             SCHECK_PARTIAL();
   5364             RRETURN(MATCH_NOMATCH);
   5365             }
   5366           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5367             RRETURN(MATCH_NOMATCH);
   5368           c = *eptr++;
   5369           switch(ctype)
   5370             {
   5371             case OP_ANY:               /* This is the non-NL case */
   5372             if (mb->partial != 0 &&    /* Take care with CRLF partial */
   5373                 eptr >= mb->end_subject &&
   5374                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5375                 NLBLOCK->nllen == 2 &&
   5376                 c == NLBLOCK->nl[0])
   5377               {
   5378               mb->hitend = TRUE;
   5379               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   5380               }
   5381             break;
   5382 
   5383             case OP_ALLANY:
   5384             case OP_ANYBYTE:
   5385             break;
   5386 
   5387             case OP_ANYNL:
   5388             switch(c)
   5389               {
   5390               default: RRETURN(MATCH_NOMATCH);
   5391               case CHAR_CR:
   5392               if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
   5393               break;
   5394 
   5395               case CHAR_LF:
   5396               break;
   5397 
   5398               case CHAR_VT:
   5399               case CHAR_FF:
   5400               case CHAR_NEL:
   5401 #if PCRE2_CODE_UNIT_WIDTH != 8
   5402               case 0x2028:
   5403               case 0x2029:
   5404 #endif
   5405               if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
   5406               break;
   5407               }
   5408             break;
   5409 
   5410             case OP_NOT_HSPACE:
   5411             switch(c)
   5412               {
   5413               default: break;
   5414               HSPACE_BYTE_CASES:
   5415 #if PCRE2_CODE_UNIT_WIDTH != 8
   5416               HSPACE_MULTIBYTE_CASES:
   5417 #endif
   5418               RRETURN(MATCH_NOMATCH);
   5419               }
   5420             break;
   5421 
   5422             case OP_HSPACE:
   5423             switch(c)
   5424               {
   5425               default: RRETURN(MATCH_NOMATCH);
   5426               HSPACE_BYTE_CASES:
   5427 #if PCRE2_CODE_UNIT_WIDTH != 8
   5428               HSPACE_MULTIBYTE_CASES:
   5429 #endif
   5430               break;
   5431               }
   5432             break;
   5433 
   5434             case OP_NOT_VSPACE:
   5435             switch(c)
   5436               {
   5437               default: break;
   5438               VSPACE_BYTE_CASES:
   5439 #if PCRE2_CODE_UNIT_WIDTH != 8
   5440               VSPACE_MULTIBYTE_CASES:
   5441 #endif
   5442               RRETURN(MATCH_NOMATCH);
   5443               }
   5444             break;
   5445 
   5446             case OP_VSPACE:
   5447             switch(c)
   5448               {
   5449               default: RRETURN(MATCH_NOMATCH);
   5450               VSPACE_BYTE_CASES:
   5451 #if PCRE2_CODE_UNIT_WIDTH != 8
   5452               VSPACE_MULTIBYTE_CASES:
   5453 #endif
   5454               break;
   5455               }
   5456             break;
   5457 
   5458             case OP_NOT_DIGIT:
   5459             if (MAX_255(c) && (mb->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
   5460             break;
   5461 
   5462             case OP_DIGIT:
   5463             if (!MAX_255(c) || (mb->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
   5464             break;
   5465 
   5466             case OP_NOT_WHITESPACE:
   5467             if (MAX_255(c) && (mb->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
   5468             break;
   5469 
   5470             case OP_WHITESPACE:
   5471             if (!MAX_255(c) || (mb->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
   5472             break;
   5473 
   5474             case OP_NOT_WORDCHAR:
   5475             if (MAX_255(c) && (mb->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
   5476             break;
   5477 
   5478             case OP_WORDCHAR:
   5479             if (!MAX_255(c) || (mb->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
   5480             break;
   5481 
   5482             default:
   5483             RRETURN(PCRE2_ERROR_INTERNAL);
   5484             }
   5485           }
   5486         }
   5487       /* Control never gets here */
   5488       }
   5489 
   5490     /* If maximizing, it is worth using inline code for speed, doing the type
   5491     test once at the start (i.e. keep it out of the loop). Again, keep the
   5492     UTF and UCP stuff separate. */
   5493 
   5494     else
   5495       {
   5496       pp = eptr;  /* Remember where we started */
   5497 
   5498 #ifdef SUPPORT_UNICODE
   5499       if (prop_type >= 0)
   5500         {
   5501         switch(prop_type)
   5502           {
   5503           case PT_ANY:
   5504           for (i = min; i < max; i++)
   5505             {
   5506             int len = 1;
   5507             if (eptr >= mb->end_subject)
   5508               {
   5509               SCHECK_PARTIAL();
   5510               break;
   5511               }
   5512             GETCHARLENTEST(c, eptr, len);
   5513             if (prop_fail_result) break;
   5514             eptr+= len;
   5515             }
   5516           break;
   5517 
   5518           case PT_LAMP:
   5519           for (i = min; i < max; i++)
   5520             {
   5521             int chartype;
   5522             int len = 1;
   5523             if (eptr >= mb->end_subject)
   5524               {
   5525               SCHECK_PARTIAL();
   5526               break;
   5527               }
   5528             GETCHARLENTEST(c, eptr, len);
   5529             chartype = UCD_CHARTYPE(c);
   5530             if ((chartype == ucp_Lu ||
   5531                  chartype == ucp_Ll ||
   5532                  chartype == ucp_Lt) == prop_fail_result)
   5533               break;
   5534             eptr+= len;
   5535             }
   5536           break;
   5537 
   5538           case PT_GC:
   5539           for (i = min; i < max; i++)
   5540             {
   5541             int len = 1;
   5542             if (eptr >= mb->end_subject)
   5543               {
   5544               SCHECK_PARTIAL();
   5545               break;
   5546               }
   5547             GETCHARLENTEST(c, eptr, len);
   5548             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
   5549             eptr+= len;
   5550             }
   5551           break;
   5552 
   5553           case PT_PC:
   5554           for (i = min; i < max; i++)
   5555             {
   5556             int len = 1;
   5557             if (eptr >= mb->end_subject)
   5558               {
   5559               SCHECK_PARTIAL();
   5560               break;
   5561               }
   5562             GETCHARLENTEST(c, eptr, len);
   5563             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
   5564             eptr+= len;
   5565             }
   5566           break;
   5567 
   5568           case PT_SC:
   5569           for (i = min; i < max; i++)
   5570             {
   5571             int len = 1;
   5572             if (eptr >= mb->end_subject)
   5573               {
   5574               SCHECK_PARTIAL();
   5575               break;
   5576               }
   5577             GETCHARLENTEST(c, eptr, len);
   5578             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
   5579             eptr+= len;
   5580             }
   5581           break;
   5582 
   5583           case PT_ALNUM:
   5584           for (i = min; i < max; i++)
   5585             {
   5586             int category;
   5587             int len = 1;
   5588             if (eptr >= mb->end_subject)
   5589               {
   5590               SCHECK_PARTIAL();
   5591               break;
   5592               }
   5593             GETCHARLENTEST(c, eptr, len);
   5594             category = UCD_CATEGORY(c);
   5595             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   5596               break;
   5597             eptr+= len;
   5598             }
   5599           break;
   5600 
   5601           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   5602           which means that Perl space and POSIX space are now identical. PCRE
   5603           was changed at release 8.34. */
   5604 
   5605           case PT_SPACE:    /* Perl space */
   5606           case PT_PXSPACE:  /* POSIX space */
   5607           for (i = min; i < max; i++)
   5608             {
   5609             int len = 1;
   5610             if (eptr >= mb->end_subject)
   5611               {
   5612               SCHECK_PARTIAL();
   5613               break;
   5614               }
   5615             GETCHARLENTEST(c, eptr, len);
   5616             switch(c)
   5617               {
   5618               HSPACE_CASES:
   5619               VSPACE_CASES:
   5620               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
   5621               break;
   5622 
   5623               default:
   5624               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5625                 goto ENDLOOP99;   /* Break the loop */
   5626               break;
   5627               }
   5628             eptr+= len;
   5629             }
   5630           ENDLOOP99:
   5631           break;
   5632 
   5633           case PT_WORD:
   5634           for (i = min; i < max; i++)
   5635             {
   5636             int category;
   5637             int len = 1;
   5638             if (eptr >= mb->end_subject)
   5639               {
   5640               SCHECK_PARTIAL();
   5641               break;
   5642               }
   5643             GETCHARLENTEST(c, eptr, len);
   5644             category = UCD_CATEGORY(c);
   5645             if ((category == ucp_L || category == ucp_N ||
   5646                  c == CHAR_UNDERSCORE) == prop_fail_result)
   5647               break;
   5648             eptr+= len;
   5649             }
   5650           break;
   5651 
   5652           case PT_CLIST:
   5653           for (i = min; i < max; i++)
   5654             {
   5655             const uint32_t *cp;
   5656             int len = 1;
   5657             if (eptr >= mb->end_subject)
   5658               {
   5659               SCHECK_PARTIAL();
   5660               break;
   5661               }
   5662             GETCHARLENTEST(c, eptr, len);
   5663             cp = PRIV(ucd_caseless_sets) + prop_value;
   5664             for (;;)
   5665               {
   5666               if (c < *cp)
   5667                 { if (prop_fail_result) break; else goto GOT_MAX; }
   5668               if (c == *cp++)
   5669                 { if (prop_fail_result) goto GOT_MAX; else break; }
   5670               }
   5671             eptr += len;
   5672             }
   5673           GOT_MAX:
   5674           break;
   5675 
   5676           case PT_UCNC:
   5677           for (i = min; i < max; i++)
   5678             {
   5679             int len = 1;
   5680             if (eptr >= mb->end_subject)
   5681               {
   5682               SCHECK_PARTIAL();
   5683               break;
   5684               }
   5685             GETCHARLENTEST(c, eptr, len);
   5686             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5687                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5688                  c >= 0xe000) == prop_fail_result)
   5689               break;
   5690             eptr += len;
   5691             }
   5692           break;
   5693 
   5694           default:
   5695           RRETURN(PCRE2_ERROR_INTERNAL);
   5696           }
   5697 
   5698         /* eptr is now past the end of the maximum run */
   5699 
   5700         if (possessive) continue;    /* No backtracking */
   5701 
   5702         /* After \C in UTF mode, pp might be in the middle of a Unicode
   5703         character. Use <= pp to ensure backtracking doesn't go too far. */
   5704 
   5705         for(;;)
   5706           {
   5707           if (eptr <= pp) goto TAIL_RECURSE;
   5708           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44);
   5709           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5710           eptr--;
   5711           if (utf) BACKCHAR(eptr);
   5712           }
   5713         }
   5714 
   5715       /* Match extended Unicode grapheme clusters. We will get here only if the
   5716       support is in the binary; otherwise a compile-time error occurs. */
   5717 
   5718       else if (ctype == OP_EXTUNI)
   5719         {
   5720         for (i = min; i < max; i++)
   5721           {
   5722           if (eptr >= mb->end_subject)
   5723             {
   5724             SCHECK_PARTIAL();
   5725             break;
   5726             }
   5727           else
   5728             {
   5729             int lgb, rgb;
   5730             GETCHARINCTEST(c, eptr);
   5731             lgb = UCD_GRAPHBREAK(c);
   5732             while (eptr < mb->end_subject)
   5733               {
   5734               int len = 1;
   5735               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5736               rgb = UCD_GRAPHBREAK(c);
   5737               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5738               lgb = rgb;
   5739               eptr += len;
   5740               }
   5741             }
   5742           CHECK_PARTIAL();
   5743           }
   5744 
   5745         /* eptr is now past the end of the maximum run */
   5746 
   5747         if (possessive) continue;    /* No backtracking */
   5748 
   5749         /* We use <= pp rather than == pp to detect the start of the run while
   5750         backtracking because the use of \C in UTF mode can cause BACKCHAR to
   5751         move back past pp. This is just palliative; the use of \C in UTF mode
   5752         is fraught with danger. */
   5753 
   5754         for(;;)
   5755           {
   5756           int lgb, rgb;
   5757           PCRE2_SPTR fptr;
   5758 
   5759           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
   5760           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
   5761           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5762 
   5763           /* Backtracking over an extended grapheme cluster involves inspecting
   5764           the previous two characters (if present) to see if a break is
   5765           permitted between them. */
   5766 
   5767           eptr--;
   5768           if (!utf) c = *eptr; else
   5769             {
   5770             BACKCHAR(eptr);
   5771             GETCHAR(c, eptr);
   5772             }
   5773           rgb = UCD_GRAPHBREAK(c);
   5774 
   5775           for (;;)
   5776             {
   5777             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
   5778             fptr = eptr - 1;
   5779             if (!utf) c = *fptr; else
   5780               {
   5781               BACKCHAR(fptr);
   5782               GETCHAR(c, fptr);
   5783               }
   5784             lgb = UCD_GRAPHBREAK(c);
   5785             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5786             eptr = fptr;
   5787             rgb = lgb;
   5788             }
   5789           }
   5790         }
   5791 
   5792       else
   5793 #endif   /* SUPPORT_UNICODE */
   5794 
   5795 #ifdef SUPPORT_UNICODE
   5796       if (utf)
   5797         {
   5798         switch(ctype)
   5799           {
   5800           case OP_ANY:
   5801           for (i = min; i < max; i++)
   5802             {
   5803             if (eptr >= mb->end_subject)
   5804               {
   5805               SCHECK_PARTIAL();
   5806               break;
   5807               }
   5808             if (IS_NEWLINE(eptr)) break;
   5809             if (mb->partial != 0 &&    /* Take care with CRLF partial */
   5810                 eptr + 1 >= mb->end_subject &&
   5811                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5812                 NLBLOCK->nllen == 2 &&
   5813                 UCHAR21(eptr) == NLBLOCK->nl[0])
   5814               {
   5815               mb->hitend = TRUE;
   5816               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   5817               }
   5818             eptr++;
   5819             ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   5820             }
   5821           break;
   5822 
   5823           case OP_ALLANY:
   5824           if (max < INT_MAX)
   5825             {
   5826             for (i = min; i < max; i++)
   5827               {
   5828               if (eptr >= mb->end_subject)
   5829                 {
   5830                 SCHECK_PARTIAL();
   5831                 break;
   5832                 }
   5833               eptr++;
   5834               ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
   5835               }
   5836             }
   5837           else
   5838             {
   5839             eptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
   5840             SCHECK_PARTIAL();
   5841             }
   5842           break;
   5843 
   5844           /* The byte case is the same as non-UTF8 */
   5845 
   5846           case OP_ANYBYTE:
   5847           c = max - min;
   5848           if (c > (uint32_t)(mb->end_subject - eptr))
   5849             {
   5850             eptr = mb->end_subject;
   5851             SCHECK_PARTIAL();
   5852             }
   5853           else eptr += c;
   5854           break;
   5855 
   5856           case OP_ANYNL:
   5857           for (i = min; i < max; i++)
   5858             {
   5859             int len = 1;
   5860             if (eptr >= mb->end_subject)
   5861               {
   5862               SCHECK_PARTIAL();
   5863               break;
   5864               }
   5865             GETCHARLEN(c, eptr, len);
   5866             if (c == CHAR_CR)
   5867               {
   5868               if (++eptr >= mb->end_subject) break;
   5869               if (UCHAR21(eptr) == CHAR_LF) eptr++;
   5870               }
   5871             else
   5872               {
   5873               if (c != CHAR_LF &&
   5874                   (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
   5875                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   5876 #ifndef EBCDIC
   5877                     && c != 0x2028 && c != 0x2029
   5878 #endif  /* Not EBCDIC */
   5879                     )))
   5880                 break;
   5881               eptr += len;
   5882               }
   5883             }
   5884           break;
   5885 
   5886           case OP_NOT_HSPACE:
   5887           case OP_HSPACE:
   5888           for (i = min; i < max; i++)
   5889             {
   5890             BOOL gotspace;
   5891             int len = 1;
   5892             if (eptr >= mb->end_subject)
   5893               {
   5894               SCHECK_PARTIAL();
   5895               break;
   5896               }
   5897             GETCHARLEN(c, eptr, len);
   5898             switch(c)
   5899               {
   5900               HSPACE_CASES: gotspace = TRUE; break;
   5901               default: gotspace = FALSE; break;
   5902               }
   5903             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
   5904             eptr += len;
   5905             }
   5906           break;
   5907 
   5908           case OP_NOT_VSPACE:
   5909           case OP_VSPACE:
   5910           for (i = min; i < max; i++)
   5911             {
   5912             BOOL gotspace;
   5913             int len = 1;
   5914             if (eptr >= mb->end_subject)
   5915               {
   5916               SCHECK_PARTIAL();
   5917               break;
   5918               }
   5919             GETCHARLEN(c, eptr, len);
   5920             switch(c)
   5921               {
   5922               VSPACE_CASES: gotspace = TRUE; break;
   5923               default: gotspace = FALSE; break;
   5924               }
   5925             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
   5926             eptr += len;
   5927             }
   5928           break;
   5929 
   5930           case OP_NOT_DIGIT:
   5931           for (i = min; i < max; i++)
   5932             {
   5933             int len = 1;
   5934             if (eptr >= mb->end_subject)
   5935               {
   5936               SCHECK_PARTIAL();
   5937               break;
   5938               }
   5939             GETCHARLEN(c, eptr, len);
   5940             if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0) break;
   5941             eptr+= len;
   5942             }
   5943           break;
   5944 
   5945           case OP_DIGIT:
   5946           for (i = min; i < max; i++)
   5947             {
   5948             int len = 1;
   5949             if (eptr >= mb->end_subject)
   5950               {
   5951               SCHECK_PARTIAL();
   5952               break;
   5953               }
   5954             GETCHARLEN(c, eptr, len);
   5955             if (c >= 256 ||(mb->ctypes[c] & ctype_digit) == 0) break;
   5956             eptr+= len;
   5957             }
   5958           break;
   5959 
   5960           case OP_NOT_WHITESPACE:
   5961           for (i = min; i < max; i++)
   5962             {
   5963             int len = 1;
   5964             if (eptr >= mb->end_subject)
   5965               {
   5966               SCHECK_PARTIAL();
   5967               break;
   5968               }
   5969             GETCHARLEN(c, eptr, len);
   5970             if (c < 256 && (mb->ctypes[c] & ctype_space) != 0) break;
   5971             eptr+= len;
   5972             }
   5973           break;
   5974 
   5975           case OP_WHITESPACE:
   5976           for (i = min; i < max; i++)
   5977             {
   5978             int len = 1;
   5979             if (eptr >= mb->end_subject)
   5980               {
   5981               SCHECK_PARTIAL();
   5982               break;
   5983               }
   5984             GETCHARLEN(c, eptr, len);
   5985             if (c >= 256 ||(mb->ctypes[c] & ctype_space) == 0) break;
   5986             eptr+= len;
   5987             }
   5988           break;
   5989 
   5990           case OP_NOT_WORDCHAR:
   5991           for (i = min; i < max; i++)
   5992             {
   5993             int len = 1;
   5994             if (eptr >= mb->end_subject)
   5995               {
   5996               SCHECK_PARTIAL();
   5997               break;
   5998               }
   5999             GETCHARLEN(c, eptr, len);
   6000             if (c < 256 && (mb->ctypes[c] & ctype_word) != 0) break;
   6001             eptr+= len;
   6002             }
   6003           break;
   6004 
   6005           case OP_WORDCHAR:
   6006           for (i = min; i < max; i++)
   6007             {
   6008             int len = 1;
   6009             if (eptr >= mb->end_subject)
   6010               {
   6011               SCHECK_PARTIAL();
   6012               break;
   6013               }
   6014             GETCHARLEN(c, eptr, len);
   6015             if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0) break;
   6016             eptr+= len;
   6017             }
   6018           break;
   6019 
   6020           default:
   6021           RRETURN(PCRE2_ERROR_INTERNAL);
   6022           }
   6023 
   6024         if (possessive) continue;    /* No backtracking */
   6025 
   6026         /* After \C in UTF mode, pp might be in the middle of a Unicode
   6027         character. Use <= pp to ensure backtracking doesn't go too far. */
   6028 
   6029         for(;;)
   6030           {
   6031           if (eptr <= pp) goto TAIL_RECURSE;
   6032           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46);
   6033           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   6034           eptr--;
   6035           BACKCHAR(eptr);
   6036           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
   6037               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
   6038           }
   6039         }
   6040       else
   6041 #endif  /* SUPPORT_UNICODE */
   6042       /* Not UTF mode */
   6043         {
   6044         switch(ctype)
   6045           {
   6046           case OP_ANY:
   6047           for (i = min; i < max; i++)
   6048             {
   6049             if (eptr >= mb->end_subject)
   6050               {
   6051               SCHECK_PARTIAL();
   6052               break;
   6053               }
   6054             if (IS_NEWLINE(eptr)) break;
   6055             if (mb->partial != 0 &&    /* Take care with CRLF partial */
   6056                 eptr + 1 >= mb->end_subject &&
   6057                 NLBLOCK->nltype == NLTYPE_FIXED &&
   6058                 NLBLOCK->nllen == 2 &&
   6059                 *eptr == NLBLOCK->nl[0])
   6060               {
   6061               mb->hitend = TRUE;
   6062               if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
   6063               }
   6064             eptr++;
   6065             }
   6066           break;
   6067 
   6068           case OP_ALLANY:
   6069           case OP_ANYBYTE:
   6070           c = max - min;
   6071           if (c > (uint32_t)(mb->end_subject - eptr))
   6072             {
   6073             eptr = mb->end_subject;
   6074             SCHECK_PARTIAL();
   6075             }
   6076           else eptr += c;
   6077           break;
   6078 
   6079           case OP_ANYNL:
   6080           for (i = min; i < max; i++)
   6081             {
   6082             if (eptr >= mb->end_subject)
   6083               {
   6084               SCHECK_PARTIAL();
   6085               break;
   6086               }
   6087             c = *eptr;
   6088             if (c == CHAR_CR)
   6089               {
   6090               if (++eptr >= mb->end_subject) break;
   6091               if (*eptr == CHAR_LF) eptr++;
   6092               }
   6093             else
   6094               {
   6095               if (c != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
   6096                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   6097 #if PCRE2_CODE_UNIT_WIDTH != 8
   6098                  && c != 0x2028 && c != 0x2029
   6099 #endif
   6100                  ))) break;
   6101               eptr++;
   6102               }
   6103             }
   6104           break;
   6105 
   6106           case OP_NOT_HSPACE:
   6107           for (i = min; i < max; i++)
   6108             {
   6109             if (eptr >= mb->end_subject)
   6110               {
   6111               SCHECK_PARTIAL();
   6112               break;
   6113               }
   6114             switch(*eptr)
   6115               {
   6116               default: eptr++; break;
   6117               HSPACE_BYTE_CASES:
   6118 #if PCRE2_CODE_UNIT_WIDTH != 8
   6119               HSPACE_MULTIBYTE_CASES:
   6120 #endif
   6121               goto ENDLOOP00;
   6122               }
   6123             }
   6124           ENDLOOP00:
   6125           break;
   6126 
   6127           case OP_HSPACE:
   6128           for (i = min; i < max; i++)
   6129             {
   6130             if (eptr >= mb->end_subject)
   6131               {
   6132               SCHECK_PARTIAL();
   6133               break;
   6134               }
   6135             switch(*eptr)
   6136               {
   6137               default: goto ENDLOOP01;
   6138               HSPACE_BYTE_CASES:
   6139 #if PCRE2_CODE_UNIT_WIDTH != 8
   6140               HSPACE_MULTIBYTE_CASES:
   6141 #endif
   6142               eptr++; break;
   6143               }
   6144             }
   6145           ENDLOOP01:
   6146           break;
   6147 
   6148           case OP_NOT_VSPACE:
   6149           for (i = min; i < max; i++)
   6150             {
   6151             if (eptr >= mb->end_subject)
   6152               {
   6153               SCHECK_PARTIAL();
   6154               break;
   6155               }
   6156             switch(*eptr)
   6157               {
   6158               default: eptr++; break;
   6159               VSPACE_BYTE_CASES:
   6160 #if PCRE2_CODE_UNIT_WIDTH != 8
   6161               VSPACE_MULTIBYTE_CASES:
   6162 #endif
   6163               goto ENDLOOP02;
   6164               }
   6165             }
   6166           ENDLOOP02:
   6167           break;
   6168 
   6169           case OP_VSPACE:
   6170           for (i = min; i < max; i++)
   6171             {
   6172             if (eptr >= mb->end_subject)
   6173               {
   6174               SCHECK_PARTIAL();
   6175               break;
   6176               }
   6177             switch(*eptr)
   6178               {
   6179               default: goto ENDLOOP03;
   6180               VSPACE_BYTE_CASES:
   6181 #if PCRE2_CODE_UNIT_WIDTH != 8
   6182               VSPACE_MULTIBYTE_CASES:
   6183 #endif
   6184               eptr++; break;
   6185               }
   6186             }
   6187           ENDLOOP03:
   6188           break;
   6189 
   6190           case OP_NOT_DIGIT:
   6191           for (i = min; i < max; i++)
   6192             {
   6193             if (eptr >= mb->end_subject)
   6194               {
   6195               SCHECK_PARTIAL();
   6196               break;
   6197               }
   6198             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0) break;
   6199             eptr++;
   6200             }
   6201           break;
   6202 
   6203           case OP_DIGIT:
   6204           for (i = min; i < max; i++)
   6205             {
   6206             if (eptr >= mb->end_subject)
   6207               {
   6208               SCHECK_PARTIAL();
   6209               break;
   6210               }
   6211             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0) break;
   6212             eptr++;
   6213             }
   6214           break;
   6215 
   6216           case OP_NOT_WHITESPACE:
   6217           for (i = min; i < max; i++)
   6218             {
   6219             if (eptr >= mb->end_subject)
   6220               {
   6221               SCHECK_PARTIAL();
   6222               break;
   6223               }
   6224             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0) break;
   6225             eptr++;
   6226             }
   6227           break;
   6228 
   6229           case OP_WHITESPACE:
   6230           for (i = min; i < max; i++)
   6231             {
   6232             if (eptr >= mb->end_subject)
   6233               {
   6234               SCHECK_PARTIAL();
   6235               break;
   6236               }
   6237             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0) break;
   6238             eptr++;
   6239             }
   6240           break;
   6241 
   6242           case OP_NOT_WORDCHAR:
   6243           for (i = min; i < max; i++)
   6244             {
   6245             if (eptr >= mb->end_subject)
   6246               {
   6247               SCHECK_PARTIAL();
   6248               break;
   6249               }
   6250             if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0) break;
   6251             eptr++;
   6252             }
   6253           break;
   6254 
   6255           case OP_WORDCHAR:
   6256           for (i = min; i < max; i++)
   6257             {
   6258             if (eptr >= mb->end_subject)
   6259               {
   6260               SCHECK_PARTIAL();
   6261               break;
   6262               }
   6263             if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0) break;
   6264             eptr++;
   6265             }
   6266           break;
   6267 
   6268           default:
   6269           RRETURN(PCRE2_ERROR_INTERNAL);
   6270           }
   6271 
   6272         if (possessive) continue;    /* No backtracking */
   6273         for (;;)
   6274           {
   6275           if (eptr == pp) goto TAIL_RECURSE;
   6276           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM47);
   6277           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   6278           eptr--;
   6279           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
   6280               eptr[-1] == CHAR_CR) eptr--;
   6281           }
   6282         }
   6283 
   6284       /* Control never gets here */
   6285       }
   6286 
   6287     /* There's been some horrible disaster. Arrival here can only mean there is
   6288     something seriously wrong in the code above or the OP_xxx definitions. */
   6289 
   6290     default:
   6291     RRETURN(PCRE2_ERROR_INTERNAL);
   6292     }
   6293 
   6294   /* Do not stick any code in here without much thought; it is assumed
   6295   that "continue" in the code above comes out to here to repeat the main
   6296   loop. */
   6297 
   6298   }             /* End of main loop */
   6299 /* Control never reaches here */
   6300 
   6301 
   6302 /* When compiling to use the heap rather than the stack for recursive calls to
   6303 match(), the RRETURN() macro jumps here. The number that is saved in
   6304 frame->Xwhere indicates which label we actually want to return to. */
   6305 
   6306 #ifdef HEAP_MATCH_RECURSE
   6307 #define LBL(val) case val: goto L_RM##val;
   6308 HEAP_RETURN:
   6309 switch (frame->Xwhere)
   6310   {
   6311   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
   6312   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   6313   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   6314   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
   6315   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
   6316   LBL(65) LBL(66) LBL(68)
   6317 #ifdef SUPPORT_WIDE_CHARS
   6318   LBL(20) LBL(21)
   6319 #endif
   6320 #ifdef SUPPORT_UNICODE
   6321   LBL(16) LBL(18)
   6322   LBL(22) LBL(23) LBL(28) LBL(30)
   6323   LBL(32) LBL(34) LBL(42) LBL(46)
   6324   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
   6325   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
   6326 #endif  /* SUPPORT_UNICODE */
   6327   default:
   6328   return PCRE2_ERROR_INTERNAL;
   6329   }
   6330 #undef LBL
   6331 #endif  /* HEAP_MATCH_RECURSE */
   6332 }
   6333 
   6334 
   6335 /***************************************************************************
   6336 ****************************************************************************
   6337                    RECURSION IN THE match() FUNCTION
   6338 
   6339 Undefine all the macros that were defined above to handle this. */
   6340 
   6341 #ifdef HEAP_MATCH_RECURSE
   6342 #undef eptr
   6343 #undef ecode
   6344 #undef mstart
   6345 #undef offset_top
   6346 #undef eptrb
   6347 #undef flags
   6348 
   6349 #undef callpat
   6350 #undef charptr
   6351 #undef data
   6352 #undef next_ecode
   6353 #undef pp
   6354 #undef prev
   6355 #undef saved_eptr
   6356 
   6357 #undef new_recursive
   6358 
   6359 #undef cur_is_word
   6360 #undef condition
   6361 #undef prev_is_word
   6362 
   6363 #undef ctype
   6364 #undef length
   6365 #undef max
   6366 #undef min
   6367 #undef number
   6368 #undef offset
   6369 #undef op
   6370 #undef save_capture_last
   6371 #undef save_offset1
   6372 #undef save_offset2
   6373 #undef save_offset3
   6374 
   6375 #undef newptrb
   6376 #endif  /* HEAP_MATCH_RECURSE */
   6377 
   6378 /* These two are defined as macros in both cases */
   6379 
   6380 #undef fc
   6381 #undef fi
   6382 
   6383 /***************************************************************************
   6384 ***************************************************************************/
   6385 
   6386 
   6387 #ifdef HEAP_MATCH_RECURSE
   6388 /*************************************************
   6389 *          Release allocated heap frames         *
   6390 *************************************************/
   6391 
   6392 /* This function releases all the allocated frames. The base frame is on the
   6393 machine stack, and so must not be freed.
   6394 
   6395 Argument:
   6396   frame_base    the address of the base frame
   6397   mb            the match block
   6398 
   6399 Returns:  nothing
   6400 */
   6401 
   6402 static void
   6403 release_match_heapframes (heapframe *frame_base, match_block *mb)
   6404 {
   6405 heapframe *nextframe = frame_base->Xnextframe;
   6406 while (nextframe != NULL)
   6407   {
   6408   heapframe *oldframe = nextframe;
   6409   nextframe = nextframe->Xnextframe;
   6410   mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
   6411   }
   6412 }
   6413 #endif  /* HEAP_MATCH_RECURSE */
   6414 
   6415 
   6416 
   6417 /*************************************************
   6418 *           Match a Regular Expression           *
   6419 *************************************************/
   6420 
   6421 /* This function applies a compiled pattern to a subject string and picks out
   6422 portions of the string if it matches. Two elements in the vector are set for
   6423 each substring: the offsets to the start and end of the substring.
   6424 
   6425 Arguments:
   6426   code            points to the compiled expression
   6427   subject         points to the subject string
   6428   length          length of subject string (may contain binary zeros)
   6429   start_offset    where to start in the subject string
   6430   options         option bits
   6431   match_data      points to a match_data block
   6432   mcontext        points to a PCRE2 match context
   6433 
   6434 Returns:          > 0 => success; value is the number of ovector pairs filled
   6435                   = 0 => success, but ovector is not big enough
   6436                    -1 => failed to match (PCRE2_ERROR_NOMATCH)
   6437                    -2 => partial match (PCRE2_ERROR_PARTIAL)
   6438                  < -2 => some kind of unexpected problem
   6439 */
   6440 
   6441 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
   6442 pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
   6443   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
   6444   pcre2_match_context *mcontext)
   6445 {
   6446 int rc;
   6447 int ocount;
   6448 
   6449 const uint8_t *start_bits = NULL;
   6450 
   6451 const pcre2_real_code *re = (const pcre2_real_code *)code;
   6452 
   6453 BOOL anchored;
   6454 BOOL firstline;
   6455 BOOL has_first_cu = FALSE;
   6456 BOOL has_req_cu = FALSE;
   6457 BOOL startline;
   6458 BOOL using_temporary_offsets = FALSE;
   6459 BOOL utf;
   6460 
   6461 PCRE2_UCHAR first_cu = 0;
   6462 PCRE2_UCHAR first_cu2 = 0;
   6463 PCRE2_UCHAR req_cu = 0;
   6464 PCRE2_UCHAR req_cu2 = 0;
   6465 
   6466 PCRE2_SPTR bumpalong_limit;
   6467 PCRE2_SPTR end_subject;
   6468 PCRE2_SPTR start_match = subject + start_offset;
   6469 PCRE2_SPTR req_cu_ptr = start_match - 1;
   6470 PCRE2_SPTR start_partial = NULL;
   6471 PCRE2_SPTR match_partial = NULL;
   6472 
   6473 /* We need to have mb pointing to a match block, because the IS_NEWLINE macro
   6474 is used below, and it expects NLBLOCK to be defined as a pointer. */
   6475 
   6476 match_block actual_match_block;
   6477 match_block *mb = &actual_match_block;
   6478 
   6479 #ifdef HEAP_MATCH_RECURSE
   6480 heapframe frame_zero;
   6481 frame_zero.Xprevframe = NULL;            /* Marks the top level */
   6482 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
   6483 mb->match_frames_base = &frame_zero;
   6484 #endif
   6485 
   6486 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
   6487 subject string. */
   6488 
   6489 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
   6490 end_subject = subject + length;
   6491 
   6492 /* Plausibility checks */
   6493 
   6494 if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
   6495 if (code == NULL || subject == NULL || match_data == NULL)
   6496   return PCRE2_ERROR_NULL;
   6497 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
   6498 
   6499 /* Check that the first field in the block is the magic number. */
   6500 
   6501 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
   6502 
   6503 /* Check the code unit width. */
   6504 
   6505 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
   6506   return PCRE2_ERROR_BADMODE;
   6507 
   6508 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
   6509 options variable for this function. Users of PCRE2 who are not calling the
   6510 function directly would like to have a way of setting these flags, in the same
   6511 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
   6512 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
   6513 (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
   6514 transferred to the options for this function. The bits are guaranteed to be
   6515 adjacent, but do not have the same values. This bit of Boolean trickery assumes
   6516 that the match-time bits are not more significant than the flag bits. If by
   6517 accident this is not the case, a compile-time division by zero error will
   6518 occur. */
   6519 
   6520 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
   6521 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
   6522 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
   6523 #undef FF
   6524 #undef OO
   6525 
   6526 /* A NULL match context means "use a default context" */
   6527 
   6528 if (mcontext == NULL)
   6529   mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
   6530 
   6531 /* These two settings are used in the code for checking a UTF string that
   6532 follows immediately afterwards. Other values in the mb block are used only
   6533 during interpretive pcre2_match() processing, not when the JIT support is in
   6534 use, so they are set up later. */
   6535 
   6536 utf = (re->overall_options & PCRE2_UTF) != 0;
   6537 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
   6538               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
   6539 
   6540 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
   6541 we must also check that a starting offset does not point into the middle of a
   6542 multiunit character. We check only the portion of the subject that is going to
   6543 be inspected during matching - from the offset minus the maximum lookbehind
   6544 to the given length. This saves time when a small part of a large subject is
   6545 being matched by the use of a starting offset. Note that the maximum lookbehind
   6546 is a number of characters, not code units. */
   6547 
   6548 #ifdef SUPPORT_UNICODE
   6549 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
   6550   {
   6551   PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
   6552 
   6553   if (start_offset > 0)
   6554     {
   6555 #if PCRE2_CODE_UNIT_WIDTH != 32
   6556     unsigned int i;
   6557     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
   6558       return PCRE2_ERROR_BADUTFOFFSET;
   6559     for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
   6560       {
   6561       check_subject--;
   6562       while (check_subject > subject &&
   6563 #if PCRE2_CODE_UNIT_WIDTH == 8
   6564       (*check_subject & 0xc0) == 0x80)
   6565 #else  /* 16-bit */
   6566       (*check_subject & 0xfc00) == 0xdc00)
   6567 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
   6568         check_subject--;
   6569       }
   6570 #else
   6571     /* In the 32-bit library, one code unit equals one character. However,
   6572     we cannot just subtract the lookbehind and then compare pointers, because
   6573     a very large lookbehind could create an invalid pointer. */
   6574 
   6575     if (start_offset >= re->max_lookbehind)
   6576       check_subject -= re->max_lookbehind;
   6577     else
   6578       check_subject = subject;
   6579 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
   6580     }
   6581 
   6582   /* Validate the relevant portion of the subject. After an error, adjust the
   6583   offset to be an absolute offset in the whole string. */
   6584 
   6585   match_data->rc = PRIV(valid_utf)(check_subject,
   6586     length - (check_subject - subject), &(match_data->startchar));
   6587   if (match_data->rc != 0)
   6588     {
   6589     match_data->startchar += check_subject - subject;
   6590     return match_data->rc;
   6591     }
   6592   }
   6593 #endif  /* SUPPORT_UNICODE */
   6594 
   6595 /* It is an error to set an offset limit without setting the flag at compile
   6596 time. */
   6597 
   6598 if (mcontext->offset_limit != PCRE2_UNSET &&
   6599      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
   6600   return PCRE2_ERROR_BADOFFSETLIMIT;
   6601 
   6602 /* If the pattern was successfully studied with JIT support, run the JIT
   6603 executable instead of the rest of this function. Most options must be set at
   6604 compile time for the JIT code to be usable. Fall back to the normal code path if
   6605 an unsupported option is set or if JIT returns BADOPTION (which means that the
   6606 selected normal or partial matching mode was not compiled). */
   6607 
   6608 #ifdef SUPPORT_JIT
   6609 if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
   6610   {
   6611   rc = pcre2_jit_match(code, subject, length, start_offset, options,
   6612     match_data, mcontext);
   6613   if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc;
   6614   }
   6615 #endif
   6616 
   6617 /* Carry on with non-JIT matching. */
   6618 
   6619 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
   6620 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
   6621 startline = (re->flags & PCRE2_STARTLINE) != 0;
   6622 bumpalong_limit =  (mcontext->offset_limit == PCRE2_UNSET)?
   6623   end_subject : subject + mcontext->offset_limit;
   6624 
   6625 /* Fill in the fields in the match block. */
   6626 
   6627 mb->callout = mcontext->callout;
   6628 mb->callout_data = mcontext->callout_data;
   6629 mb->memctl = mcontext->memctl;
   6630 #ifdef HEAP_MATCH_RECURSE
   6631 mb->stack_memctl = mcontext->stack_memctl;
   6632 #endif
   6633 
   6634 mb->start_subject = subject;
   6635 mb->start_offset = start_offset;
   6636 mb->end_subject = end_subject;
   6637 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
   6638 
   6639 mb->moptions = options;                 /* Match options */
   6640 mb->poptions = re->overall_options;     /* Pattern options */
   6641 
   6642 mb->ignore_skip_arg = 0;
   6643 mb->mark = mb->nomatch_mark = NULL;     /* In case never set */
   6644 mb->recursive = NULL;                   /* No recursion at top level */
   6645 mb->ovecsave_chain = NULL;              /* No ovecsave blocks yet */
   6646 mb->hitend = FALSE;
   6647 
   6648 /* The name table is needed for finding all the numbers associated with a
   6649 given name, for condition testing. The code follows the name table. */
   6650 
   6651 mb->name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
   6652 mb->name_count = re->name_count;
   6653 mb->name_entry_size = re->name_entry_size;
   6654 mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
   6655 
   6656 /* Limits set in the pattern override the match context only if they are
   6657 smaller. */
   6658 
   6659 mb->match_limit = (mcontext->match_limit < re->limit_match)?
   6660                   mcontext->match_limit : re->limit_match;
   6661 mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
   6662                             mcontext->recursion_limit : re->limit_recursion;
   6663 
   6664 /* Pointers to the individual character tables */
   6665 
   6666 mb->lcc = re->tables + lcc_offset;
   6667 mb->fcc = re->tables + fcc_offset;
   6668 mb->ctypes = re->tables + ctypes_offset;
   6669 
   6670 /* Process the \R and newline settings. */
   6671 
   6672 mb->bsr_convention = re->bsr_convention;
   6673 mb->nltype = NLTYPE_FIXED;
   6674 switch(re->newline_convention)
   6675   {
   6676   case PCRE2_NEWLINE_CR:
   6677   mb->nllen = 1;
   6678   mb->nl[0] = CHAR_CR;
   6679   break;
   6680 
   6681   case PCRE2_NEWLINE_LF:
   6682   mb->nllen = 1;
   6683   mb->nl[0] = CHAR_NL;
   6684   break;
   6685 
   6686   case PCRE2_NEWLINE_CRLF:
   6687   mb->nllen = 2;
   6688   mb->nl[0] = CHAR_CR;
   6689   mb->nl[1] = CHAR_NL;
   6690   break;
   6691 
   6692   case PCRE2_NEWLINE_ANY:
   6693   mb->nltype = NLTYPE_ANY;
   6694   break;
   6695 
   6696   case PCRE2_NEWLINE_ANYCRLF:
   6697   mb->nltype = NLTYPE_ANYCRLF;
   6698   break;
   6699 
   6700   default: return PCRE2_ERROR_INTERNAL;
   6701   }
   6702 
   6703 /* If the expression has got more back references than the offsets supplied can
   6704 hold, we get a temporary chunk of memory to use during the matching. Otherwise,
   6705 we can use the vector supplied. The size of the ovector is three times the
   6706 value in the oveccount field. Two-thirds of it is pairs for storing matching
   6707 offsets, and the top third is working space. */
   6708 
   6709 if (re->top_backref >= match_data->oveccount)
   6710   {
   6711   ocount = re->top_backref * 3 + 3;
   6712   mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
   6713     mb->memctl.memory_data));
   6714   if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
   6715   using_temporary_offsets = TRUE;
   6716   }
   6717 else
   6718   {
   6719   ocount = 3 * match_data->oveccount;
   6720   mb->ovector = match_data->ovector;
   6721   }
   6722 
   6723 mb->offset_end = ocount;
   6724 mb->offset_max = (2*ocount)/3;
   6725 
   6726 /* Reset the working variable associated with each extraction. These should
   6727 never be used unless previously set, but they get saved and restored, and so we
   6728 initialize them to avoid reading uninitialized locations. Also, unset the
   6729 offsets for the matched string. This is really just for tidiness with callouts,
   6730 in case they inspect these fields. */
   6731 
   6732 if (ocount > 0)
   6733   {
   6734   register PCRE2_SIZE *iptr = mb->ovector + ocount;
   6735   register PCRE2_SIZE *iend = iptr - re->top_bracket;
   6736   if (iend < mb->ovector + 2) iend = mb->ovector + 2;
   6737   while (--iptr >= iend) *iptr = PCRE2_UNSET;
   6738   mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
   6739   }
   6740 
   6741 /* Set up the first code unit to match, if available. The first_codeunit value
   6742 is never set for an anchored regular expression, but the anchoring may be
   6743 forced at run time, so we have to test for anchoring. The first code unit may
   6744 be unset for an unanchored pattern, of course. If there's no first code unit
   6745 there may be a bitmap of possible first characters. */
   6746 
   6747 if (!anchored)
   6748   {
   6749   if ((re->flags & PCRE2_FIRSTSET) != 0)
   6750     {
   6751     has_first_cu = TRUE;
   6752     first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
   6753     if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
   6754       {
   6755       first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
   6756 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
   6757       if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
   6758 #endif
   6759       }
   6760     }
   6761   else
   6762     if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
   6763       start_bits = re->start_bitmap;
   6764   }
   6765 
   6766 /* For anchored or unanchored matches, there may be a "last known required
   6767 character" set. */
   6768 
   6769 if ((re->flags & PCRE2_LASTSET) != 0)
   6770   {
   6771   has_req_cu = TRUE;
   6772   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
   6773   if ((re->flags & PCRE2_LASTCASELESS) != 0)
   6774     {
   6775     req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
   6776 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
   6777     if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
   6778 #endif
   6779     }
   6780   }
   6781 
   6782 
   6783 /* ==========================================================================*/
   6784 
   6785 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
   6786 the loop runs just once. */
   6787 
   6788 for(;;)
   6789   {
   6790   PCRE2_SPTR new_start_match;
   6791   mb->capture_last = 0;
   6792 
   6793   /* ----------------- Start of match optimizations ---------------- */
   6794 
   6795   /* There are some optimizations that avoid running the match if a known
   6796   starting point is not found, or if a known later code unit is not present.
   6797   However, there is an option (settable at compile time) that disables these,
   6798   for testing and for ensuring that all callouts do actually occur. */
   6799 
   6800   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
   6801     {
   6802     PCRE2_SPTR save_end_subject = end_subject;
   6803 
   6804     /* If firstline is TRUE, the start of the match is constrained to the first
   6805     line of a multiline string. That is, the match must be before or at the
   6806     first newline. Implement this by temporarily adjusting end_subject so that
   6807     we stop the optimization scans at a newline. If the match fails at the
   6808     newline, later code breaks this loop. */
   6809 
   6810     if (firstline)
   6811       {
   6812       PCRE2_SPTR t = start_match;
   6813 #ifdef SUPPORT_UNICODE
   6814       if (utf)
   6815         {
   6816         while (t < mb->end_subject && !IS_NEWLINE(t))
   6817           {
   6818           t++;
   6819           ACROSSCHAR(t < end_subject, *t, t++);
   6820           }
   6821         }
   6822       else
   6823 #endif
   6824       while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
   6825       end_subject = t;
   6826       }
   6827 
   6828     /* Advance to a unique first code unit if there is one. In 8-bit mode, the
   6829     use of memchr() gives a big speed up. */
   6830 
   6831     if (has_first_cu)
   6832       {
   6833       PCRE2_UCHAR smc;
   6834       if (first_cu != first_cu2)
   6835         while (start_match < end_subject &&
   6836           (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
   6837           start_match++;
   6838       else
   6839         {
   6840 #if PCRE2_CODE_UNIT_WIDTH != 8
   6841         while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
   6842           start_match++;
   6843 #else
   6844         start_match = memchr(start_match, first_cu, end_subject - start_match);
   6845         if (start_match == NULL) start_match = end_subject;
   6846 #endif
   6847         }
   6848       }
   6849 
   6850     /* Or to just after a linebreak for a multiline match */
   6851 
   6852     else if (startline)
   6853       {
   6854       if (start_match > mb->start_subject + start_offset)
   6855         {
   6856 #ifdef SUPPORT_UNICODE
   6857         if (utf)
   6858           {
   6859           while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6860             {
   6861             start_match++;
   6862             ACROSSCHAR(start_match < end_subject, *start_match,
   6863               start_match++);
   6864             }
   6865           }
   6866         else
   6867 #endif
   6868         while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6869           start_match++;
   6870 
   6871         /* If we have just passed a CR and the newline option is ANY or
   6872         ANYCRLF, and we are now at a LF, advance the match position by one more
   6873         code unit. */
   6874 
   6875         if (start_match[-1] == CHAR_CR &&
   6876              (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
   6877              start_match < end_subject &&
   6878              UCHAR21TEST(start_match) == CHAR_NL)
   6879           start_match++;
   6880         }
   6881       }
   6882 
   6883     /* Or to a non-unique first code unit if any have been identified. The
   6884     bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
   6885     code units greater than 254 set the 255 bit. */
   6886 
   6887     else if (start_bits != NULL)
   6888       {
   6889       while (start_match < end_subject)
   6890         {
   6891         register uint32_t c = UCHAR21TEST(start_match);
   6892 #if PCRE2_CODE_UNIT_WIDTH != 8
   6893         if (c > 255) c = 255;
   6894 #endif
   6895         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
   6896         start_match++;
   6897         }
   6898       }
   6899 
   6900     /* Restore fudged end_subject */
   6901 
   6902     end_subject = save_end_subject;
   6903 
   6904     /* The following two optimizations are disabled for partial matching. */
   6905 
   6906     if (!mb->partial)
   6907       {
   6908       /* The minimum matching length is a lower bound; no actual string of that
   6909       length may actually match the pattern. Although the value is, strictly,
   6910       in characters, we treat it as code units to avoid spending too much time
   6911       in this optimization. */
   6912 
   6913       if (end_subject - start_match < re->minlength)
   6914         {
   6915         rc = MATCH_NOMATCH;
   6916         break;
   6917         }
   6918 
   6919       /* If req_cu is set, we know that that code unit must appear in the
   6920       subject for the match to succeed. If the first code unit is set, req_cu
   6921       must be later in the subject; otherwise the test starts at the match
   6922       point. This optimization can save a huge amount of backtracking in
   6923       patterns with nested unlimited repeats that aren't going to match.
   6924       Writing separate code for cased/caseless versions makes it go faster, as
   6925       does using an autoincrement and backing off on a match.
   6926 
   6927       HOWEVER: when the subject string is very, very long, searching to its end
   6928       can take a long time, and give bad performance on quite ordinary
   6929       patterns. This showed up when somebody was matching something like
   6930       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
   6931       sufficiently long. */
   6932 
   6933       if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
   6934         {
   6935         register PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
   6936 
   6937         /* We don't need to repeat the search if we haven't yet reached the
   6938         place we found it at last time. */
   6939 
   6940         if (p > req_cu_ptr)
   6941           {
   6942           if (req_cu != req_cu2)
   6943             {
   6944             while (p < end_subject)
   6945               {
   6946               register uint32_t pp = UCHAR21INCTEST(p);
   6947               if (pp == req_cu || pp == req_cu2) { p--; break; }
   6948               }
   6949             }
   6950           else
   6951             {
   6952             while (p < end_subject)
   6953               {
   6954               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
   6955               }
   6956             }
   6957 
   6958           /* If we can't find the required code unit, break the matching loop,
   6959           forcing a match failure. */
   6960 
   6961           if (p >= end_subject)
   6962             {
   6963             rc = MATCH_NOMATCH;
   6964             break;
   6965             }
   6966 
   6967           /* If we have found the required code unit, save the point where we
   6968           found it, so that we don't search again next time round the loop if
   6969           the start hasn't passed this code unit yet. */
   6970 
   6971           req_cu_ptr = p;
   6972           }
   6973         }
   6974       }
   6975     }
   6976 
   6977   /* ------------ End of start of match optimizations ------------ */
   6978 
   6979   /* Give no match if we have passed the bumpalong limit. */
   6980 
   6981   if (start_match > bumpalong_limit)
   6982     {
   6983     rc = MATCH_NOMATCH;
   6984     break;
   6985     }
   6986 
   6987   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
   6988   first starting point for which a partial match was found. */
   6989 
   6990   mb->start_match_ptr = start_match;
   6991   mb->start_used_ptr = start_match;
   6992   mb->last_used_ptr = start_match;
   6993   mb->match_call_count = 0;
   6994   mb->match_function_type = 0;
   6995   mb->end_offset_top = 0;
   6996   mb->skip_arg_count = 0;
   6997   rc = match(start_match, mb->start_code, start_match, 2, mb, NULL, 0);
   6998 
   6999   if (mb->hitend && start_partial == NULL)
   7000     {
   7001     start_partial = mb->start_used_ptr;
   7002     match_partial = start_match;
   7003     }
   7004 
   7005   switch(rc)
   7006     {
   7007     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
   7008     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
   7009     entirely. The only way we can do that is to re-do the match at the same
   7010     point, with a flag to force SKIP with an argument to be ignored. Just
   7011     treating this case as NOMATCH does not work because it does not check other
   7012     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
   7013 
   7014     case MATCH_SKIP_ARG:
   7015     new_start_match = start_match;
   7016     mb->ignore_skip_arg = mb->skip_arg_count;
   7017     break;
   7018 
   7019     /* SKIP passes back the next starting point explicitly, but if it is no
   7020     greater than the match we have just done, treat it as NOMATCH. */
   7021 
   7022     case MATCH_SKIP:
   7023     if (mb->start_match_ptr > start_match)
   7024       {
   7025       new_start_match = mb->start_match_ptr;
   7026       break;
   7027       }
   7028     /* Fall through */
   7029 
   7030     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
   7031     exactly like PRUNE. Unset ignore SKIP-with-argument. */
   7032 
   7033     case MATCH_NOMATCH:
   7034     case MATCH_PRUNE:
   7035     case MATCH_THEN:
   7036     mb->ignore_skip_arg = 0;
   7037     new_start_match = start_match + 1;
   7038 #ifdef SUPPORT_UNICODE
   7039     if (utf)
   7040       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
   7041         new_start_match++);
   7042 #endif
   7043     break;
   7044 
   7045     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
   7046 
   7047     case MATCH_COMMIT:
   7048     rc = MATCH_NOMATCH;
   7049     goto ENDLOOP;
   7050 
   7051     /* Any other return is either a match, or some kind of error. */
   7052 
   7053     default:
   7054     goto ENDLOOP;
   7055     }
   7056 
   7057   /* Control reaches here for the various types of "no match at this point"
   7058   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
   7059 
   7060   rc = MATCH_NOMATCH;
   7061 
   7062   /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
   7063   newline in the subject (though it may continue over the newline). Therefore,
   7064   if we have just failed to match, starting at a newline, do not continue. */
   7065 
   7066   if (firstline && IS_NEWLINE(start_match)) break;
   7067 
   7068   /* Advance to new matching position */
   7069 
   7070   start_match = new_start_match;
   7071 
   7072   /* Break the loop if the pattern is anchored or if we have passed the end of
   7073   the subject. */
   7074 
   7075   if (anchored || start_match > end_subject) break;
   7076 
   7077   /* If we have just passed a CR and we are now at a LF, and the pattern does
   7078   not contain any explicit matches for \r or \n, and the newline option is CRLF
   7079   or ANY or ANYCRLF, advance the match position by one more code unit. In
   7080   normal matching start_match will always be greater than the first position at
   7081   this stage, but a failed *SKIP can cause a return at the same point, which is
   7082   why the first test exists. */
   7083 
   7084   if (start_match > subject + start_offset &&
   7085       start_match[-1] == CHAR_CR &&
   7086       start_match < end_subject &&
   7087       *start_match == CHAR_NL &&
   7088       (re->flags & PCRE2_HASCRORLF) == 0 &&
   7089         (mb->nltype == NLTYPE_ANY ||
   7090          mb->nltype == NLTYPE_ANYCRLF ||
   7091          mb->nllen == 2))
   7092     start_match++;
   7093 
   7094   mb->mark = NULL;   /* Reset for start of next match attempt */
   7095   }                  /* End of for(;;) "bumpalong" loop */
   7096 
   7097 /* ==========================================================================*/
   7098 
   7099 /* When we reach here, one of the stopping conditions is true:
   7100 
   7101 (1) The match succeeded, either completely, or partially;
   7102 
   7103 (2) The pattern is anchored or the match was failed by (*COMMIT);
   7104 
   7105 (3) We are past the end of the subject or the bumpalong limit;
   7106 
   7107 (4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
   7108     this option requests that a match occur at or before the first newline in
   7109     the subject.
   7110 
   7111 (5) Some kind of error occurred.
   7112 
   7113 */
   7114 
   7115 ENDLOOP:
   7116 
   7117 #ifdef HEAP_MATCH_RECURSE
   7118 release_match_heapframes(&frame_zero, mb);
   7119 #endif
   7120 
   7121 /* Release any frames that were saved from recursions. */
   7122 
   7123 while (mb->ovecsave_chain != NULL)
   7124   {
   7125   ovecsave_frame *this = mb->ovecsave_chain;
   7126   mb->ovecsave_chain = this->next;
   7127   mb->memctl.free(this, mb->memctl.memory_data);
   7128   }
   7129 
   7130 /* Fill in fields that are always returned in the match data. */
   7131 
   7132 match_data->code = re;
   7133 match_data->subject = subject;
   7134 match_data->mark = mb->mark;
   7135 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
   7136 
   7137 /* Handle a fully successful match. */
   7138 
   7139 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   7140   {
   7141   uint32_t arg_offset_max = 2 * match_data->oveccount;
   7142 
   7143   /* When the offset vector is big enough to deal with any backreferences,
   7144   captured substring offsets will already be set up. In the case where we had
   7145   to get some local memory to hold offsets for backreference processing, copy
   7146   those that we can. In this case there need not be overflow if certain parts
   7147   of the pattern were not used, even though there are more capturing
   7148   parentheses than vector slots. */
   7149 
   7150   if (using_temporary_offsets)
   7151     {
   7152     if (arg_offset_max >= 4)
   7153       {
   7154       memcpy(match_data->ovector + 2, mb->ovector + 2,
   7155         (arg_offset_max - 2) * sizeof(PCRE2_SIZE));
   7156       }
   7157     if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
   7158     mb->memctl.free(mb->ovector, mb->memctl.memory_data);
   7159     }
   7160 
   7161   /* Set the return code to the number of captured strings, or 0 if there were
   7162   too many to fit into the ovector. */
   7163 
   7164   match_data->rc = ((mb->capture_last & OVFLBIT) != 0)?
   7165     0 : mb->end_offset_top/2;
   7166 
   7167   /* If there is space in the offset vector, set any pairs that follow the
   7168   highest-numbered captured string but are less than the number of capturing
   7169   groups in the pattern (and are within the ovector) to PCRE2_UNSET. It is
   7170   documented that this happens. In earlier versions, the whole set of potential
   7171   capturing offsets was initialized each time round the loop, but this is
   7172   handled differently now. "Gaps" are set to PCRE2_UNSET dynamically instead
   7173   (this fixed a bug). Thus, it is only those at the end that need setting here.
   7174   We can't just mark them all unset at the start of the whole thing because
   7175   they may get set in one branch that is not the final matching branch. */
   7176 
   7177   if (mb->end_offset_top/2 <= re->top_bracket)
   7178     {
   7179     register PCRE2_SIZE *iptr, *iend;
   7180     int resetcount = re->top_bracket + 1;
   7181     if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
   7182     iptr = match_data->ovector + mb->end_offset_top;
   7183     iend = match_data->ovector + 2 * resetcount;
   7184     while (iptr < iend) *iptr++ = PCRE2_UNSET;
   7185     }
   7186 
   7187   /* If there is space, set up the whole thing as substring 0. The value of
   7188   mb->start_match_ptr might be modified if \K was encountered on the successful
   7189   matching path. */
   7190 
   7191   if (match_data->oveccount < 1) rc = 0; else
   7192     {
   7193     match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
   7194     match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
   7195     }
   7196 
   7197   /* Set the remaining returned values */
   7198 
   7199   match_data->startchar = start_match - subject;
   7200   match_data->leftchar = mb->start_used_ptr - subject;
   7201   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
   7202     mb->last_used_ptr : mb->end_match_ptr) - subject;
   7203   return match_data->rc;
   7204   }
   7205 
   7206 /* Control gets here if there has been a partial match, an error, or if the
   7207 overall match attempt has failed at all permitted starting positions. Any mark
   7208 data is in the nomatch_mark field. */
   7209 
   7210 match_data->mark = mb->nomatch_mark;
   7211 
   7212 /* For anything other than nomatch or partial match, just return the code. */
   7213 
   7214 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
   7215   match_data->rc = rc;
   7216 
   7217 /* Else handle a partial match. */
   7218 
   7219 else if (match_partial != NULL)
   7220   {
   7221   if (match_data->oveccount > 0)
   7222     {
   7223     match_data->ovector[0] = match_partial - subject;
   7224     match_data->ovector[1] = end_subject - subject;
   7225     }
   7226   match_data->startchar = match_partial - subject;
   7227   match_data->leftchar = start_partial - subject;
   7228   match_data->rightchar = end_subject - subject;
   7229   match_data->rc = PCRE2_ERROR_PARTIAL;
   7230   }
   7231 
   7232 /* Else this is the classic nomatch case. */
   7233 
   7234 else match_data->rc = PCRE2_ERROR_NOMATCH;
   7235 
   7236 /* Free any temporary offsets. */
   7237 
   7238 if (using_temporary_offsets)
   7239   mb->memctl.free(mb->ovector, mb->memctl.memory_data);
   7240 return match_data->rc;
   7241 }
   7242 
   7243 /* End of pcre2_match.c */
   7244