Home | History | Annotate | Download | only in dist
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9            Copyright (c) 1997-2014 University of Cambridge
     10 
     11 -----------------------------------------------------------------------------
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions are met:
     14 
     15     * Redistributions of source code must retain the above copyright notice,
     16       this list of conditions and the following disclaimer.
     17 
     18     * Redistributions in binary form must reproduce the above copyright
     19       notice, this list of conditions and the following disclaimer in the
     20       documentation and/or other materials provided with the distribution.
     21 
     22     * Neither the name of the University of Cambridge nor the names of its
     23       contributors may be used to endorse or promote products derived from
     24       this software without specific prior written permission.
     25 
     26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36 POSSIBILITY OF SUCH DAMAGE.
     37 -----------------------------------------------------------------------------
     38 */
     39 
     40 /* This module contains pcre_exec(), the externally visible function that does
     41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
     42 possible. There are also some static supporting functions. */
     43 
     44 #ifdef HAVE_CONFIG_H
     45 #include "config.h"
     46 #endif
     47 
     48 #define NLBLOCK md             /* Block containing newline information */
     49 #define PSSTART start_subject  /* Field containing processed string start */
     50 #define PSEND   end_subject    /* Field containing processed string end */
        /* NOTE(review): the three names above are defined ahead of the
        pcre_internal.h include, presumably because macros in that header expand
        them - confirm against the header. */
     51 
     52 #include "pcre_internal.h"
     53 
     54 /* Undefine some potentially clashing cpp symbols. Both names are reused
        further down, as local variables of match() and as frame-access macros. */
     55
     56 #undef min
     57 #undef max
     58 
     59 /* The md->capture_last field uses the lower 16 bits for the last captured
     60 substring (which can never be greater than 65535) and a bit in the top half
     61 to mean "capture vector overflowed". This odd way of doing things was
     62 implemented when it was realized that preserving and restoring the overflow bit
     63 whenever the last capture number was saved/restored made for a neater
     64 interface, and doing it this way saved on (a) another variable, which would
     65 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
     66 separate set of save/restore instructions. The following defines are used in
     67 implementing this. */
     68
     69 #define CAPLMASK    0x0000ffff    /* Low half: the last capture number */
     70 #define OVFLMASK    0xffff0000    /* High half: reserved for the overflow flag */
     71 #define OVFLBIT     0x00010000    /* Overflow bit: the lowest bit of OVFLMASK */
     72 
     73 /* Values for setting in md->match_function_type to indicate two special types
     74 of call to match(). We do it this way to save on using another stack variable,
     75 as stack usage is to be discouraged. match() sets the field back to zero as
        soon as it has acted on MATCH_CBEGROUP. */
     76
     77 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
     78 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
     79 
     80 /* Non-error returns from the match() function. Error returns are externally
     81 defined PCRE_ERROR_xxx codes, which are all negative. */
     82
     83 #define MATCH_MATCH        1
     84 #define MATCH_NOMATCH      0
     85
     86 /* Special internal returns from the match() function. Make them sufficiently
     87 negative to avoid the external error codes. */
     88
     89 #define MATCH_ACCEPT       (-999)
     90 #define MATCH_KETRPOS      (-998)
     91 #define MATCH_ONCE         (-997)
     92 /* The next 5 must be kept together and in sequence so that a test that checks
     93 for any one of them can use a range. MATCH_BACKTRACK_MIN and
        MATCH_BACKTRACK_MAX name the two ends of that range (-996 to -992). */
     94 #define MATCH_COMMIT       (-996)
     95 #define MATCH_PRUNE        (-995)
     96 #define MATCH_SKIP         (-994)
     97 #define MATCH_SKIP_ARG     (-993)
     98 #define MATCH_THEN         (-992)
     99 #define MATCH_BACKTRACK_MAX MATCH_THEN    /* Numerically largest of the five */
    100 #define MATCH_BACKTRACK_MIN MATCH_COMMIT  /* Numerically smallest of the five */
    101 
    102 /* Maximum number of ints of offset to save on the stack for recursive calls.
    103 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
    104 because the offset vector is always a multiple of 3 long. The value is the
        dimension of the stacksave array, both as a local of match() and as the
        Xstacksave member of the heap frame. */
    105
    106 #define REC_STACK_SAVE_MAX 30
    107 
    108 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
        The two tables are parallel: each has 11 entries, and entry n of rep_min
        goes with entry n of rep_max. NOTE(review): the index is presumably the
        repeat opcode's offset from some base opcode - confirm where they are used. */
    109
    110 static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
    111 static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
    112 
    113 #ifdef PCRE_DEBUG
    114 /*************************************************
    115 *        Debugging function to print chars       *
    116 *************************************************/
    117
    118 /* Print a sequence of chars in printable format, stopping at the end of the
    119 subject if requested. Values that isprint() accepts are printed as they are;
        all other values, including any value of 256 or more, are printed in the
        form \x{hh}.
    120
    121 Arguments:
    122   p           points to characters
    123   length      number to print; nothing is printed if it is zero or negative
    124   is_subject  TRUE if printing from within md->start_subject
    125   md          pointer to matching data block; it is always dereferenced (to
                      read the utf flag), so it must be valid even when is_subject
                      is FALSE
    126
    127 Returns:     nothing
    128 */
    129
    130 static void
    131 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
    132 {
    133 pcre_uint32 c;
    134 BOOL utf = md->utf;
        /* The difference is smaller than length here, so it fits in an int. */
    135 if (is_subject && length > md->end_subject - p) length = (int)(md->end_subject - p);
        /* isprint() is defined only for values representable as unsigned char (and
        EOF). c is a pcre_uint32 and may hold more than that, so check the range
        before calling it. */
    136 while (length-- > 0)
    137   if ((c = UCHAR21INCTEST(p)) < 256 && isprint(c)) printf("%c", (char)c); else printf("\\x{%02x}", c);
    138 }
    139 #endif
    140 
    141 
    142 
    143 /*************************************************
    144 *          Match a back-reference                *
    145 *************************************************/
    146
    147 /* Compare the subject at eptr with a previously captured substring. Normally,
        if a back reference hasn't been set, the length that is passed is
    148 negative, so the match always fails. However, in JavaScript compatibility mode,
    149 the length passed is zero. Note that in caseless UTF-8 mode, the number of
    150 subject bytes matched may be different to the number of reference bytes.
    151
    152 Arguments:
    153   offset      index into the offset vector; the entry there gives the start of
                      the reference, relative to md->start_subject
    154   eptr        pointer into the subject
    155   length      length of reference to be matched (number of bytes); it is
                      counted in elements of the reference pointer's type
    156   md          points to match data block
    157   caseless    TRUE if caseless
    158
    159 Returns:      >= 0 the number of subject bytes matched
    160               -1 no match
    161               -2 partial match; always given if at end subject, that is, when
                         the subject runs out before the whole reference has been
                         compared
    162 */
    163
    164 static int
    165 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
    166   BOOL caseless)
    167 {
    168 PCRE_PUCHAR eptr_start = eptr;
        /* p walks along the reference; eptr walks along the subject. */
    169 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
    170 #if defined SUPPORT_UTF && defined SUPPORT_UCP
    171 BOOL utf = md->utf;
    172 #endif
    173
        /* Debug trace only. Note that it runs before the unset-reference check
        below, so length may still be negative here. */
    174 #ifdef PCRE_DEBUG
    175 if (eptr >= md->end_subject)
    176   printf("matching subject <null>");
    177 else
    178   {
    179   printf("matching subject ");
    180   pchars(eptr, length, TRUE, md);
    181   }
    182 printf(" against backref ");
    183 pchars(p, length, FALSE, md);
    184 printf("\n");
    185 #endif
    186
    187 /* Always fail if reference not set (and not JavaScript compatible - in that
    188 case the length is passed as zero). */
    189
    190 if (length < 0) return -1;
    191
    192 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
    193 properly if Unicode properties are supported. Otherwise, we can check only
    194 ASCII characters. */
    195
    196 if (caseless)
    197   {
    198 #if defined SUPPORT_UTF && defined SUPPORT_UCP
    199   if (utf)
    200     {
    201     /* Match characters up to the end of the reference. NOTE: the number of
    202     data units matched may differ, because in UTF-8 there are some characters
    203     whose upper and lower case versions code have different numbers of bytes.
    204     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
    205     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
    206     sequence of two of the latter. It is important, therefore, to check the
    207     length along the reference, not along the subject (earlier code did this
    208     wrong). */
    209
    210     PCRE_PUCHAR endptr = p + length;
    211     while (p < endptr)
    212       {
    213       pcre_uint32 c, d;
    214       const ucd_record *ur;
    215       if (eptr >= md->end_subject) return -2;   /* Partial match */
    216       GETCHARINC(c, eptr);
    217       GETCHARINC(d, p);
    218       ur = GET_UCD(d);
              /* c is accepted if it equals the reference character d, or d's
              other case, or any member of d's caseless set. */
    219       if (c != d && c != d + ur->other_case)
    220         {
    221         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
                /* NOTE(review): this scan has no explicit end test, so it appears
                to rely on each set being in ascending order and ending with a
                value that no character exceeds - confirm in the table's
                definition. */
    222         for (;;)
    223           {
    224           if (c < *pp) return -1;
    225           if (c == *pp++) break;
    226           }
    227         }
    228       }
    229     }
    230   else
    231 #endif
    232
    233   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
    234   is no UCP support. It compares one element at a time after mapping both
          sides through the md->lcc table. */
    235     {
    236     while (length-- > 0)
    237       {
    238       pcre_uint32 cc, cp;
    239       if (eptr >= md->end_subject) return -2;   /* Partial match */
    240       cc = UCHAR21TEST(eptr);
    241       cp = UCHAR21TEST(p);
    242       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
    243       p++;
    244       eptr++;
    245       }
    246     }
    247   }
    248
    249 /* In the caseful case, we can just compare the bytes, whether or not we
    250 are in UTF-8 mode. */
    251
    252 else
    253   {
    254   while (length-- > 0)
    255     {
    256     if (eptr >= md->end_subject) return -2;   /* Partial match */
    257     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
    258     }
    259   }
    260
        /* The result is how far eptr moved along the subject, which is why the
        starting value was saved on entry. */
    261 return (int)(eptr - eptr_start);
    262 }
    263 
    264 
    265 
    266 /***************************************************************************
    267 ****************************************************************************
    268                    RECURSION IN THE match() FUNCTION
    269 
    270 The match() function is highly recursive, though not every recursive call
    271 increases the recursive depth. Nevertheless, some regular expressions can cause
    272 it to recurse to a great depth. I was writing for Unix, so I just let it call
    273 itself recursively. This uses the stack for saving everything that has to be
    274 saved for a recursive call. On Unix, the stack can be large, and this works
    275 fine.
    276 
    277 It turns out that on some non-Unix-like systems there are problems with
    278 programs that use a lot of stack. (This despite the fact that every last chip
    279 has oodles of memory these days, and techniques for extending the stack have
    280 been known for decades.) So....
    281 
    282 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
    283 calls by keeping local variables that need to be preserved in blocks of memory
    284 obtained from malloc() instead of on the stack. Macros are used to
    285 achieve this so that the actual code doesn't look very different to what it
    286 always used to.
    287 
    288 The original heap-recursive code used longjmp(). However, it seems that this
    289 can be very slow on some operating systems. Following a suggestion from Stan
    290 Switzer, the use of longjmp() has been abolished, at the cost of having to
    291 provide a unique number for each call to RMATCH. There is no way of generating
    292 a sequence of numbers at compile time in C. I have given them names, to make
    293 them stand out more clearly.
    294 
    295 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
    296 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
    297 tests. Furthermore, not using longjmp() means that local dynamic variables
    298 don't have indeterminate values; this has meant that the frame size can be
    299 reduced because the result can be "passed back" by straight setting of the
    300 variable instead of being passed in the frame.
    301 ****************************************************************************
    302 ***************************************************************************/
    303 
    304 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
    305 below must be updated in sync. Each RMn is passed as the last argument of an
        RMATCH call. The heap version of that macro stores the number in the frame's
        Xwhere field and also pastes it into the label L_RMn that marks the point to
        come back to, so every RMATCH call needs a number of its own. Numbering
        starts at 1. */
    306
    307 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
    308        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
    309        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
    310        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
    311        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
    312        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
    313        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
    314 
    315 /* These versions of the macros use the stack, as normal. There are debugging
    316 versions and production versions. Note that the "rw" argument of RMATCH isn't
    317 actually used in this definition. RMATCH calls match() directly, passing the
        caller's mstart unchanged and rdepth+1 as the new depth, and leaves the
        result in rrc. RRETURN is a plain return; the debugging versions also print
        the source line numbers involved. */
    318
    319 #ifndef NO_RECURSE
    320 #define REGISTER register
    321
    322 #ifdef PCRE_DEBUG
    323 #define RMATCH(ra,rb,rc,rd,re,rw) \
    324   { \
    325   printf("match() called in line %d\n", __LINE__); \
    326   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
    327   printf("to line %d\n", __LINE__); \
    328   }
    329 #define RRETURN(ra) \
    330   { \
    331   printf("match() returned %d from line %d\n", ra, __LINE__); \
    332   return ra; \
    333   }
    334 #else
    335 #define RMATCH(ra,rb,rc,rd,re,rw) \
    336   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
    337 #define RRETURN(ra) return ra
    338 #endif
    339 
    340 #else
    341 
    342 
    343 /* These versions of the macros manage a private stack on the heap. Note that
    344 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
    345 argument of match(), which never changes.

        RMATCH takes the frame that follows the current one, allocating it with the
        stack_malloc function only if there is none yet, and gives up with
        PCRE_ERROR_NOMEMORY if that allocation fails. It records rw in the current
        frame's Xwhere, fills in the argument fields of the new frame, makes the new
        frame current and jumps to HEAP_RECURSE. The label L_<rw> marks where
        execution continues afterwards.

        RRETURN steps back to the previous frame. If there is one, it puts the
        result in rrc and jumps to HEAP_RETURN; otherwise it really returns. It does
        not free the frame it leaves, so that frame stays linked for RMATCH to
        reuse. */
    346
    347 #define REGISTER
    348
    349 #define RMATCH(ra,rb,rc,rd,re,rw)\
    350   {\
    351   heapframe *newframe = frame->Xnextframe;\
    352   if (newframe == NULL)\
    353     {\
    354     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    355     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    356     newframe->Xnextframe = NULL;\
    357     frame->Xnextframe = newframe;\
    358     }\
    359   frame->Xwhere = rw;\
    360   newframe->Xeptr = ra;\
    361   newframe->Xecode = rb;\
    362   newframe->Xmstart = mstart;\
    363   newframe->Xoffset_top = rc;\
    364   newframe->Xeptrb = re;\
    365   newframe->Xrdepth = frame->Xrdepth + 1;\
    366   newframe->Xprevframe = frame;\
    367   frame = newframe;\
    368   DPRINTF(("restarting from line %d\n", __LINE__));\
    369   goto HEAP_RECURSE;\
    370   L_##rw:\
    371   DPRINTF(("jumped back to line %d\n", __LINE__));\
    372   }
    373
    374 #define RRETURN(ra)\
    375   {\
    376   heapframe *oldframe = frame;\
    377   frame = oldframe->Xprevframe;\
    378   if (frame != NULL)\
    379     {\
    380     rrc = ra;\
    381     goto HEAP_RETURN;\
    382     }\
    383   return ra;\
    384   }
    385 
    386 
    387 /* Structure for remembering the local variables in a private frame. Frames
        are linked in both directions. Each member X<name> stands in for the
        argument or local variable <name> of match(), which reaches it through a
        macro of that name defined inside the function. */
    388
    389 typedef struct heapframe {
    390   struct heapframe *Xprevframe;
    391   struct heapframe *Xnextframe;
    392
    393   /* Function arguments that may change (md is not kept here) */
    394
    395   PCRE_PUCHAR Xeptr;
    396   const pcre_uchar *Xecode;
    397   PCRE_PUCHAR Xmstart;
    398   int Xoffset_top;
    399   eptrblock *Xeptrb;
    400   unsigned int Xrdepth;
    401
    402   /* Function local variables */
    403
    404   PCRE_PUCHAR Xcallpat;
    405 #ifdef SUPPORT_UTF
    406   PCRE_PUCHAR Xcharptr;
    407 #endif
    408   PCRE_PUCHAR Xdata;
    409   PCRE_PUCHAR Xnext;
    410   PCRE_PUCHAR Xpp;
    411   PCRE_PUCHAR Xprev;
    412   PCRE_PUCHAR Xsaved_eptr;
    413
    414   recursion_info Xnew_recursive;
    415
    416   BOOL Xcur_is_word;
    417   BOOL Xcondition;
    418   BOOL Xprev_is_word;
    419
    420 #ifdef SUPPORT_UCP
    421   int Xprop_type;
    422   unsigned int Xprop_value;
    423   int Xprop_fail_result;
    424   int Xoclength;
    425   pcre_uchar Xocchars[6];
    426 #endif
    427
    428   int Xcodelink;
    429   int Xctype;
    430   unsigned int Xfc;
    431   int Xfi;
    432   int Xlength;
    433   int Xmax;
    434   int Xmin;
    435   unsigned int Xnumber;
    436   int Xoffset;
    437   unsigned int Xop;
    438   pcre_int32 Xsave_capture_last;
    439   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
    440   int Xstacksave[REC_STACK_SAVE_MAX];
    441
    442   eptrblock Xnewptrb;
    443
    444   /* Where to jump back to: the RMn number stored here by RMATCH */
    445
    446   int Xwhere;
    447
    448 } heapframe;
    449 
    450 #endif
    451 
    452 
    453 /***************************************************************************
    454 ***************************************************************************/
    455 
    456 
    457 
    458 /*************************************************
    459 *         Match from current position            *
    460 *************************************************/
    461 
    462 /* This function is called recursively in many circumstances. Whenever it
    463 returns a negative (error) response, the outer incarnation must also return the
    464 same response. */
    465 
    466 /* These macros pack up tests that are used for partial matching, and which
    467 appear several times in the code. We set the "hit end" flag if the pointer is
    468 at the end of the subject and also past the start of the subject (i.e.
    469 something has been matched). For hard partial matching, we then return
    470 immediately. The second one is used when we already know we are past the end of
    471 the subject.

        In the code, "past the start" is tested as eptr > md->start_used_ptr, and
        hard partial matching is md->partial > 1. Each macro expands to a bare "if"
        statement with no "else", so take care when using one directly before an
        "else". Both use RRETURN and so can be used only inside match(). */
    472
    473 #define CHECK_PARTIAL()\
    474   if (md->partial != 0 && eptr >= md->end_subject && \
    475       eptr > md->start_used_ptr) \
    476     { \
    477     md->hitend = TRUE; \
    478     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    479     }
    480
    481 #define SCHECK_PARTIAL()\
    482   if (md->partial != 0 && eptr > md->start_used_ptr) \
    483     { \
    484     md->hitend = TRUE; \
    485     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    486     }
    487 
    488 
    489 /* Performance note: It might be tempting to extract commonly used fields from
    490 the md structure (e.g. utf, end_subject) into individual variables to improve
    491 performance. Tests using gcc on a SPARC disproved this; in the first case, it
    492 made performance worse.
    493 
    494 Arguments:
    495    eptr        pointer to current character in subject
    496    ecode       pointer to current position in compiled code
    497    mstart      pointer to the current match start position (can be modified
    498                  by encountering \K)
    499    offset_top  current top pointer
    500    md          pointer to "static" info for the match
    501    eptrb       pointer to chain of blocks containing eptr at start of
    502                  brackets - for testing for empty matches
    503    rdepth      the recursion depth
    504 
    505 Returns:       MATCH_MATCH if matched            )  these values are >= 0
    506                MATCH_NOMATCH if failed to match  )
    507                a negative MATCH_xxx value for PRUNE, SKIP, etc
    508                a negative PCRE_ERROR_xxx value if aborted by an error condition
    509                  (e.g. stopped by repeated call or recursion limit)
    510 */
    511 
    512 static int
    513 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
    514   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
    515   unsigned int rdepth)
    516 {
    517 /* These variables do not need to be preserved over recursion in this function,
    518 so they can be ordinary variables in all cases. Mark some of them with
    519 "register" because they are used a lot in loops. */
    520 
    521 register int  rrc;         /* Returns from recursive calls */
    522 register int  i;           /* Used for loops not involving calls to RMATCH() */
    523 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
    524 register BOOL utf;         /* Local copy of UTF flag for speed */
    525 
    526 BOOL minimize, possessive; /* Quantifier options */
    527 BOOL caseless;
    528 int condcode;
    529 
    530 /* When recursion is not being used, all "local" variables that have to be
    531 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
    532 frame on the stack here; subsequent instantiations are obtained from the heap
    533 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
    534 the top-level on the stack rather than malloc-ing them all gives a performance
    535 boost in many cases where there is not much "recursion". */
    536 
    537 #ifdef NO_RECURSE
    538 heapframe *frame = (heapframe *)md->match_frames_base;
    539 
    540 /* Copy in the original argument variables */
    541 
    542 frame->Xeptr = eptr;
    543 frame->Xecode = ecode;
    544 frame->Xmstart = mstart;
    545 frame->Xoffset_top = offset_top;
    546 frame->Xeptrb = eptrb;
    547 frame->Xrdepth = rdepth;
    548 
    549 /* This is where control jumps back to in order to effect "recursion" */
    550 
    551 HEAP_RECURSE:
    552 
    553 /* Macros make the argument variables come from the current frame */
    554 
    555 #define eptr               frame->Xeptr
    556 #define ecode              frame->Xecode
    557 #define mstart             frame->Xmstart
    558 #define offset_top         frame->Xoffset_top
    559 #define eptrb              frame->Xeptrb
    560 #define rdepth             frame->Xrdepth
    561 
    562 /* Ditto for the local variables */
    563 
    564 #ifdef SUPPORT_UTF
    565 #define charptr            frame->Xcharptr
    566 #endif
    567 #define callpat            frame->Xcallpat
    568 #define codelink           frame->Xcodelink
    569 #define data               frame->Xdata
    570 #define next               frame->Xnext
    571 #define pp                 frame->Xpp
    572 #define prev               frame->Xprev
    573 #define saved_eptr         frame->Xsaved_eptr
    574 
    575 #define new_recursive      frame->Xnew_recursive
    576 
    577 #define cur_is_word        frame->Xcur_is_word
    578 #define condition          frame->Xcondition
    579 #define prev_is_word       frame->Xprev_is_word
    580 
    581 #ifdef SUPPORT_UCP
    582 #define prop_type          frame->Xprop_type
    583 #define prop_value         frame->Xprop_value
    584 #define prop_fail_result   frame->Xprop_fail_result
    585 #define oclength           frame->Xoclength
    586 #define occhars            frame->Xocchars
    587 #endif
    588 
    589 #define ctype              frame->Xctype
    590 #define fc                 frame->Xfc
    591 #define fi                 frame->Xfi
    592 #define length             frame->Xlength
    593 #define max                frame->Xmax
    594 #define min                frame->Xmin
    595 #define number             frame->Xnumber
    596 #define offset             frame->Xoffset
    597 #define op                 frame->Xop
    598 #define save_capture_last  frame->Xsave_capture_last
    599 #define save_offset1       frame->Xsave_offset1
    600 #define save_offset2       frame->Xsave_offset2
    601 #define save_offset3       frame->Xsave_offset3
    602 #define stacksave          frame->Xstacksave
    603 
    604 #define newptrb            frame->Xnewptrb
    605 
    606 /* When recursion is being used, local variables are allocated on the stack and
    607 get preserved during recursion in the normal way. In this environment, fi and
    608 i, and fc and c, can be the same variables. */
    609 
    610 #else         /* NO_RECURSE not defined */
    611 #define fi i
    612 #define fc c
    613 
    614 /* Many of the following variables are used only in small blocks of the code.
    615 My normal style of coding would have declared them within each of those blocks.
    616 However, in order to accommodate the version of this code that uses an external
    617 "stack" implemented on the heap, it is easier to declare them all here, so the
    618 declarations can be cut out in a block. The only declarations within blocks
    619 below are for variables that do not have to be preserved over a recursive call
    620 to RMATCH(). */
    621 
    622 #ifdef SUPPORT_UTF
    623 const pcre_uchar *charptr;
    624 #endif
    625 const pcre_uchar *callpat;
    626 const pcre_uchar *data;
    627 const pcre_uchar *next;
    628 PCRE_PUCHAR       pp;
    629 const pcre_uchar *prev;
    630 PCRE_PUCHAR       saved_eptr;
    631 
    632 recursion_info new_recursive;
    633 
    634 BOOL cur_is_word;
    635 BOOL condition;
    636 BOOL prev_is_word;
    637 
    638 #ifdef SUPPORT_UCP
    639 int prop_type;
    640 unsigned int prop_value;
    641 int prop_fail_result;
    642 int oclength;
    643 pcre_uchar occhars[6];
    644 #endif
    645 
    646 int codelink;
    647 int ctype;
    648 int length;
    649 int max;
    650 int min;
    651 unsigned int number;
    652 int offset;
    653 unsigned int op;
    654 pcre_int32 save_capture_last;
    655 int save_offset1, save_offset2, save_offset3;
    656 int stacksave[REC_STACK_SAVE_MAX];
    657 
    658 eptrblock newptrb;
    659 
    660 /* There is a special fudge for calling match() in a way that causes it to
    661 measure the size of its basic stack frame when the stack is being used for
    662 recursion. The second argument (ecode) being NULL triggers this behaviour. It
    663 cannot normally ever be NULL. The return is the negated value of the frame
    664 size. */
    665 
    666 if (ecode == NULL)
    667   {
    668   if (rdepth == 0)
    669     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
    670   else
    671     {
    672     int len = (char *)&rdepth - (char *)eptr;
    673     return (len > 0)? -len : len;
    674     }
    675   }
    676 #endif     /* NO_RECURSE */
    677 
    678 /* To save space on the stack and in the heap frame, I have doubled up on some
    679 of the local variables that are used only in localised parts of the code, but
    680 still need to be preserved over recursive calls of match(). These macros define
    681 the alternative names that are used. */
    682 
    683 #define allow_zero    cur_is_word
    684 #define cbegroup      condition
    685 #define code_offset   codelink
    686 #define condassert    condition
    687 #define matched_once  prev_is_word
    688 #define foc           number
    689 #define save_mark     data
    690 
    691 /* These statements are here to stop the compiler complaining about uninitialized
    692 variables. */
    693 
    694 #ifdef SUPPORT_UCP
    695 prop_value = 0;
    696 prop_fail_result = 0;
    697 #endif
    698 
    699 
    700 /* This label is used for tail recursion, which is used in a few cases even
    701 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
    702 used. Thanks to Ian Taylor for noticing this possibility and sending the
    703 original patch. */
    704 
    705 TAIL_RECURSE:
    706 
    707 /* OK, now we can get on with the real code of the function. Recursive calls
    708 are specified by the macro RMATCH and RRETURN is used to return. When
    709 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
    710 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
    711 defined). However, RMATCH isn't like a function call because it's quite a
    712 complicated macro. It has to be used in one particular way. This shouldn't,
    713 however, impact performance when true recursion is being used. */
    714 
    715 #ifdef SUPPORT_UTF
    716 utf = md->utf;       /* Local copy of the flag */
    717 #else
    718 utf = FALSE;
    719 #endif
    720 
    721 /* First check that we haven't called match() too many times, or that we
    722 haven't exceeded the recursive call limit. */
    723 
    724 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
    725 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
    726 
    727 /* At the start of a group with an unlimited repeat that may match an empty
    728 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
    729 done this way to save having to use another function argument, which would take
    730 up space on the stack. See also MATCH_CONDASSERT below.
    731 
    732 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
    733 such remembered pointers, to be checked when we hit the closing ket, in order
    734 to break infinite loops that match no characters. When match() is called in
    735 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
    736 NOT be used with tail recursion, because the memory block that is used is on
    737 the stack, so a new one may be required for each match(). */
    738 
    739 if (md->match_function_type == MATCH_CBEGROUP)
    740   {
    741   newptrb.epb_saved_eptr = eptr;
    742   newptrb.epb_prev = eptrb;
    743   eptrb = &newptrb;
    744   md->match_function_type = 0;
    745   }
    746 
    747 /* Now start processing the opcodes. */
    748 
    749 for (;;)
    750   {
    751   minimize = possessive = FALSE;
    752   op = *ecode;
    753 
    754   switch(op)
    755     {
    756     case OP_MARK:
    757     md->nomatch_mark = ecode + 2;
    758     md->mark = NULL;    /* In case previously set by assertion */
    759     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    760       eptrb, RM55);
    761     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    762          md->mark == NULL) md->mark = ecode + 2;
    763 
    764     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
    765     argument, and we must check whether that argument matches this MARK's
    766     argument. It is passed back in md->start_match_ptr (an overloading of that
    767     variable). If it does match, we reset that variable to the current subject
    768     position and return MATCH_SKIP. Otherwise, pass back the return code
    769     unaltered. */
    770 
    771     else if (rrc == MATCH_SKIP_ARG &&
    772         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
    773       {
    774       md->start_match_ptr = eptr;
    775       RRETURN(MATCH_SKIP);
    776       }
    777     RRETURN(rrc);
    778 
    779     case OP_FAIL:
    780     RRETURN(MATCH_NOMATCH);
    781 
    782     case OP_COMMIT:
    783     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    784       eptrb, RM52);
    785     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    786     RRETURN(MATCH_COMMIT);
    787 
    788     case OP_PRUNE:
    789     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    790       eptrb, RM51);
    791     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    792     RRETURN(MATCH_PRUNE);
    793 
    794     case OP_PRUNE_ARG:
    795     md->nomatch_mark = ecode + 2;
    796     md->mark = NULL;    /* In case previously set by assertion */
    797     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    798       eptrb, RM56);
    799     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    800          md->mark == NULL) md->mark = ecode + 2;
    801     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    802     RRETURN(MATCH_PRUNE);
    803 
    804     case OP_SKIP:
    805     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    806       eptrb, RM53);
    807     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    808     md->start_match_ptr = eptr;   /* Pass back current position */
    809     RRETURN(MATCH_SKIP);
    810 
    811     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
    812     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
    813     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
    814     that failed and any that precede it (either they also failed, or were not
    815     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
    816     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
    817     set to the count of the one that failed. */
    818 
    819     case OP_SKIP_ARG:
    820     md->skip_arg_count++;
    821     if (md->skip_arg_count <= md->ignore_skip_arg)
    822       {
    823       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
    824       break;
    825       }
    826     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    827       eptrb, RM57);
    828     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    829 
    830     /* Pass back the current skip name by overloading md->start_match_ptr and
    831     returning the special MATCH_SKIP_ARG return code. This will either be
    832     caught by a matching MARK, or get to the top, where it causes a rematch
    833     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
    834 
    835     md->start_match_ptr = ecode + 2;
    836     RRETURN(MATCH_SKIP_ARG);
    837 
    838     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
    839     the branch in which it occurs can be determined. Overload the start of
    840     match pointer to do this. */
    841 
    842     case OP_THEN:
    843     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    844       eptrb, RM54);
    845     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    846     md->start_match_ptr = ecode;
    847     RRETURN(MATCH_THEN);
    848 
    849     case OP_THEN_ARG:
    850     md->nomatch_mark = ecode + 2;
    851     md->mark = NULL;    /* In case previously set by assertion */
    852     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
    853       md, eptrb, RM58);
    854     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    855          md->mark == NULL) md->mark = ecode + 2;
    856     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    857     md->start_match_ptr = ecode;
    858     RRETURN(MATCH_THEN);
    859 
    860     /* Handle an atomic group that does not contain any capturing parentheses.
    861     This can be handled like an assertion. Prior to 8.13, all atomic groups
    862     were handled this way. In 8.13, the code was changed as below for ONCE, so
    863     that backups pass through the group and thereby reset captured values.
    864     However, this uses a lot more stack, so in 8.20, atomic groups that do not
    865     contain any captures generate OP_ONCE_NC, which can be handled in the old,
    866     less stack intensive way.
    867 
    868     Check the alternative branches in turn - the matching won't pass the KET
    869     for this kind of subpattern. If any one branch matches, we carry on as at
    870     the end of a normal bracket, leaving the subject pointer, but resetting
    871     the start-of-match value in case it was changed by \K. */
    872 
    873     case OP_ONCE_NC:
    874     prev = ecode;
    875     saved_eptr = eptr;
    876     save_mark = md->mark;
    877     do
    878       {
    879       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
    880       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
    881         {
    882         mstart = md->start_match_ptr;
    883         break;
    884         }
    885       if (rrc == MATCH_THEN)
    886         {
    887         next = ecode + GET(ecode,1);
    888         if (md->start_match_ptr < next &&
    889             (*ecode == OP_ALT || *next == OP_ALT))
    890           rrc = MATCH_NOMATCH;
    891         }
    892 
    893       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    894       ecode += GET(ecode,1);
    895       md->mark = save_mark;
    896       }
    897     while (*ecode == OP_ALT);
    898 
    899     /* If hit the end of the group (which could be repeated), fail */
    900 
    901     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
    902 
    903     /* Continue as from after the group, updating the offsets high water
    904     mark, since extracts may have been taken. */
    905 
    906     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
    907 
    908     offset_top = md->end_offset_top;
    909     eptr = md->end_match_ptr;
    910 
    911     /* For a non-repeating ket, just continue at this level. This also
    912     happens for a repeating ket if no characters were matched in the group.
    913     This is the forcible breaking of infinite loops as implemented in Perl
    914     5.005. */
    915 
    916     if (*ecode == OP_KET || eptr == saved_eptr)
    917       {
    918       ecode += 1+LINK_SIZE;
    919       break;
    920       }
    921 
    922     /* The repeating kets try the rest of the pattern or restart from the
    923     preceding bracket, in the appropriate order. The second "call" of match()
    924     uses tail recursion, to avoid using another stack frame. */
    925 
    926     if (*ecode == OP_KETRMIN)
    927       {
    928       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
    929       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    930       ecode = prev;
    931       goto TAIL_RECURSE;
    932       }
    933     else  /* OP_KETRMAX */
    934       {
    935       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
    936       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    937       ecode += 1 + LINK_SIZE;
    938       goto TAIL_RECURSE;
    939       }
    940     /* Control never gets here */
    941 
    942     /* Handle a capturing bracket, other than those that are possessive with an
    943     unlimited repeat. If there is space in the offset vector, save the current
    944     subject position in the working slot at the top of the vector. We mustn't
    945     change the current values of the data slot, because they may be set from a
    946     previous iteration of this group, and be referred to by a reference inside
    947     the group. A failure to match might occur after the group has succeeded,
    948     if something later on doesn't match. For this reason, we need to restore
    949     the working value and also the values of the final offsets, in case they
    950     were set by a previous iteration of the same bracket.
    951 
    952     If there isn't enough space in the offset vector, treat this as if it were
    953     a non-capturing bracket. Don't worry about setting the flag for the error
    954     case here; that is handled in the code for KET. */
    955 
    956     case OP_CBRA:
    957     case OP_SCBRA:
    958     number = GET2(ecode, 1+LINK_SIZE);
    959     offset = number << 1;
    960 
    961 #ifdef PCRE_DEBUG
    962     printf("start bracket %d\n", number);
    963     printf("subject=");
    964     pchars(eptr, 16, TRUE, md);
    965     printf("\n");
    966 #endif
    967 
    968     if (offset < md->offset_max)
    969       {
    970       save_offset1 = md->offset_vector[offset];
    971       save_offset2 = md->offset_vector[offset+1];
    972       save_offset3 = md->offset_vector[md->offset_end - number];
    973       save_capture_last = md->capture_last;
    974       save_mark = md->mark;
    975 
    976       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
    977       md->offset_vector[md->offset_end - number] =
    978         (int)(eptr - md->start_subject);
    979 
    980       for (;;)
    981         {
    982         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
    983         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    984           eptrb, RM1);
    985         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
    986 
    987         /* If we backed up to a THEN, check whether it is within the current
    988         branch by comparing the address of the THEN that is passed back with
    989         the end of the branch. If it is within the current branch, and the
    990         branch is one of two or more alternatives (it either starts or ends
    991         with OP_ALT), we have reached the limit of THEN's action, so convert
    992         the return code to NOMATCH, which will cause normal backtracking to
    993         happen from now on. Otherwise, THEN is passed back to an outer
    994         alternative. This implements Perl's treatment of parenthesized groups,
    995         where a group not containing | does not affect the current alternative,
    996         that is, (X) is NOT the same as (X|(*F)). */
    997 
    998         if (rrc == MATCH_THEN)
    999           {
   1000           next = ecode + GET(ecode,1);
   1001           if (md->start_match_ptr < next &&
   1002               (*ecode == OP_ALT || *next == OP_ALT))
   1003             rrc = MATCH_NOMATCH;
   1004           }
   1005 
   1006         /* Anything other than NOMATCH is passed back. */
   1007 
   1008         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1009         md->capture_last = save_capture_last;
   1010         ecode += GET(ecode, 1);
   1011         md->mark = save_mark;
   1012         if (*ecode != OP_ALT) break;
   1013         }
   1014 
   1015       DPRINTF(("bracket %d failed\n", number));
   1016       md->offset_vector[offset] = save_offset1;
   1017       md->offset_vector[offset+1] = save_offset2;
   1018       md->offset_vector[md->offset_end - number] = save_offset3;
   1019 
   1020       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
   1021 
   1022       RRETURN(rrc);
   1023       }
   1024 
   1025     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
   1026     as a non-capturing bracket. */
   1027 
   1028     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1029     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1030 
   1031     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
   1032 
   1033     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1034     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1035 
   1036     /* Non-capturing or atomic group, except for possessive with unlimited
   1037     repeat and ONCE group with no captures. Loop for all the alternatives.
   1038 
   1039     When we get to the final alternative within the brackets, we used to return
   1040     the result of a recursive call to match() whatever happened so it was
   1041     possible to reduce stack usage by turning this into a tail recursion,
   1042     except in the case of a possibly empty group. However, now that there is
   1043     the possibility of (*THEN) occurring in the final alternative, this
   1044     optimization is no longer always possible.
   1045 
   1046     We can optimize if we know there are no (*THEN)s in the pattern; at present
   1047     this is the best that can be done.
   1048 
   1049     MATCH_ONCE is returned when the end of an atomic group is successfully
   1050     reached, but subsequent matching fails. It passes back up the tree (causing
   1051     captured values to be reset) until the original atomic group level is
   1052     reached. This is tested by comparing md->once_target with the start of the
   1053     group. At this point, the return is converted into MATCH_NOMATCH so that
   1054     previous backup points can be taken. */
   1055 
   1056     case OP_ONCE:
   1057     case OP_BRA:
   1058     case OP_SBRA:
   1059     DPRINTF(("start non-capturing bracket\n"));
   1060 
   1061     for (;;)
   1062       {
   1063       if (op >= OP_SBRA || op == OP_ONCE)
   1064         md->match_function_type = MATCH_CBEGROUP;
   1065 
   1066       /* If this is not a possibly empty group, and there are no (*THEN)s in
   1067       the pattern, and this is the final alternative, optimize as described
   1068       above. */
   1069 
   1070       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
   1071         {
   1072         ecode += PRIV(OP_lengths)[*ecode];
   1073         goto TAIL_RECURSE;
   1074         }
   1075 
   1076       /* In all other cases, we have to make another call to match(). */
   1077 
   1078       save_mark = md->mark;
   1079       save_capture_last = md->capture_last;
   1080       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
   1081         RM2);
   1082 
   1083       /* See comment in the code for capturing groups above about handling
   1084       THEN. */
   1085 
   1086       if (rrc == MATCH_THEN)
   1087         {
   1088         next = ecode + GET(ecode,1);
   1089         if (md->start_match_ptr < next &&
   1090             (*ecode == OP_ALT || *next == OP_ALT))
   1091           rrc = MATCH_NOMATCH;
   1092         }
   1093 
   1094       if (rrc != MATCH_NOMATCH)
   1095         {
   1096         if (rrc == MATCH_ONCE)
   1097           {
   1098           const pcre_uchar *scode = ecode;
   1099           if (*scode != OP_ONCE)           /* If not at start, find it */
   1100             {
   1101             while (*scode == OP_ALT) scode += GET(scode, 1);
   1102             scode -= GET(scode, 1);
   1103             }
   1104           if (md->once_target == scode) rrc = MATCH_NOMATCH;
   1105           }
   1106         RRETURN(rrc);
   1107         }
   1108       ecode += GET(ecode, 1);
   1109       md->mark = save_mark;
   1110       if (*ecode != OP_ALT) break;
   1111       md->capture_last = save_capture_last;
   1112       }
   1113 
   1114     RRETURN(MATCH_NOMATCH);
   1115 
   1116     /* Handle possessive capturing brackets with an unlimited repeat. We come
   1117     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
   1118     handled similarly to the normal case above. However, the matching is
   1119     different. The end of these brackets will always be OP_KETRPOS, which
   1120     returns MATCH_KETRPOS without going further in the pattern. By this means
   1121     we can handle the group by iteration rather than recursion, thereby
   1122     reducing the amount of stack needed. */
   1123 
   1124     case OP_CBRAPOS:
   1125     case OP_SCBRAPOS:
   1126     allow_zero = FALSE;
   1127 
   1128     POSSESSIVE_CAPTURE:
   1129     number = GET2(ecode, 1+LINK_SIZE);
   1130     offset = number << 1;
   1131 
   1132 #ifdef PCRE_DEBUG
   1133     printf("start possessive bracket %d\n", number);
   1134     printf("subject=");
   1135     pchars(eptr, 16, TRUE, md);
   1136     printf("\n");
   1137 #endif
   1138 
   1139     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
   1140 
   1141     matched_once = FALSE;
   1142     code_offset = (int)(ecode - md->start_code);
   1143 
   1144     save_offset1 = md->offset_vector[offset];
   1145     save_offset2 = md->offset_vector[offset+1];
   1146     save_offset3 = md->offset_vector[md->offset_end - number];
   1147     save_capture_last = md->capture_last;
   1148 
   1149     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
   1150 
   1151     /* Each time round the loop, save the current subject position for use
   1152     when the group matches. For MATCH_MATCH, the group has matched, so we
   1153     restart it with a new subject starting position, remembering that we had
   1154     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
   1155     usual. If we haven't matched any alternatives in any iteration, check to
   1156     see if a previous iteration matched. If so, the group has matched;
   1157     continue from afterwards. Otherwise it has failed; restore the previous
   1158     capture values before returning NOMATCH. */
   1159 
   1160     for (;;)
   1161       {
   1162       md->offset_vector[md->offset_end - number] =
   1163         (int)(eptr - md->start_subject);
   1164       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
   1165       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
   1166         eptrb, RM63);
   1167       if (rrc == MATCH_KETRPOS)
   1168         {
   1169         offset_top = md->end_offset_top;
   1170         ecode = md->start_code + code_offset;
   1171         save_capture_last = md->capture_last;
   1172         matched_once = TRUE;
   1173         mstart = md->start_match_ptr;    /* In case \K changed it */
   1174         if (eptr == md->end_match_ptr)   /* Matched an empty string */
   1175           {
   1176           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1177           break;
   1178           }
   1179         eptr = md->end_match_ptr;
   1180         continue;
   1181         }
   1182 
   1183       /* See comment in the code for capturing groups above about handling
   1184       THEN. */
   1185 
   1186       if (rrc == MATCH_THEN)
   1187         {
   1188         next = ecode + GET(ecode,1);
   1189         if (md->start_match_ptr < next &&
   1190             (*ecode == OP_ALT || *next == OP_ALT))
   1191           rrc = MATCH_NOMATCH;
   1192         }
   1193 
   1194       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1195       md->capture_last = save_capture_last;
   1196       ecode += GET(ecode, 1);
   1197       if (*ecode != OP_ALT) break;
   1198       }
   1199 
   1200     if (!matched_once)
   1201       {
   1202       md->offset_vector[offset] = save_offset1;
   1203       md->offset_vector[offset+1] = save_offset2;
   1204       md->offset_vector[md->offset_end - number] = save_offset3;
   1205       }
   1206 
   1207     if (allow_zero || matched_once)
   1208       {
   1209       ecode += 1 + LINK_SIZE;
   1210       break;
   1211       }
   1212 
   1213     RRETURN(MATCH_NOMATCH);
   1214 
   1215     /* Non-capturing possessive bracket with unlimited repeat. We come here
   1216     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
   1217     without the capturing complication. It is written out separately for speed
   1218     and cleanliness. */
   1219 
   1220     case OP_BRAPOS:
   1221     case OP_SBRAPOS:
   1222     allow_zero = FALSE;
   1223 
   1224     POSSESSIVE_NON_CAPTURE:
   1225     matched_once = FALSE;
   1226     code_offset = (int)(ecode - md->start_code);
   1227     save_capture_last = md->capture_last;
   1228 
   1229     for (;;)
   1230       {
   1231       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
   1232       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
   1233         eptrb, RM48);
   1234       if (rrc == MATCH_KETRPOS)
   1235         {
   1236         offset_top = md->end_offset_top;
   1237         ecode = md->start_code + code_offset;
   1238         matched_once = TRUE;
   1239         mstart = md->start_match_ptr;   /* In case \K reset it */
   1240         if (eptr == md->end_match_ptr)  /* Matched an empty string */
   1241           {
   1242           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1243           break;
   1244           }
   1245         eptr = md->end_match_ptr;
   1246         continue;
   1247         }
   1248 
   1249       /* See comment in the code for capturing groups above about handling
   1250       THEN. */
   1251 
   1252       if (rrc == MATCH_THEN)
   1253         {
   1254         next = ecode + GET(ecode,1);
   1255         if (md->start_match_ptr < next &&
   1256             (*ecode == OP_ALT || *next == OP_ALT))
   1257           rrc = MATCH_NOMATCH;
   1258         }
   1259 
   1260       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1261       ecode += GET(ecode, 1);
   1262       if (*ecode != OP_ALT) break;
   1263       md->capture_last = save_capture_last;
   1264       }
   1265 
   1266     if (matched_once || allow_zero)
   1267       {
   1268       ecode += 1 + LINK_SIZE;
   1269       break;
   1270       }
   1271     RRETURN(MATCH_NOMATCH);
   1272 
   1273     /* Control never reaches here. */
   1274 
   1275     /* Conditional group: compilation checked that there are no more than two
   1276     branches. If the condition is false, skipping the first branch takes us
   1277     past the end of the item if there is only one branch, but that's exactly
   1278     what we want. */
   1279 
   1280     case OP_COND:
   1281     case OP_SCOND:
   1282 
   1283     /* The variable codelink will be added to ecode when the condition is
   1284     false, to get to the second branch. Setting it to the offset to the ALT
   1285     or KET, then incrementing ecode achieves this effect. We now have ecode
   1286     pointing to the condition or callout. */
   1287 
   1288     codelink = GET(ecode, 1);   /* Offset to the second branch */
   1289     ecode += 1 + LINK_SIZE;     /* From this opcode */
   1290 
   1291     /* Because of the way auto-callout works during compile, a callout item is
   1292     inserted between OP_COND and an assertion condition. */
   1293 
   1294     if (*ecode == OP_CALLOUT)
   1295       {
   1296       if (PUBL(callout) != NULL)
   1297         {
   1298         PUBL(callout_block) cb;
   1299         cb.version          = 2;   /* Version 1 of the callout block */
   1300         cb.callout_number   = ecode[1];
   1301         cb.offset_vector    = md->offset_vector;
   1302 #if defined COMPILE_PCRE8
   1303         cb.subject          = (PCRE_SPTR)md->start_subject;
   1304 #elif defined COMPILE_PCRE16
   1305         cb.subject          = (PCRE_SPTR16)md->start_subject;
   1306 #elif defined COMPILE_PCRE32
   1307         cb.subject          = (PCRE_SPTR32)md->start_subject;
   1308 #endif
   1309         cb.subject_length   = (int)(md->end_subject - md->start_subject);
   1310         cb.start_match      = (int)(mstart - md->start_subject);
   1311         cb.current_position = (int)(eptr - md->start_subject);
   1312         cb.pattern_position = GET(ecode, 2);
   1313         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
   1314         cb.capture_top      = offset_top/2;
   1315         cb.capture_last     = md->capture_last & CAPLMASK;
   1316         /* Internal change requires this for API compatibility. */
   1317         if (cb.capture_last == 0) cb.capture_last = -1;
   1318         cb.callout_data     = md->callout_data;
   1319         cb.mark             = md->nomatch_mark;
   1320         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
   1321         if (rrc < 0) RRETURN(rrc);
   1322         }
   1323 
   1324       /* Advance ecode past the callout, so it now points to the condition. We
   1325       must adjust codelink so that the value of ecode+codelink is unchanged. */
   1326 
   1327       ecode += PRIV(OP_lengths)[OP_CALLOUT];
   1328       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
   1329       }
   1330 
   1331     /* Test the various possible conditions */
   1332 
   1333     condition = FALSE;
   1334     switch(condcode = *ecode)
   1335       {
   1336       case OP_RREF:         /* Numbered group recursion test */
   1337       if (md->recursive != NULL)     /* Not recursing => FALSE */
   1338         {
   1339         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
   1340         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
   1341         }
   1342       break;
   1343 
   1344       case OP_DNRREF:       /* Duplicate named group recursion test */
   1345       if (md->recursive != NULL)
   1346         {
   1347         int count = GET2(ecode, 1 + IMM2_SIZE);
   1348         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   1349         while (count-- > 0)
   1350           {
   1351           unsigned int recno = GET2(slot, 0);
   1352           condition = recno == md->recursive->group_num;
   1353           if (condition) break;
   1354           slot += md->name_entry_size;
   1355           }
   1356         }
   1357       break;
   1358 
   1359       case OP_CREF:         /* Numbered group used test */
   1360       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
   1361       condition = offset < offset_top && md->offset_vector[offset] >= 0;
   1362       break;
   1363 
   1364       case OP_DNCREF:      /* Duplicate named group used test */
   1365         {
   1366         int count = GET2(ecode, 1 + IMM2_SIZE);
   1367         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   1368         while (count-- > 0)
   1369           {
   1370           offset = GET2(slot, 0) << 1;
   1371           condition = offset < offset_top && md->offset_vector[offset] >= 0;
   1372           if (condition) break;
   1373           slot += md->name_entry_size;
   1374           }
   1375         }
   1376       break;
   1377 
   1378       case OP_DEF:     /* DEFINE - always false */
   1379       case OP_FAIL:    /* From optimized (?!) condition */
   1380       break;
   1381 
   1382       /* The condition is an assertion. Call match() to evaluate it - setting
   1383       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
   1384       of an assertion. */
   1385 
   1386       default:
   1387       md->match_function_type = MATCH_CONDASSERT;
   1388       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
   1389       if (rrc == MATCH_MATCH)
   1390         {
   1391         if (md->end_offset_top > offset_top)
   1392           offset_top = md->end_offset_top;  /* Captures may have happened */
   1393         condition = TRUE;
   1394 
   1395         /* Advance ecode past the assertion to the start of the first branch,
   1396         but adjust it so that the general choosing code below works. If the
   1397         assertion has a quantifier that allows zero repeats we must skip over
   1398         the BRAZERO. This is a lunatic thing to do, but somebody did! */
   1399 
   1400         if (*ecode == OP_BRAZERO) ecode++;
   1401         ecode += GET(ecode, 1);
   1402         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
   1403         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
   1404         }
   1405 
   1406       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
   1407       assertion; it is therefore treated as NOMATCH. Any other return is an
   1408       error. */
   1409 
   1410       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
   1411         {
   1412         RRETURN(rrc);         /* Need braces because of following else */
   1413         }
   1414       break;
   1415       }
   1416 
   1417     /* Choose branch according to the condition */
   1418 
   1419     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
   1420 
   1421     /* We are now at the branch that is to be obeyed. As there is only one, we
   1422     can use tail recursion to avoid using another stack frame, except when
   1423     there is unlimited repeat of a possibly empty group. In the latter case, a
   1424     recursive call to match() is always required, unless the second alternative
   1425     doesn't exist, in which case we can just plough on. Note that, for
   1426     compatibility with Perl, the | in a conditional group is NOT treated as
   1427     creating two alternatives. If a THEN is encountered in the branch, it
   1428     propagates out to the enclosing alternative (unless nested in a deeper set
   1429     of alternatives, of course). */
   1430 
   1431     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
   1432       {
   1433       if (op != OP_SCOND)
   1434         {
   1435         goto TAIL_RECURSE;
   1436         }
   1437 
   1438       md->match_function_type = MATCH_CBEGROUP;
   1439       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
   1440       RRETURN(rrc);
   1441       }
   1442 
   1443      /* Condition false & no alternative; continue after the group. */
   1444 
   1445     else
   1446       {
   1447       }
   1448     break;
   1449 
   1450 
   1451     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
   1452     to close any currently open capturing brackets. */
   1453 
   1454     case OP_CLOSE:
   1455     number = GET2(ecode, 1);   /* Must be less than 65536 */
   1456     offset = number << 1;
   1457 
   1458 #ifdef PCRE_DEBUG
   1459       printf("end bracket %d at *ACCEPT", number);
   1460       printf("\n");
   1461 #endif
   1462 
   1463     md->capture_last = (md->capture_last & OVFLMASK) | number;
   1464     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
   1465       {
   1466       md->offset_vector[offset] =
   1467         md->offset_vector[md->offset_end - number];
   1468       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1469 
   1470       /* If this group is at or above the current highwater mark, ensure that
   1471       any groups between the current high water mark and this group are marked
   1472       unset and then update the high water mark. */
   1473 
   1474       if (offset >= offset_top)
   1475         {
   1476         register int *iptr = md->offset_vector + offset_top;
   1477         register int *iend = md->offset_vector + offset;
   1478         while (iptr < iend) *iptr++ = -1;
   1479         offset_top = offset + 2;
   1480         }
   1481       }
   1482     ecode += 1 + IMM2_SIZE;
   1483     break;
   1484 
   1485 
   1486     /* End of the pattern, either real or forced. */
   1487 
   1488     case OP_END:
   1489     case OP_ACCEPT:
   1490     case OP_ASSERT_ACCEPT:
   1491 
   1492     /* If we have matched an empty string, fail if not in an assertion and not
   1493     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
   1494     is set and we have matched at the start of the subject. In both cases,
   1495     backtracking will then try other alternatives, if any. */
   1496 
   1497     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
   1498          md->recursive == NULL &&
   1499          (md->notempty ||
   1500            (md->notempty_atstart &&
   1501              mstart == md->start_subject + md->start_offset)))
   1502       RRETURN(MATCH_NOMATCH);
   1503 
   1504     /* Otherwise, we have a match. */
   1505 
   1506     md->end_match_ptr = eptr;           /* Record where we ended */
   1507     md->end_offset_top = offset_top;    /* and how many extracts were taken */
   1508     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
   1509 
   1510     /* For some reason, the macros don't work properly if an expression is
   1511     given as the argument to RRETURN when the heap is in use. */
   1512 
   1513     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
   1514     RRETURN(rrc);
   1515 
   1516     /* Assertion brackets. Check the alternative branches in turn - the
   1517     matching won't pass the KET for an assertion. If any one branch matches,
   1518     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
   1519     start of each branch to move the current point backwards, so the code at
   1520     this level is identical to the lookahead case. When the assertion is part
   1521     of a condition, we want to return immediately afterwards. The caller of
   1522     this incarnation of the match() function will have set MATCH_CONDASSERT in
   1523     md->match_function_type, and one of these opcodes will be the first opcode
   1524     that is processed. We use a local variable that is preserved over calls to
   1525     match() to remember this case. */
   1526 
   1527     case OP_ASSERT:
   1528     case OP_ASSERTBACK:
   1529     save_mark = md->mark;
   1530     if (md->match_function_type == MATCH_CONDASSERT)
   1531       {
   1532       condassert = TRUE;
   1533       md->match_function_type = 0;
   1534       }
   1535     else condassert = FALSE;
   1536 
   1537     /* Loop for each branch */
   1538 
   1539     do
   1540       {
   1541       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
   1542 
   1543       /* A match means that the assertion is true; break out of the loop
   1544       that matches its alternatives. */
   1545 
   1546       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1547         {
   1548         mstart = md->start_match_ptr;   /* In case \K reset it */
   1549         break;
   1550         }
   1551 
   1552       /* If not matched, restore the previous mark setting. */
   1553 
   1554       md->mark = save_mark;
   1555 
   1556       /* See comment in the code for capturing groups above about handling
   1557       THEN. */
   1558 
   1559       if (rrc == MATCH_THEN)
   1560         {
   1561         next = ecode + GET(ecode,1);
   1562         if (md->start_match_ptr < next &&
   1563             (*ecode == OP_ALT || *next == OP_ALT))
   1564           rrc = MATCH_NOMATCH;
   1565         }
   1566 
   1567       /* Anything other than NOMATCH causes the entire assertion to fail,
   1568       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
   1569       uncaptured THEN, which means they take their normal effect. This
   1570       consistent approach does not always have exactly the same effect as in
   1571       Perl. */
   1572 
   1573       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1574       ecode += GET(ecode, 1);
   1575       }
   1576     while (*ecode == OP_ALT);   /* Continue for next alternative */
   1577 
   1578     /* If we have tried all the alternative branches, the assertion has
   1579     failed. If not, we broke out after a match. */
   1580 
   1581     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
   1582 
   1583     /* If checking an assertion for a condition, return MATCH_MATCH. */
   1584 
   1585     if (condassert) RRETURN(MATCH_MATCH);
   1586 
   1587     /* Continue from after a successful assertion, updating the offsets high
   1588     water mark, since extracts may have been taken during the assertion. */
   1589 
   1590     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1591     ecode += 1 + LINK_SIZE;
   1592     offset_top = md->end_offset_top;
   1593     continue;
   1594 
   1595     /* Negative assertion: all branches must fail to match for the assertion to
   1596     succeed. */
   1597 
   1598     case OP_ASSERT_NOT:
   1599     case OP_ASSERTBACK_NOT:
   1600     save_mark = md->mark;
   1601     if (md->match_function_type == MATCH_CONDASSERT)
   1602       {
   1603       condassert = TRUE;
   1604       md->match_function_type = 0;
   1605       }
   1606     else condassert = FALSE;
   1607 
   1608     /* Loop for each alternative branch. */
   1609 
   1610     do
   1611       {
   1612       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
   1613       md->mark = save_mark;   /* Always restore the mark setting */
   1614 
   1615       switch(rrc)
   1616         {
   1617         case MATCH_MATCH:            /* A successful match means */
   1618         case MATCH_ACCEPT:           /* the assertion has failed. */
   1619         RRETURN(MATCH_NOMATCH);
   1620 
   1621         case MATCH_NOMATCH:          /* Carry on with next branch */
   1622         break;
   1623 
   1624         /* See comment in the code for capturing groups above about handling
   1625         THEN. */
   1626 
   1627         case MATCH_THEN:
   1628         next = ecode + GET(ecode,1);
   1629         if (md->start_match_ptr < next &&
   1630             (*ecode == OP_ALT || *next == OP_ALT))
   1631           {
   1632           rrc = MATCH_NOMATCH;
   1633           break;
   1634           }
   1635         /* Otherwise fall through. */
   1636 
   1637         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
   1638         assertion to fail to match, without considering any more alternatives.
   1639         Failing to match means the assertion is true. This is a consistent
   1640         approach, but does not always have the same effect as in Perl. */
   1641 
   1642         case MATCH_COMMIT:
   1643         case MATCH_SKIP:
   1644         case MATCH_SKIP_ARG:
   1645         case MATCH_PRUNE:
   1646         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1647         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
   1648 
   1649         /* Anything else is an error */
   1650 
   1651         default:
   1652         RRETURN(rrc);
   1653         }
   1654 
   1655       /* Continue with next branch */
   1656 
   1657       ecode += GET(ecode,1);
   1658       }
   1659     while (*ecode == OP_ALT);
   1660 
   1661     /* All branches in the assertion failed to match. */
   1662 
   1663     NEG_ASSERT_TRUE:
   1664     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
   1665     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
   1666     continue;
   1667 
   1668     /* Move the subject pointer back. This occurs only at the start of
   1669     each branch of a lookbehind assertion. If we are too close to the start to
   1670     move back, this match function fails. When working with UTF-8 we move
   1671     back a number of characters, not bytes. */
   1672 
   1673     case OP_REVERSE:
   1674 #ifdef SUPPORT_UTF
   1675     if (utf)
   1676       {
   1677       i = GET(ecode, 1);
   1678       while (i-- > 0)
   1679         {
   1680         eptr--;
   1681         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
   1682         BACKCHAR(eptr);
   1683         }
   1684       }
   1685     else
   1686 #endif
   1687 
   1688     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
   1689 
   1690       {
   1691       eptr -= GET(ecode, 1);
   1692       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
   1693       }
   1694 
   1695     /* Save the earliest consulted character, then skip to next op code */
   1696 
   1697     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
   1698     ecode += 1 + LINK_SIZE;
   1699     break;
   1700 
   1701     /* The callout item calls an external function, if one is provided, passing
   1702     details of the match so far. This is mainly for debugging, though the
   1703     function is able to force a failure. */
   1704 
   1705     case OP_CALLOUT:
   1706     if (PUBL(callout) != NULL)
   1707       {
   1708       PUBL(callout_block) cb;
   1709       cb.version          = 2;   /* Version 2 of the callout block */
   1710       cb.callout_number   = ecode[1];
   1711       cb.offset_vector    = md->offset_vector;
   1712 #if defined COMPILE_PCRE8
   1713       cb.subject          = (PCRE_SPTR)md->start_subject;
   1714 #elif defined COMPILE_PCRE16
   1715       cb.subject          = (PCRE_SPTR16)md->start_subject;
   1716 #elif defined COMPILE_PCRE32
   1717       cb.subject          = (PCRE_SPTR32)md->start_subject;
   1718 #endif
   1719       cb.subject_length   = (int)(md->end_subject - md->start_subject);
   1720       cb.start_match      = (int)(mstart - md->start_subject);
   1721       cb.current_position = (int)(eptr - md->start_subject);
   1722       cb.pattern_position = GET(ecode, 2);
   1723       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
   1724       cb.capture_top      = offset_top/2;
   1725       cb.capture_last     = md->capture_last & CAPLMASK;
   1726       /* Internal change requires this for API compatibility. */
   1727       if (cb.capture_last == 0) cb.capture_last = -1;
   1728       cb.callout_data     = md->callout_data;
   1729       cb.mark             = md->nomatch_mark;
   1730       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
   1731       if (rrc < 0) RRETURN(rrc);
   1732       }
   1733     ecode += 2 + 2*LINK_SIZE;
   1734     break;
   1735 
   1736     /* Recursion either matches the current regex, or some subexpression. The
   1737     offset data is the offset to the starting bracket from the start of the
   1738     whole pattern. (This is so that it works from duplicated subpatterns.)
   1739 
   1740     The state of the capturing groups is preserved over recursion, and
   1741     re-instated afterwards. We don't know how many are started and not yet
   1742     finished (offset_top records the completed total) so we just have to save
   1743     all the potential data. There may be up to 65535 such values, which is too
   1744     large to put on the stack, but using malloc for small numbers seems
   1745     expensive. As a compromise, the stack is used when there are no more than
   1746     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
   1747 
   1748     There are also other values that have to be saved. We use a chained
   1749     sequence of blocks that actually live on the stack. Thanks to Robin Houston
   1750     for the original version of this logic. It has, however, been hacked around
   1751     a lot, so he is not to blame for the current way it works. */
   1752 
   1753     case OP_RECURSE:
   1754       {
   1755       recursion_info *ri;
   1756       unsigned int recno;
   1757 
   1758       callpat = md->start_code + GET(ecode, 1);
   1759       recno = (callpat == md->start_code)? 0 :
   1760         GET2(callpat, 1 + LINK_SIZE);
   1761 
   1762       /* Check for repeating a recursion without advancing the subject pointer.
   1763       This should catch convoluted mutual recursions. (Some simple cases are
   1764       caught at compile time.) */
   1765 
   1766       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
   1767         if (recno == ri->group_num && eptr == ri->subject_position)
   1768           RRETURN(PCRE_ERROR_RECURSELOOP);
   1769 
   1770       /* Add to "recursing stack" */
   1771 
   1772       new_recursive.group_num = recno;
   1773       new_recursive.saved_capture_last = md->capture_last;
   1774       new_recursive.subject_position = eptr;
   1775       new_recursive.prevrec = md->recursive;
   1776       md->recursive = &new_recursive;
   1777 
   1778       /* Where to continue from afterwards */
   1779 
   1780       ecode += 1 + LINK_SIZE;
   1781 
   1782       /* Now save the offset data */
   1783 
   1784       new_recursive.saved_max = md->offset_end;
   1785       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
   1786         new_recursive.offset_save = stacksave;
   1787       else
   1788         {
   1789         new_recursive.offset_save =
   1790           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
   1791         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
   1792         }
   1793       memcpy(new_recursive.offset_save, md->offset_vector,
   1794             new_recursive.saved_max * sizeof(int));
   1795 
   1796       /* OK, now we can do the recursion. After processing each alternative,
   1797       restore the offset data and the last captured value. If there were nested
   1798       recursions, md->recursive might be changed, so reset it before looping.
   1799       */
   1800 
   1801       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
   1802       cbegroup = (*callpat >= OP_SBRA);
   1803       do
   1804         {
   1805         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
   1806         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
   1807           md, eptrb, RM6);
   1808         memcpy(md->offset_vector, new_recursive.offset_save,
   1809             new_recursive.saved_max * sizeof(int));
   1810         md->capture_last = new_recursive.saved_capture_last;
   1811         md->recursive = new_recursive.prevrec;
   1812         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1813           {
   1814           DPRINTF(("Recursion matched\n"));
   1815           if (new_recursive.offset_save != stacksave)
   1816             (PUBL(free))(new_recursive.offset_save);
   1817 
   1818           /* Set where we got to in the subject, and reset the start in case
   1819           it was changed by \K. This *is* propagated back out of a recursion,
   1820           for Perl compatibility. */
   1821 
   1822           eptr = md->end_match_ptr;
   1823           mstart = md->start_match_ptr;
   1824           goto RECURSION_MATCHED;        /* Exit loop; end processing */
   1825           }
   1826 
   1827         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
   1828         recursion; they cause a NOMATCH for the entire recursion. These codes
   1829         are defined in a range that can be tested for. */
   1830 
   1831         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
   1832           {
   1833           if (new_recursive.offset_save != stacksave)
   1834             (PUBL(free))(new_recursive.offset_save);
   1835           RRETURN(MATCH_NOMATCH);
   1836           }
   1837 
   1838         /* Any return code other than NOMATCH is an error. */
   1839 
   1840         if (rrc != MATCH_NOMATCH)
   1841           {
   1842           DPRINTF(("Recursion gave error %d\n", rrc));
   1843           if (new_recursive.offset_save != stacksave)
   1844             (PUBL(free))(new_recursive.offset_save);
   1845           RRETURN(rrc);
   1846           }
   1847 
   1848         md->recursive = &new_recursive;
   1849         callpat += GET(callpat, 1);
   1850         }
   1851       while (*callpat == OP_ALT);
   1852 
   1853       DPRINTF(("Recursion didn't match\n"));
   1854       md->recursive = new_recursive.prevrec;
   1855       if (new_recursive.offset_save != stacksave)
   1856         (PUBL(free))(new_recursive.offset_save);
   1857       RRETURN(MATCH_NOMATCH);
   1858       }
   1859 
   1860     RECURSION_MATCHED:
   1861     break;
   1862 
   1863     /* An alternation is the end of a branch; scan along to find the end of the
   1864     bracketed group and go to there. */
   1865 
   1866     case OP_ALT:
   1867     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1868     break;
   1869 
   1870     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
   1871     indicating that it may occur zero times. It may repeat infinitely, or not
   1872     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
   1873     with fixed upper repeat limits are compiled as a number of copies, with the
   1874     optional ones preceded by BRAZERO or BRAMINZERO. */
   1875 
   1876     case OP_BRAZERO:
   1877     next = ecode + 1;
   1878     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
   1879     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1880     do next += GET(next, 1); while (*next == OP_ALT);
   1881     ecode = next + 1 + LINK_SIZE;
   1882     break;
   1883 
   1884     case OP_BRAMINZERO:
   1885     next = ecode + 1;
   1886     do next += GET(next, 1); while (*next == OP_ALT);
   1887     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
   1888     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1889     ecode++;
   1890     break;
   1891 
   1892     case OP_SKIPZERO:
   1893     next = ecode+1;
   1894     do next += GET(next,1); while (*next == OP_ALT);
   1895     ecode = next + 1 + LINK_SIZE;
   1896     break;
   1897 
   1898     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
   1899     here; just jump to the group, with allow_zero set TRUE. */
   1900 
   1901     case OP_BRAPOSZERO:
   1902     op = *(++ecode);
   1903     allow_zero = TRUE;
   1904     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
   1905       goto POSSESSIVE_NON_CAPTURE;
   1906 
   1907     /* End of a group, repeated or non-repeating. */
   1908 
   1909     case OP_KET:
   1910     case OP_KETRMIN:
   1911     case OP_KETRMAX:
   1912     case OP_KETRPOS:
   1913     prev = ecode - GET(ecode, 1);
   1914 
   1915     /* If this was a group that remembered the subject start, in order to break
   1916     infinite repeats of empty string matches, retrieve the subject start from
   1917     the chain. Otherwise, set it NULL. */
   1918 
   1919     if (*prev >= OP_SBRA || *prev == OP_ONCE)
   1920       {
   1921       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
   1922       eptrb = eptrb->epb_prev;              /* Backup to previous group */
   1923       }
   1924     else saved_eptr = NULL;
   1925 
   1926     /* If we are at the end of an assertion group or a non-capturing atomic
   1927     group, stop matching and return MATCH_MATCH, but record the current high
   1928     water mark for use by positive assertions. We also need to record the match
   1929     start in case it was changed by \K. */
   1930 
   1931     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
   1932          *prev == OP_ONCE_NC)
   1933       {
   1934       md->end_match_ptr = eptr;      /* For ONCE_NC */
   1935       md->end_offset_top = offset_top;
   1936       md->start_match_ptr = mstart;
   1937       RRETURN(MATCH_MATCH);         /* Sets md->mark */
   1938       }
   1939 
   1940     /* For capturing groups we have to check the group number back at the start
   1941     and if necessary complete handling an extraction by setting the offsets and
   1942     bumping the high water mark. Whole-pattern recursion is coded as a recurse
   1943     into group 0, so it won't be picked up here. Instead, we catch it when the
   1944     OP_END is reached. Other recursion is handled here. We just have to record
   1945     the current subject position and start match pointer and give a MATCH
   1946     return. */
   1947 
   1948     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
   1949         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
   1950       {
   1951       number = GET2(prev, 1+LINK_SIZE);
   1952       offset = number << 1;
   1953 
   1954 #ifdef PCRE_DEBUG
   1955       printf("end bracket %d", number);
   1956       printf("\n");
   1957 #endif
   1958 
   1959       /* Handle a recursively called group. */
   1960 
   1961       if (md->recursive != NULL && md->recursive->group_num == number)
   1962         {
   1963         md->end_match_ptr = eptr;
   1964         md->start_match_ptr = mstart;
   1965         RRETURN(MATCH_MATCH);
   1966         }
   1967 
   1968       /* Deal with capturing */
   1969 
   1970       md->capture_last = (md->capture_last & OVFLMASK) | number;
   1971       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
   1972         {
   1973         /* If offset is greater than offset_top, it means that we are
   1974         "skipping" a capturing group, and that group's offsets must be marked
   1975         unset. In earlier versions of PCRE, all the offsets were unset at the
   1976         start of matching, but this doesn't work because atomic groups and
   1977         assertions can cause a value to be set that should later be unset.
   1978         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
   1979         part of the atomic group, but this is not on the final matching path,
   1980         so must be unset when 2 is set. (If there is no group 2, there is no
   1981         problem, because offset_top will then be 2, indicating no capture.) */
   1982 
   1983         if (offset > offset_top)
   1984           {
   1985           register int *iptr = md->offset_vector + offset_top;
   1986           register int *iend = md->offset_vector + offset;
   1987           while (iptr < iend) *iptr++ = -1;
   1988           }
   1989 
   1990         /* Now make the extraction */
   1991 
   1992         md->offset_vector[offset] =
   1993           md->offset_vector[md->offset_end - number];
   1994         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1995         if (offset_top <= offset) offset_top = offset + 2;
   1996         }
   1997       }
   1998 
   1999     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
   2000     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
   2001     at a time from the outer level, thus saving stack. This must precede the
   2002     empty string test - in this case that test is done at the outer level. */
   2003 
   2004     if (*ecode == OP_KETRPOS)
   2005       {
   2006       md->start_match_ptr = mstart;    /* In case \K reset it */
   2007       md->end_match_ptr = eptr;
   2008       md->end_offset_top = offset_top;
   2009       RRETURN(MATCH_KETRPOS);
   2010       }
   2011 
   2012     /* For an ordinary non-repeating ket, just continue at this level. This
   2013     also happens for a repeating ket if no characters were matched in the
   2014     group. This is the forcible breaking of infinite loops as implemented in
   2015     Perl 5.005. For a non-repeating atomic group that includes captures,
   2016     establish a backup point by processing the rest of the pattern at a lower
   2017     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
   2018     original OP_ONCE level, thereby bypassing intermediate backup points, but
   2019     resetting any captures that happened along the way. */
   2020 
   2021     if (*ecode == OP_KET || eptr == saved_eptr)
   2022       {
   2023       if (*prev == OP_ONCE)
   2024         {
   2025         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
   2026         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2027         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2028         RRETURN(MATCH_ONCE);
   2029         }
   2030       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
   2031       break;
   2032       }
   2033 
   2034     /* The normal repeating kets try the rest of the pattern or restart from
   2035     the preceding bracket, in the appropriate order. In the second case, we can
   2036     use tail recursion to avoid using another stack frame, unless we have an
   2037     atomic group or an unlimited repeat of a group that can match an empty
   2038     string. */
   2039 
   2040     if (*ecode == OP_KETRMIN)
   2041       {
   2042       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
   2043       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2044       if (*prev == OP_ONCE)
   2045         {
   2046         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
   2047         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2048         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2049         RRETURN(MATCH_ONCE);
   2050         }
   2051       if (*prev >= OP_SBRA)    /* Could match an empty string */
   2052         {
   2053         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
   2054         RRETURN(rrc);
   2055         }
   2056       ecode = prev;
   2057       goto TAIL_RECURSE;
   2058       }
   2059     else  /* OP_KETRMAX */
   2060       {
   2061       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
   2062       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
   2063       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2064       if (*prev == OP_ONCE)
   2065         {
   2066         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
   2067         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2068         md->once_target = prev;
   2069         RRETURN(MATCH_ONCE);
   2070         }
   2071       ecode += 1 + LINK_SIZE;
   2072       goto TAIL_RECURSE;
   2073       }
   2074     /* Control never gets here */
   2075 
   2076     /* Not multiline mode: start of subject assertion, unless notbol. */
   2077 
   2078     case OP_CIRC:
   2079     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
   2080 
   2081     /* Start of subject assertion */
   2082 
   2083     case OP_SOD:
   2084     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
   2085     ecode++;
   2086     break;
   2087 
   2088     /* Multiline mode: start of subject unless notbol, or after any newline. */
   2089 
   2090     case OP_CIRCM:
   2091     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
   2092     if (eptr != md->start_subject &&
   2093         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
   2094       RRETURN(MATCH_NOMATCH);
   2095     ecode++;
   2096     break;
   2097 
   2098     /* Start of match assertion */
   2099 
   2100     case OP_SOM:
   2101     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
   2102     ecode++;
   2103     break;
   2104 
   2105     /* Reset the start of match point */
   2106 
   2107     case OP_SET_SOM:
   2108     mstart = eptr;
   2109     ecode++;
   2110     break;
   2111 
   2112     /* Multiline mode: assert before any newline, or before end of subject
   2113     unless noteol is set. */
   2114 
   2115     case OP_DOLLM:
   2116     if (eptr < md->end_subject)
   2117       {
   2118       if (!IS_NEWLINE(eptr))
   2119         {
   2120         if (md->partial != 0 &&
   2121             eptr + 1 >= md->end_subject &&
   2122             NLBLOCK->nltype == NLTYPE_FIXED &&
   2123             NLBLOCK->nllen == 2 &&
   2124             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2125           {
   2126           md->hitend = TRUE;
   2127           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2128           }
   2129         RRETURN(MATCH_NOMATCH);
   2130         }
   2131       }
   2132     else
   2133       {
   2134       if (md->noteol) RRETURN(MATCH_NOMATCH);
   2135       SCHECK_PARTIAL();
   2136       }
   2137     ecode++;
   2138     break;
   2139 
   2140     /* Not multiline mode: assert before a terminating newline or before end of
   2141     subject unless noteol is set. */
   2142 
   2143     case OP_DOLL:
   2144     if (md->noteol) RRETURN(MATCH_NOMATCH);
   2145     if (!md->endonly) goto ASSERT_NL_OR_EOS;
   2146 
   2147     /* ... else fall through for endonly */
   2148 
   2149     /* End of subject assertion (\z) */
   2150 
   2151     case OP_EOD:
   2152     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
   2153     SCHECK_PARTIAL();
   2154     ecode++;
   2155     break;
   2156 
   2157     /* End of subject or ending \n assertion (\Z) */
   2158 
   2159     case OP_EODN:
   2160     ASSERT_NL_OR_EOS:
   2161     if (eptr < md->end_subject &&
   2162         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
   2163       {
   2164       if (md->partial != 0 &&
   2165           eptr + 1 >= md->end_subject &&
   2166           NLBLOCK->nltype == NLTYPE_FIXED &&
   2167           NLBLOCK->nllen == 2 &&
   2168           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2169         {
   2170         md->hitend = TRUE;
   2171         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2172         }
   2173       RRETURN(MATCH_NOMATCH);
   2174       }
   2175 
   2176     /* Either at end of string or \n before end. */
   2177 
   2178     SCHECK_PARTIAL();
   2179     ecode++;
   2180     break;
   2181 
   2182     /* Word boundary assertions */
   2183 
   2184     case OP_NOT_WORD_BOUNDARY:
   2185     case OP_WORD_BOUNDARY:
   2186       {
   2187 
   2188       /* Find out if the previous and current characters are "word" characters.
   2189       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
   2190       be "non-word" characters. Remember the earliest consulted character for
   2191       partial matching. */
   2192 
   2193 #ifdef SUPPORT_UTF
   2194       if (utf)
   2195         {
   2196         /* Get status of previous character */
   2197 
   2198         if (eptr == md->start_subject) prev_is_word = FALSE; else
   2199           {
   2200           PCRE_PUCHAR lastptr = eptr - 1;
   2201           BACKCHAR(lastptr);
   2202           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
   2203           GETCHAR(c, lastptr);
   2204 #ifdef SUPPORT_UCP
   2205           if (md->use_ucp)
   2206             {
   2207             if (c == '_') prev_is_word = TRUE; else
   2208               {
   2209               int cat = UCD_CATEGORY(c);
   2210               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2211               }
   2212             }
   2213           else
   2214 #endif
   2215           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   2216           }
   2217 
   2218         /* Get status of next character */
   2219 
   2220         if (eptr >= md->end_subject)
   2221           {
   2222           SCHECK_PARTIAL();
   2223           cur_is_word = FALSE;
   2224           }
   2225         else
   2226           {
   2227           GETCHAR(c, eptr);
   2228 #ifdef SUPPORT_UCP
   2229           if (md->use_ucp)
   2230             {
   2231             if (c == '_') cur_is_word = TRUE; else
   2232               {
   2233               int cat = UCD_CATEGORY(c);
   2234               cur_is_word = (cat == ucp_L || cat == ucp_N);
   2235               }
   2236             }
   2237           else
   2238 #endif
   2239           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   2240           }
   2241         }
   2242       else
   2243 #endif
   2244 
   2245       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
   2246       consistency with the behaviour of \w we do use it in this case. */
   2247 
   2248         {
   2249         /* Get status of previous character */
   2250 
   2251         if (eptr == md->start_subject) prev_is_word = FALSE; else
   2252           {
   2253           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
   2254 #ifdef SUPPORT_UCP
   2255           if (md->use_ucp)
   2256             {
   2257             c = eptr[-1];
   2258             if (c == '_') prev_is_word = TRUE; else
   2259               {
   2260               int cat = UCD_CATEGORY(c);
   2261               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2262               }
   2263             }
   2264           else
   2265 #endif
   2266           prev_is_word = MAX_255(eptr[-1])
   2267             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
   2268           }
   2269 
   2270         /* Get status of next character */
   2271 
   2272         if (eptr >= md->end_subject)
   2273           {
   2274           SCHECK_PARTIAL();
   2275           cur_is_word = FALSE;
   2276           }
   2277         else
   2278 #ifdef SUPPORT_UCP
   2279         if (md->use_ucp)
   2280           {
   2281           c = *eptr;
   2282           if (c == '_') cur_is_word = TRUE; else
   2283             {
   2284             int cat = UCD_CATEGORY(c);
   2285             cur_is_word = (cat == ucp_L || cat == ucp_N);
   2286             }
   2287           }
   2288         else
   2289 #endif
   2290         cur_is_word = MAX_255(*eptr)
   2291           && ((md->ctypes[*eptr] & ctype_word) != 0);
   2292         }
   2293 
   2294       /* Now see if the situation is what we want */
   2295 
   2296       if ((*ecode++ == OP_WORD_BOUNDARY)?
   2297            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
   2298         RRETURN(MATCH_NOMATCH);
   2299       }
   2300     break;
   2301 
   2302     /* Match any single character type except newline; have to take care with
   2303     CRLF newlines and partial matching. */
   2304 
   2305     case OP_ANY:
   2306     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   2307     if (md->partial != 0 &&
   2308         eptr + 1 >= md->end_subject &&
   2309         NLBLOCK->nltype == NLTYPE_FIXED &&
   2310         NLBLOCK->nllen == 2 &&
   2311         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2312       {
   2313       md->hitend = TRUE;
   2314       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2315       }
   2316 
   2317     /* Fall through */
   2318 
   2319     /* Match any single character whatsoever. */
   2320 
   2321     case OP_ALLANY:
   2322     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2323       {                            /* not be updated before SCHECK_PARTIAL. */
   2324       SCHECK_PARTIAL();
   2325       RRETURN(MATCH_NOMATCH);
   2326       }
   2327     eptr++;
   2328 #ifdef SUPPORT_UTF
   2329     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   2330 #endif
   2331     ecode++;
   2332     break;
   2333 
   2334     /* Match a single byte, even in UTF-8 mode. This opcode really does match
   2335     any byte, even newline, independent of the setting of PCRE_DOTALL. */
   2336 
   2337     case OP_ANYBYTE:
   2338     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2339       {                            /* not be updated before SCHECK_PARTIAL. */
   2340       SCHECK_PARTIAL();
   2341       RRETURN(MATCH_NOMATCH);
   2342       }
   2343     eptr++;
   2344     ecode++;
   2345     break;
   2346 
   2347     case OP_NOT_DIGIT:
   2348     if (eptr >= md->end_subject)
   2349       {
   2350       SCHECK_PARTIAL();
   2351       RRETURN(MATCH_NOMATCH);
   2352       }
   2353     GETCHARINCTEST(c, eptr);
   2354     if (
   2355 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2356        c < 256 &&
   2357 #endif
   2358        (md->ctypes[c] & ctype_digit) != 0
   2359        )
   2360       RRETURN(MATCH_NOMATCH);
   2361     ecode++;
   2362     break;
   2363 
   2364     case OP_DIGIT:
   2365     if (eptr >= md->end_subject)
   2366       {
   2367       SCHECK_PARTIAL();
   2368       RRETURN(MATCH_NOMATCH);
   2369       }
   2370     GETCHARINCTEST(c, eptr);
   2371     if (
   2372 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2373        c > 255 ||
   2374 #endif
   2375        (md->ctypes[c] & ctype_digit) == 0
   2376        )
   2377       RRETURN(MATCH_NOMATCH);
   2378     ecode++;
   2379     break;
   2380 
   2381     case OP_NOT_WHITESPACE:
   2382     if (eptr >= md->end_subject)
   2383       {
   2384       SCHECK_PARTIAL();
   2385       RRETURN(MATCH_NOMATCH);
   2386       }
   2387     GETCHARINCTEST(c, eptr);
   2388     if (
   2389 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2390        c < 256 &&
   2391 #endif
   2392        (md->ctypes[c] & ctype_space) != 0
   2393        )
   2394       RRETURN(MATCH_NOMATCH);
   2395     ecode++;
   2396     break;
   2397 
   2398     case OP_WHITESPACE:
   2399     if (eptr >= md->end_subject)
   2400       {
   2401       SCHECK_PARTIAL();
   2402       RRETURN(MATCH_NOMATCH);
   2403       }
   2404     GETCHARINCTEST(c, eptr);
   2405     if (
   2406 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2407        c > 255 ||
   2408 #endif
   2409        (md->ctypes[c] & ctype_space) == 0
   2410        )
   2411       RRETURN(MATCH_NOMATCH);
   2412     ecode++;
   2413     break;
   2414 
   2415     case OP_NOT_WORDCHAR:
   2416     if (eptr >= md->end_subject)
   2417       {
   2418       SCHECK_PARTIAL();
   2419       RRETURN(MATCH_NOMATCH);
   2420       }
   2421     GETCHARINCTEST(c, eptr);
   2422     if (
   2423 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2424        c < 256 &&
   2425 #endif
   2426        (md->ctypes[c] & ctype_word) != 0
   2427        )
   2428       RRETURN(MATCH_NOMATCH);
   2429     ecode++;
   2430     break;
   2431 
   2432     case OP_WORDCHAR:
   2433     if (eptr >= md->end_subject)
   2434       {
   2435       SCHECK_PARTIAL();
   2436       RRETURN(MATCH_NOMATCH);
   2437       }
   2438     GETCHARINCTEST(c, eptr);
   2439     if (
   2440 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2441        c > 255 ||
   2442 #endif
   2443        (md->ctypes[c] & ctype_word) == 0
   2444        )
   2445       RRETURN(MATCH_NOMATCH);
   2446     ecode++;
   2447     break;
   2448 
   2449     case OP_ANYNL:
   2450     if (eptr >= md->end_subject)
   2451       {
   2452       SCHECK_PARTIAL();
   2453       RRETURN(MATCH_NOMATCH);
   2454       }
   2455     GETCHARINCTEST(c, eptr);
   2456     switch(c)
   2457       {
   2458       default: RRETURN(MATCH_NOMATCH);
   2459 
   2460       case CHAR_CR:
   2461       if (eptr >= md->end_subject)
   2462         {
   2463         SCHECK_PARTIAL();
   2464         }
   2465       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
   2466       break;
   2467 
   2468       case CHAR_LF:
   2469       break;
   2470 
   2471       case CHAR_VT:
   2472       case CHAR_FF:
   2473       case CHAR_NEL:
   2474 #ifndef EBCDIC
   2475       case 0x2028:
   2476       case 0x2029:
   2477 #endif  /* Not EBCDIC */
   2478       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   2479       break;
   2480       }
   2481     ecode++;
   2482     break;
   2483 
   2484     case OP_NOT_HSPACE:
   2485     if (eptr >= md->end_subject)
   2486       {
   2487       SCHECK_PARTIAL();
   2488       RRETURN(MATCH_NOMATCH);
   2489       }
   2490     GETCHARINCTEST(c, eptr);
   2491     switch(c)
   2492       {
   2493       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   2494       default: break;
   2495       }
   2496     ecode++;
   2497     break;
   2498 
   2499     case OP_HSPACE:
   2500     if (eptr >= md->end_subject)
   2501       {
   2502       SCHECK_PARTIAL();
   2503       RRETURN(MATCH_NOMATCH);
   2504       }
   2505     GETCHARINCTEST(c, eptr);
   2506     switch(c)
   2507       {
   2508       HSPACE_CASES: break;  /* Byte and multibyte cases */
   2509       default: RRETURN(MATCH_NOMATCH);
   2510       }
   2511     ecode++;
   2512     break;
   2513 
   2514     case OP_NOT_VSPACE:
   2515     if (eptr >= md->end_subject)
   2516       {
   2517       SCHECK_PARTIAL();
   2518       RRETURN(MATCH_NOMATCH);
   2519       }
   2520     GETCHARINCTEST(c, eptr);
   2521     switch(c)
   2522       {
   2523       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   2524       default: break;
   2525       }
   2526     ecode++;
   2527     break;
   2528 
   2529     case OP_VSPACE:
   2530     if (eptr >= md->end_subject)
   2531       {
   2532       SCHECK_PARTIAL();
   2533       RRETURN(MATCH_NOMATCH);
   2534       }
   2535     GETCHARINCTEST(c, eptr);
   2536     switch(c)
   2537       {
   2538       VSPACE_CASES: break;
   2539       default: RRETURN(MATCH_NOMATCH);
   2540       }
   2541     ecode++;
   2542     break;
   2543 
   2544 #ifdef SUPPORT_UCP
   2545     /* Check the next character by Unicode property. We will get here only
   2546     if the support is in the binary; otherwise a compile-time error occurs. */
   2547 
   2548     case OP_PROP:
   2549     case OP_NOTPROP:
   2550     if (eptr >= md->end_subject)
   2551       {
   2552       SCHECK_PARTIAL();
   2553       RRETURN(MATCH_NOMATCH);
   2554       }
   2555     GETCHARINCTEST(c, eptr);
   2556       {
   2557       const pcre_uint32 *cp;
   2558       const ucd_record *prop = GET_UCD(c);
   2559 
   2560       switch(ecode[1])
   2561         {
   2562         case PT_ANY:
   2563         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2564         break;
   2565 
   2566         case PT_LAMP:
   2567         if ((prop->chartype == ucp_Lu ||
   2568              prop->chartype == ucp_Ll ||
   2569              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
   2570           RRETURN(MATCH_NOMATCH);
   2571         break;
   2572 
   2573         case PT_GC:
   2574         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
   2575           RRETURN(MATCH_NOMATCH);
   2576         break;
   2577 
   2578         case PT_PC:
   2579         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
   2580           RRETURN(MATCH_NOMATCH);
   2581         break;
   2582 
   2583         case PT_SC:
   2584         if ((ecode[2] != prop->script) == (op == OP_PROP))
   2585           RRETURN(MATCH_NOMATCH);
   2586         break;
   2587 
   2588         /* These are specials */
   2589 
   2590         case PT_ALNUM:
   2591         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2592              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
   2593           RRETURN(MATCH_NOMATCH);
   2594         break;
   2595 
   2596         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   2597         which means that Perl space and POSIX space are now identical. PCRE
   2598         was changed at release 8.34. */
   2599 
   2600         case PT_SPACE:    /* Perl space */
   2601         case PT_PXSPACE:  /* POSIX space */
   2602         switch(c)
   2603           {
   2604           HSPACE_CASES:
   2605           VSPACE_CASES:
   2606           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2607           break;
   2608 
   2609           default:
   2610           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
   2611             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
   2612           break;
   2613           }
   2614         break;
   2615 
   2616         case PT_WORD:
   2617         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2618              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   2619              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
   2620           RRETURN(MATCH_NOMATCH);
   2621         break;
   2622 
   2623         case PT_CLIST:
   2624         cp = PRIV(ucd_caseless_sets) + ecode[2];
   2625         for (;;)
   2626           {
   2627           if (c < *cp)
   2628             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
   2629           if (c == *cp++)
   2630             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
   2631           }
   2632         break;
   2633 
   2634         case PT_UCNC:
   2635         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   2636              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   2637              c >= 0xe000) == (op == OP_NOTPROP))
   2638           RRETURN(MATCH_NOMATCH);
   2639         break;
   2640 
   2641         /* This should never occur */
   2642 
   2643         default:
   2644         RRETURN(PCRE_ERROR_INTERNAL);
   2645         }
   2646 
   2647       ecode += 3;
   2648       }
   2649     break;
   2650 
   2651     /* Match an extended Unicode sequence. We will get here only if the support
   2652     is in the binary; otherwise a compile-time error occurs. */
   2653 
   2654     case OP_EXTUNI:
   2655     if (eptr >= md->end_subject)
   2656       {
   2657       SCHECK_PARTIAL();
   2658       RRETURN(MATCH_NOMATCH);
   2659       }
   2660     else
   2661       {
   2662       int lgb, rgb;
   2663       GETCHARINCTEST(c, eptr);
   2664       lgb = UCD_GRAPHBREAK(c);
   2665       while (eptr < md->end_subject)
   2666         {
   2667         int len = 1;
   2668         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   2669         rgb = UCD_GRAPHBREAK(c);
   2670         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   2671         lgb = rgb;
   2672         eptr += len;
   2673         }
   2674       }
   2675     CHECK_PARTIAL();
   2676     ecode++;
   2677     break;
   2678 #endif  /* SUPPORT_UCP */
   2679 
   2680 
   2681     /* Match a back reference, possibly repeatedly. Look past the end of the
   2682     item to see if there is repeat information following. The code is similar
   2683     to that for character classes, but repeated for efficiency. Then obey
   2684     similar code to character type repeats - written out again for speed.
   2685     However, if the referenced string is the empty string, always treat
   2686     it as matched, any number of times (otherwise there could be infinite
   2687     loops). If the reference is unset, there are two possibilities:
   2688 
   2689     (a) In the default, Perl-compatible state, set the length negative;
   2690     this ensures that every attempt at a match fails. We can't just fail
   2691     here, because of the possibility of quantifiers with zero minima.
   2692 
   2693     (b) If the JavaScript compatibility flag is set, set the length to zero
   2694     so that the back reference matches an empty string.
   2695 
   2696     Otherwise, set the length to the length of what was matched by the
   2697     referenced subpattern.
   2698 
   2699     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
   2700     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
   2701     and OP_DNREFI are used. In this case we must scan the list of groups to
   2702     which the name refers, and use the first one that is set. */
   2703 
   2704     case OP_DNREF:
   2705     case OP_DNREFI:
   2706     caseless = op == OP_DNREFI;
   2707       {
   2708       int count = GET2(ecode, 1+IMM2_SIZE);
   2709       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   2710       ecode += 1 + 2*IMM2_SIZE;
   2711 
   2712       /* Setting the default length first and initializing 'offset' avoids
   2713       compiler warnings in the REF_REPEAT code. */
   2714 
   2715       length = (md->jscript_compat)? 0 : -1;
   2716       offset = 0;
   2717 
   2718       while (count-- > 0)
   2719         {
   2720         offset = GET2(slot, 0) << 1;
   2721         if (offset < offset_top && md->offset_vector[offset] >= 0)
   2722           {
   2723           length = md->offset_vector[offset+1] - md->offset_vector[offset];
   2724           break;
   2725           }
   2726         slot += md->name_entry_size;
   2727         }
   2728       }
   2729     goto REF_REPEAT;
   2730 
   2731     case OP_REF:
   2732     case OP_REFI:
   2733     caseless = op == OP_REFI;
   2734     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
   2735     ecode += 1 + IMM2_SIZE;
   2736     if (offset >= offset_top || md->offset_vector[offset] < 0)
   2737       length = (md->jscript_compat)? 0 : -1;
   2738     else
   2739       length = md->offset_vector[offset+1] - md->offset_vector[offset];
   2740 
   2741     /* Set up for repetition, or handle the non-repeated case */
   2742 
   2743     REF_REPEAT:
   2744     switch (*ecode)
   2745       {
   2746       case OP_CRSTAR:
   2747       case OP_CRMINSTAR:
   2748       case OP_CRPLUS:
   2749       case OP_CRMINPLUS:
   2750       case OP_CRQUERY:
   2751       case OP_CRMINQUERY:
   2752       c = *ecode++ - OP_CRSTAR;
   2753       minimize = (c & 1) != 0;
   2754       min = rep_min[c];                 /* Pick up values from tables; */
   2755       max = rep_max[c];                 /* zero for max => infinity */
   2756       if (max == 0) max = INT_MAX;
   2757       break;
   2758 
   2759       case OP_CRRANGE:
   2760       case OP_CRMINRANGE:
   2761       minimize = (*ecode == OP_CRMINRANGE);
   2762       min = GET2(ecode, 1);
   2763       max = GET2(ecode, 1 + IMM2_SIZE);
   2764       if (max == 0) max = INT_MAX;
   2765       ecode += 1 + 2 * IMM2_SIZE;
   2766       break;
   2767 
   2768       default:               /* No repeat follows */
   2769       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
   2770         {
   2771         if (length == -2) eptr = md->end_subject;   /* Partial match */
   2772         CHECK_PARTIAL();
   2773         RRETURN(MATCH_NOMATCH);
   2774         }
   2775       eptr += length;
   2776       continue;              /* With the main loop */
   2777       }
   2778 
   2779     /* Handle repeated back references. If the length of the reference is
   2780     zero, just continue with the main loop. If the length is negative, it
   2781     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
   2782     zero, we can continue at the same level without recursion. For any other
   2783     minimum, carrying on will result in NOMATCH. */
   2784 
   2785     if (length == 0) continue;
   2786     if (length < 0 && min == 0) continue;
   2787 
   2788     /* First, ensure the minimum number of matches are present. We get back
   2789     the length of the reference string explicitly rather than passing the
   2790     address of eptr, so that eptr can be a register variable. */
   2791 
   2792     for (i = 1; i <= min; i++)
   2793       {
   2794       int slength;
   2795       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2796         {
   2797         if (slength == -2) eptr = md->end_subject;   /* Partial match */
   2798         CHECK_PARTIAL();
   2799         RRETURN(MATCH_NOMATCH);
   2800         }
   2801       eptr += slength;
   2802       }
   2803 
   2804     /* If min = max, continue at the same level without recursion.
   2805     They are not both allowed to be zero. */
   2806 
   2807     if (min == max) continue;
   2808 
   2809     /* If minimizing, keep trying and advancing the pointer */
   2810 
   2811     if (minimize)
   2812       {
   2813       for (fi = min;; fi++)
   2814         {
   2815         int slength;
   2816         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
   2817         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2818         if (fi >= max) RRETURN(MATCH_NOMATCH);
   2819         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2820           {
   2821           if (slength == -2) eptr = md->end_subject;   /* Partial match */
   2822           CHECK_PARTIAL();
   2823           RRETURN(MATCH_NOMATCH);
   2824           }
   2825         eptr += slength;
   2826         }
   2827       /* Control never gets here */
   2828       }
   2829 
   2830     /* If maximizing, find the longest string and work backwards */
   2831 
   2832     else
   2833       {
   2834       pp = eptr;
   2835       for (i = min; i < max; i++)
   2836         {
   2837         int slength;
   2838         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2839           {
   2840           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
   2841           the soft partial matching case. */
   2842 
   2843           if (slength == -2 && md->partial != 0 &&
   2844               md->end_subject > md->start_used_ptr)
   2845             {
   2846             md->hitend = TRUE;
   2847             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2848             }
   2849           break;
   2850           }
   2851         eptr += slength;
   2852         }
   2853 
   2854       while (eptr >= pp)
   2855         {
   2856         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
   2857         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2858         eptr -= length;
   2859         }
   2860       RRETURN(MATCH_NOMATCH);
   2861       }
   2862     /* Control never gets here */
   2863 
   2864     /* Match a bit-mapped character class, possibly repeatedly. This op code is
   2865     used when all the characters in the class have values in the range 0-255,
   2866     and either the matching is caseful, or the characters are in the range
   2867     0-127 when UTF-8 processing is enabled. The only difference between
   2868     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
   2869     encountered.
   2870 
   2871     First, look past the end of the item to see if there is repeat information
   2872     following. Then obey similar code to character type repeats - written out
   2873     again for speed. */
   2874 
   2875     case OP_NCLASS:
   2876     case OP_CLASS:
   2877       {
   2878       /* The data variable is saved across frames, so the byte map needs to
   2879       be stored there. */
   2880 #define BYTE_MAP ((pcre_uint8 *)data)
   2881       data = ecode + 1;                /* Save for matching */
   2882       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
   2883 
   2884       switch (*ecode)
   2885         {
   2886         case OP_CRSTAR:
   2887         case OP_CRMINSTAR:
   2888         case OP_CRPLUS:
   2889         case OP_CRMINPLUS:
   2890         case OP_CRQUERY:
   2891         case OP_CRMINQUERY:
   2892         case OP_CRPOSSTAR:
   2893         case OP_CRPOSPLUS:
   2894         case OP_CRPOSQUERY:
   2895         c = *ecode++ - OP_CRSTAR;
   2896         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   2897         else possessive = TRUE;
   2898         min = rep_min[c];                 /* Pick up values from tables; */
   2899         max = rep_max[c];                 /* zero for max => infinity */
   2900         if (max == 0) max = INT_MAX;
   2901         break;
   2902 
   2903         case OP_CRRANGE:
   2904         case OP_CRMINRANGE:
   2905         case OP_CRPOSRANGE:
   2906         minimize = (*ecode == OP_CRMINRANGE);
   2907         possessive = (*ecode == OP_CRPOSRANGE);
   2908         min = GET2(ecode, 1);
   2909         max = GET2(ecode, 1 + IMM2_SIZE);
   2910         if (max == 0) max = INT_MAX;
   2911         ecode += 1 + 2 * IMM2_SIZE;
   2912         break;
   2913 
   2914         default:               /* No repeat follows */
   2915         min = max = 1;
   2916         break;
   2917         }
   2918 
   2919       /* First, ensure the minimum number of matches are present. */
   2920 
   2921 #ifdef SUPPORT_UTF
   2922       if (utf)
   2923         {
   2924         for (i = 1; i <= min; i++)
   2925           {
   2926           if (eptr >= md->end_subject)
   2927             {
   2928             SCHECK_PARTIAL();
   2929             RRETURN(MATCH_NOMATCH);
   2930             }
   2931           GETCHARINC(c, eptr);
   2932           if (c > 255)
   2933             {
   2934             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2935             }
   2936           else
   2937             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2938           }
   2939         }
   2940       else
   2941 #endif
   2942       /* Not UTF mode */
   2943         {
   2944         for (i = 1; i <= min; i++)
   2945           {
   2946           if (eptr >= md->end_subject)
   2947             {
   2948             SCHECK_PARTIAL();
   2949             RRETURN(MATCH_NOMATCH);
   2950             }
   2951           c = *eptr++;
   2952 #ifndef COMPILE_PCRE8
   2953           if (c > 255)
   2954             {
   2955             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2956             }
   2957           else
   2958 #endif
   2959             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2960           }
   2961         }
   2962 
   2963       /* If max == min we can continue with the main loop without the
   2964       need to recurse. */
   2965 
   2966       if (min == max) continue;
   2967 
   2968       /* If minimizing, keep testing the rest of the expression and advancing
   2969       the pointer while it matches the class. */
   2970 
   2971       if (minimize)
   2972         {
   2973 #ifdef SUPPORT_UTF
   2974         if (utf)
   2975           {
   2976           for (fi = min;; fi++)
   2977             {
   2978             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
   2979             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2980             if (fi >= max) RRETURN(MATCH_NOMATCH);
   2981             if (eptr >= md->end_subject)
   2982               {
   2983               SCHECK_PARTIAL();
   2984               RRETURN(MATCH_NOMATCH);
   2985               }
   2986             GETCHARINC(c, eptr);
   2987             if (c > 255)
   2988               {
   2989               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2990               }
   2991             else
   2992               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2993             }
   2994           }
   2995         else
   2996 #endif
   2997         /* Not UTF mode */
   2998           {
   2999           for (fi = min;; fi++)
   3000             {
   3001             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
   3002             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3003             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3004             if (eptr >= md->end_subject)
   3005               {
   3006               SCHECK_PARTIAL();
   3007               RRETURN(MATCH_NOMATCH);
   3008               }
   3009             c = *eptr++;
   3010 #ifndef COMPILE_PCRE8
   3011             if (c > 255)
   3012               {
   3013               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3014               }
   3015             else
   3016 #endif
   3017               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3018             }
   3019           }
   3020         /* Control never gets here */
   3021         }
   3022 
   3023       /* If maximizing, find the longest possible run, then work backwards. */
   3024 
   3025       else
   3026         {
   3027         pp = eptr;
   3028 
   3029 #ifdef SUPPORT_UTF
   3030         if (utf)
   3031           {
   3032           for (i = min; i < max; i++)
   3033             {
   3034             int len = 1;
   3035             if (eptr >= md->end_subject)
   3036               {
   3037               SCHECK_PARTIAL();
   3038               break;
   3039               }
   3040             GETCHARLEN(c, eptr, len);
   3041             if (c > 255)
   3042               {
   3043               if (op == OP_CLASS) break;
   3044               }
   3045             else
   3046               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3047             eptr += len;
   3048             }
   3049 
   3050           if (possessive) continue;    /* No backtracking */
   3051 
   3052           for (;;)
   3053             {
   3054             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
   3055             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3056             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3057             BACKCHAR(eptr);
   3058             }
   3059           }
   3060         else
   3061 #endif
   3062           /* Not UTF mode */
   3063           {
   3064           for (i = min; i < max; i++)
   3065             {
   3066             if (eptr >= md->end_subject)
   3067               {
   3068               SCHECK_PARTIAL();
   3069               break;
   3070               }
   3071             c = *eptr;
   3072 #ifndef COMPILE_PCRE8
   3073             if (c > 255)
   3074               {
   3075               if (op == OP_CLASS) break;
   3076               }
   3077             else
   3078 #endif
   3079               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3080             eptr++;
   3081             }
   3082 
   3083           if (possessive) continue;    /* No backtracking */
   3084 
   3085           while (eptr >= pp)
   3086             {
   3087             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
   3088             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3089             eptr--;
   3090             }
   3091           }
   3092 
   3093         RRETURN(MATCH_NOMATCH);
   3094         }
   3095 #undef BYTE_MAP
   3096       }
   3097     /* Control never gets here */
   3098 
   3099 
   3100     /* Match an extended character class. In the 8-bit library, this opcode is
   3101     encountered only when UTF-8 mode is supported. In the 16-bit and
   3102     32-bit libraries, codepoints greater than 255 may be encountered even when
   3103     UTF is not supported. */
   3104 
   3105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   3106     case OP_XCLASS:
   3107       {
   3108       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
   3109       ecode += GET(ecode, 1);                      /* Advance past the item */
   3110 
   3111       switch (*ecode)
   3112         {
   3113         case OP_CRSTAR:
   3114         case OP_CRMINSTAR:
   3115         case OP_CRPLUS:
   3116         case OP_CRMINPLUS:
   3117         case OP_CRQUERY:
   3118         case OP_CRMINQUERY:
   3119         case OP_CRPOSSTAR:
   3120         case OP_CRPOSPLUS:
   3121         case OP_CRPOSQUERY:
   3122         c = *ecode++ - OP_CRSTAR;
   3123         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   3124         else possessive = TRUE;
   3125         min = rep_min[c];                 /* Pick up values from tables; */
   3126         max = rep_max[c];                 /* zero for max => infinity */
   3127         if (max == 0) max = INT_MAX;
   3128         break;
   3129 
   3130         case OP_CRRANGE:
   3131         case OP_CRMINRANGE:
   3132         case OP_CRPOSRANGE:
   3133         minimize = (*ecode == OP_CRMINRANGE);
   3134         possessive = (*ecode == OP_CRPOSRANGE);
   3135         min = GET2(ecode, 1);
   3136         max = GET2(ecode, 1 + IMM2_SIZE);
   3137         if (max == 0) max = INT_MAX;
   3138         ecode += 1 + 2 * IMM2_SIZE;
   3139         break;
   3140 
   3141         default:               /* No repeat follows */
   3142         min = max = 1;
   3143         break;
   3144         }
   3145 
   3146       /* First, ensure the minimum number of matches are present. */
   3147 
   3148       for (i = 1; i <= min; i++)
   3149         {
   3150         if (eptr >= md->end_subject)
   3151           {
   3152           SCHECK_PARTIAL();
   3153           RRETURN(MATCH_NOMATCH);
   3154           }
   3155         GETCHARINCTEST(c, eptr);
   3156         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3157         }
   3158 
   3159       /* If max == min we can continue with the main loop without the
   3160       need to recurse. */
   3161 
   3162       if (min == max) continue;
   3163 
   3164       /* If minimizing, keep testing the rest of the expression and advancing
   3165       the pointer while it matches the class. */
   3166 
   3167       if (minimize)
   3168         {
   3169         for (fi = min;; fi++)
   3170           {
   3171           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
   3172           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3173           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3174           if (eptr >= md->end_subject)
   3175             {
   3176             SCHECK_PARTIAL();
   3177             RRETURN(MATCH_NOMATCH);
   3178             }
   3179           GETCHARINCTEST(c, eptr);
   3180           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3181           }
   3182         /* Control never gets here */
   3183         }
   3184 
   3185       /* If maximizing, find the longest possible run, then work backwards. */
   3186 
   3187       else
   3188         {
   3189         pp = eptr;
   3190         for (i = min; i < max; i++)
   3191           {
   3192           int len = 1;
   3193           if (eptr >= md->end_subject)
   3194             {
   3195             SCHECK_PARTIAL();
   3196             break;
   3197             }
   3198 #ifdef SUPPORT_UTF
   3199           GETCHARLENTEST(c, eptr, len);
   3200 #else
   3201           c = *eptr;
   3202 #endif
   3203           if (!PRIV(xclass)(c, data, utf)) break;
   3204           eptr += len;
   3205           }
   3206 
   3207         if (possessive) continue;    /* No backtracking */
   3208 
   3209         for(;;)
   3210           {
   3211           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
   3212           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3213           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3214 #ifdef SUPPORT_UTF
   3215           if (utf) BACKCHAR(eptr);
   3216 #endif
   3217           }
   3218         RRETURN(MATCH_NOMATCH);
   3219         }
   3220 
   3221       /* Control never gets here */
   3222       }
   3223 #endif    /* End of XCLASS */
   3224 
   3225     /* Match a single character, casefully */
   3226 
   3227     case OP_CHAR:
   3228 #ifdef SUPPORT_UTF
   3229     if (utf)
   3230       {
   3231       length = 1;
   3232       ecode++;
   3233       GETCHARLEN(fc, ecode, length);
   3234       if (length > md->end_subject - eptr)
   3235         {
   3236         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
   3237         RRETURN(MATCH_NOMATCH);
   3238         }
   3239       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
   3240       }
   3241     else
   3242 #endif
   3243     /* Not UTF mode */
   3244       {
   3245       if (md->end_subject - eptr < 1)
   3246         {
   3247         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
   3248         RRETURN(MATCH_NOMATCH);
   3249         }
   3250       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
   3251       ecode += 2;
   3252       }
   3253     break;
   3254 
   3255     /* Match a single character, caselessly. If we are at the end of the
   3256     subject, give up immediately. */
   3257 
   3258     case OP_CHARI:
   3259     if (eptr >= md->end_subject)
   3260       {
   3261       SCHECK_PARTIAL();
   3262       RRETURN(MATCH_NOMATCH);
   3263       }
   3264 
   3265 #ifdef SUPPORT_UTF
   3266     if (utf)
   3267       {
   3268       length = 1;
   3269       ecode++;
   3270       GETCHARLEN(fc, ecode, length);
   3271 
   3272       /* If the pattern character's value is < 128, we have only one byte, and
   3273       we know that its other case must also be one byte long, so we can use the
   3274       fast lookup table. We know that there is at least one byte left in the
   3275       subject. */
   3276 
   3277       if (fc < 128)
   3278         {
   3279         pcre_uint32 cc = UCHAR21(eptr);
   3280         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
   3281         ecode++;
   3282         eptr++;
   3283         }
   3284 
   3285       /* Otherwise we must pick up the subject character. Note that we cannot
   3286       use the value of "length" to check for sufficient bytes left, because the
   3287       other case of the character may have more or fewer bytes.  */
   3288 
   3289       else
   3290         {
   3291         pcre_uint32 dc;
   3292         GETCHARINC(dc, eptr);
   3293         ecode += length;
   3294 
   3295         /* If we have Unicode property support, we can use it to test the other
   3296         case of the character, if there is one. */
   3297 
   3298         if (fc != dc)
   3299           {
   3300 #ifdef SUPPORT_UCP
   3301           if (dc != UCD_OTHERCASE(fc))
   3302 #endif
   3303             RRETURN(MATCH_NOMATCH);
   3304           }
   3305         }
   3306       }
   3307     else
   3308 #endif   /* SUPPORT_UTF */
   3309 
   3310     /* Not UTF mode */
   3311       {
   3312       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
   3313           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
   3314       eptr++;
   3315       ecode += 2;
   3316       }
   3317     break;
   3318 
   3319     /* Match a single character repeatedly. */
   3320 
   3321     case OP_EXACT:
   3322     case OP_EXACTI:
   3323     min = max = GET2(ecode, 1);
   3324     ecode += 1 + IMM2_SIZE;
   3325     goto REPEATCHAR;
   3326 
   3327     case OP_POSUPTO:
   3328     case OP_POSUPTOI:
   3329     possessive = TRUE;
   3330     /* Fall through */
   3331 
   3332     case OP_UPTO:
   3333     case OP_UPTOI:
   3334     case OP_MINUPTO:
   3335     case OP_MINUPTOI:
   3336     min = 0;
   3337     max = GET2(ecode, 1);
   3338     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
   3339     ecode += 1 + IMM2_SIZE;
   3340     goto REPEATCHAR;
   3341 
   3342     case OP_POSSTAR:
   3343     case OP_POSSTARI:
   3344     possessive = TRUE;
   3345     min = 0;
   3346     max = INT_MAX;
   3347     ecode++;
   3348     goto REPEATCHAR;
   3349 
   3350     case OP_POSPLUS:
   3351     case OP_POSPLUSI:
   3352     possessive = TRUE;
   3353     min = 1;
   3354     max = INT_MAX;
   3355     ecode++;
   3356     goto REPEATCHAR;
   3357 
   3358     case OP_POSQUERY:
   3359     case OP_POSQUERYI:
   3360     possessive = TRUE;
   3361     min = 0;
   3362     max = 1;
   3363     ecode++;
   3364     goto REPEATCHAR;
   3365 
   3366     case OP_STAR:
   3367     case OP_STARI:
   3368     case OP_MINSTAR:
   3369     case OP_MINSTARI:
   3370     case OP_PLUS:
   3371     case OP_PLUSI:
   3372     case OP_MINPLUS:
   3373     case OP_MINPLUSI:
   3374     case OP_QUERY:
   3375     case OP_QUERYI:
   3376     case OP_MINQUERY:
   3377     case OP_MINQUERYI:
   3378     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
   3379     minimize = (c & 1) != 0;
   3380     min = rep_min[c];                 /* Pick up values from tables; */
   3381     max = rep_max[c];                 /* zero for max => infinity */
   3382     if (max == 0) max = INT_MAX;
   3383 
   3384     /* Common code for all repeated single-character matches. We first check
   3385     for the minimum number of characters. If the minimum equals the maximum, we
   3386     are done. Otherwise, if minimizing, check the rest of the pattern for a
   3387     match; if there isn't one, advance up to the maximum, one character at a
   3388     time.
   3389 
   3390     If maximizing, advance up to the maximum number of matching characters,
   3391     until eptr is past the end of the maximum run. If possessive, we are
   3392     then done (no backing up). Otherwise, match at this position; anything
   3393     other than no match is immediately returned. For nomatch, back up one
   3394     character, unless we are matching \R and the last thing matched was
   3395     \r\n, in which case, back up two bytes. When we reach the first optional
   3396     character position, we can save stack by doing a tail recurse.
   3397 
   3398     The various UTF/non-UTF and caseful/caseless cases are handled separately,
   3399     for speed. */
   3400 
   3401     REPEATCHAR:
   3402 #ifdef SUPPORT_UTF
   3403     if (utf)
   3404       {
   3405       length = 1;
   3406       charptr = ecode;
   3407       GETCHARLEN(fc, ecode, length);
   3408       ecode += length;
   3409 
   3410       /* Handle multibyte character matching specially here. There is
   3411       support for caseless matching if UCP support is present. */
   3412 
   3413       if (length > 1)
   3414         {
   3415 #ifdef SUPPORT_UCP
   3416         pcre_uint32 othercase;
   3417         if (op >= OP_STARI &&     /* Caseless */
   3418             (othercase = UCD_OTHERCASE(fc)) != fc)
   3419           oclength = PRIV(ord2utf)(othercase, occhars);
   3420         else oclength = 0;
   3421 #endif  /* SUPPORT_UCP */
   3422 
   3423         for (i = 1; i <= min; i++)
   3424           {
   3425           if (eptr <= md->end_subject - length &&
   3426             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3427 #ifdef SUPPORT_UCP
   3428           else if (oclength > 0 &&
   3429                    eptr <= md->end_subject - oclength &&
   3430                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3431 #endif  /* SUPPORT_UCP */
   3432           else
   3433             {
   3434             CHECK_PARTIAL();
   3435             RRETURN(MATCH_NOMATCH);
   3436             }
   3437           }
   3438 
   3439         if (min == max) continue;
   3440 
   3441         if (minimize)
   3442           {
   3443           for (fi = min;; fi++)
   3444             {
   3445             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
   3446             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3447             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3448             if (eptr <= md->end_subject - length &&
   3449               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3450 #ifdef SUPPORT_UCP
   3451             else if (oclength > 0 &&
   3452                      eptr <= md->end_subject - oclength &&
   3453                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3454 #endif  /* SUPPORT_UCP */
   3455             else
   3456               {
   3457               CHECK_PARTIAL();
   3458               RRETURN(MATCH_NOMATCH);
   3459               }
   3460             }
   3461           /* Control never gets here */
   3462           }
   3463 
   3464         else  /* Maximize */
   3465           {
   3466           pp = eptr;
   3467           for (i = min; i < max; i++)
   3468             {
   3469             if (eptr <= md->end_subject - length &&
   3470                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3471 #ifdef SUPPORT_UCP
   3472             else if (oclength > 0 &&
   3473                      eptr <= md->end_subject - oclength &&
   3474                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3475 #endif  /* SUPPORT_UCP */
   3476             else
   3477               {
   3478               CHECK_PARTIAL();
   3479               break;
   3480               }
   3481             }
   3482 
   3483           if (possessive) continue;    /* No backtracking */
   3484           for(;;)
   3485             {
   3486             if (eptr <= pp) goto TAIL_RECURSE;
   3487             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
   3488             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3489 #ifdef SUPPORT_UCP
   3490             eptr--;
   3491             BACKCHAR(eptr);
   3492 #else   /* without SUPPORT_UCP */
   3493             eptr -= length;
   3494 #endif  /* SUPPORT_UCP */
   3495             }
   3496           }
   3497         /* Control never gets here */
   3498         }
   3499 
   3500       /* If the length of a UTF-8 character is 1, we fall through here, and
   3501       obey the code as for non-UTF-8 characters below, though in this case the
   3502       value of fc will always be < 128. */
   3503       }
   3504     else
   3505 #endif  /* SUPPORT_UTF */
   3506       /* When not in UTF-8 mode, load a single-byte character. */
   3507       fc = *ecode++;
   3508 
   3509     /* The value of fc at this point is always one character, though we may
   3510     or may not be in UTF mode. The code is duplicated for the caseless and
   3511     caseful cases, for speed, since matching characters is likely to be quite
   3512     common. First, ensure the minimum number of matches are present. If min =
   3513     max, continue at the same level without recursing. Otherwise, if
   3514     minimizing, keep trying the rest of the expression and advancing one
   3515     matching character if failing, up to the maximum. Alternatively, if
   3516     maximizing, find the maximum number of characters and work backwards. */
   3517 
   3518     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   3519       max, (char *)eptr));
   3520 
   3521     if (op >= OP_STARI)  /* Caseless */
   3522       {
   3523 #ifdef COMPILE_PCRE8
   3524       /* fc must be < 128 if UTF is enabled. */
   3525       foc = md->fcc[fc];
   3526 #else
   3527 #ifdef SUPPORT_UTF
   3528 #ifdef SUPPORT_UCP
   3529       if (utf && fc > 127)
   3530         foc = UCD_OTHERCASE(fc);
   3531 #else
   3532       if (utf && fc > 127)
   3533         foc = fc;
   3534 #endif /* SUPPORT_UCP */
   3535       else
   3536 #endif /* SUPPORT_UTF */
   3537         foc = TABLE_GET(fc, md->fcc, fc);
   3538 #endif /* COMPILE_PCRE8 */
   3539 
   3540       for (i = 1; i <= min; i++)
   3541         {
   3542         pcre_uint32 cc;                 /* Faster than pcre_uchar */
   3543         if (eptr >= md->end_subject)
   3544           {
   3545           SCHECK_PARTIAL();
   3546           RRETURN(MATCH_NOMATCH);
   3547           }
   3548         cc = UCHAR21TEST(eptr);
   3549         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3550         eptr++;
   3551         }
   3552       if (min == max) continue;
   3553       if (minimize)
   3554         {
   3555         for (fi = min;; fi++)
   3556           {
   3557           pcre_uint32 cc;               /* Faster than pcre_uchar */
   3558           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
   3559           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3560           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3561           if (eptr >= md->end_subject)
   3562             {
   3563             SCHECK_PARTIAL();
   3564             RRETURN(MATCH_NOMATCH);
   3565             }
   3566           cc = UCHAR21TEST(eptr);
   3567           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3568           eptr++;
   3569           }
   3570         /* Control never gets here */
   3571         }
   3572       else  /* Maximize */
   3573         {
   3574         pp = eptr;
   3575         for (i = min; i < max; i++)
   3576           {
   3577           pcre_uint32 cc;               /* Faster than pcre_uchar */
   3578           if (eptr >= md->end_subject)
   3579             {
   3580             SCHECK_PARTIAL();
   3581             break;
   3582             }
   3583           cc = UCHAR21TEST(eptr);
   3584           if (fc != cc && foc != cc) break;
   3585           eptr++;
   3586           }
   3587         if (possessive) continue;       /* No backtracking */
   3588         for (;;)
   3589           {
   3590           if (eptr == pp) goto TAIL_RECURSE;
   3591           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
   3592           eptr--;
   3593           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3594           }
   3595         /* Control never gets here */
   3596         }
   3597       }
   3598 
   3599     /* Caseful comparisons (includes all multi-byte characters) */
   3600 
   3601     else
   3602       {
   3603       for (i = 1; i <= min; i++)
   3604         {
   3605         if (eptr >= md->end_subject)
   3606           {
   3607           SCHECK_PARTIAL();
   3608           RRETURN(MATCH_NOMATCH);
   3609           }
   3610         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3611         }
   3612 
   3613       if (min == max) continue;
   3614 
   3615       if (minimize)
   3616         {
   3617         for (fi = min;; fi++)
   3618           {
   3619           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
   3620           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3621           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3622           if (eptr >= md->end_subject)
   3623             {
   3624             SCHECK_PARTIAL();
   3625             RRETURN(MATCH_NOMATCH);
   3626             }
   3627           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3628           }
   3629         /* Control never gets here */
   3630         }
   3631       else  /* Maximize */
   3632         {
   3633         pp = eptr;
   3634         for (i = min; i < max; i++)
   3635           {
   3636           if (eptr >= md->end_subject)
   3637             {
   3638             SCHECK_PARTIAL();
   3639             break;
   3640             }
   3641           if (fc != UCHAR21TEST(eptr)) break;
   3642           eptr++;
   3643           }
   3644         if (possessive) continue;    /* No backtracking */
   3645         for (;;)
   3646           {
   3647           if (eptr == pp) goto TAIL_RECURSE;
   3648           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
   3649           eptr--;
   3650           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3651           }
   3652         /* Control never gets here */
   3653         }
   3654       }
   3655     /* Control never gets here */
   3656 
   3657     /* Match a negated single one-byte character. The character we are
   3658     checking can be multibyte. */
   3659 
   3660     case OP_NOT:
   3661     case OP_NOTI:
   3662     if (eptr >= md->end_subject)
   3663       {
   3664       SCHECK_PARTIAL();
   3665       RRETURN(MATCH_NOMATCH);
   3666       }
   3667 #ifdef SUPPORT_UTF
   3668     if (utf)
   3669       {
   3670       register pcre_uint32 ch, och;
   3671 
   3672       ecode++;
   3673       GETCHARINC(ch, ecode);
   3674       GETCHARINC(c, eptr);
   3675 
   3676       if (op == OP_NOT)
   3677         {
   3678         if (ch == c) RRETURN(MATCH_NOMATCH);
   3679         }
   3680       else
   3681         {
   3682 #ifdef SUPPORT_UCP
   3683         if (ch > 127)
   3684           och = UCD_OTHERCASE(ch);
   3685 #else
   3686         if (ch > 127)
   3687           och = ch;
   3688 #endif /* SUPPORT_UCP */
   3689         else
   3690           och = TABLE_GET(ch, md->fcc, ch);
   3691         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
   3692         }
   3693       }
   3694     else
   3695 #endif
   3696       {
   3697       register pcre_uint32 ch = ecode[1];
   3698       c = *eptr++;
   3699       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
   3700         RRETURN(MATCH_NOMATCH);
   3701       ecode += 2;
   3702       }
   3703     break;
   3704 
   3705     /* Match a negated single one-byte character repeatedly. This is almost a
   3706     repeat of the code for a repeated single character, but I haven't found a
   3707     nice way of commoning these up that doesn't require a test of the
   3708     positive/negative option for each character match. Maybe that wouldn't add
   3709     very much to the time taken, but character matching *is* what this is all
   3710     about... */
   3711 
   3712     case OP_NOTEXACT:
   3713     case OP_NOTEXACTI:
   3714     min = max = GET2(ecode, 1);
   3715     ecode += 1 + IMM2_SIZE;
   3716     goto REPEATNOTCHAR;
   3717 
   3718     case OP_NOTUPTO:
   3719     case OP_NOTUPTOI:
   3720     case OP_NOTMINUPTO:
   3721     case OP_NOTMINUPTOI:
   3722     min = 0;
   3723     max = GET2(ecode, 1);
   3724     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
   3725     ecode += 1 + IMM2_SIZE;
   3726     goto REPEATNOTCHAR;
   3727 
   3728     case OP_NOTPOSSTAR:
   3729     case OP_NOTPOSSTARI:
   3730     possessive = TRUE;
   3731     min = 0;
   3732     max = INT_MAX;
   3733     ecode++;
   3734     goto REPEATNOTCHAR;
   3735 
   3736     case OP_NOTPOSPLUS:
   3737     case OP_NOTPOSPLUSI:
   3738     possessive = TRUE;
   3739     min = 1;
   3740     max = INT_MAX;
   3741     ecode++;
   3742     goto REPEATNOTCHAR;
   3743 
   3744     case OP_NOTPOSQUERY:
   3745     case OP_NOTPOSQUERYI:
   3746     possessive = TRUE;
   3747     min = 0;
   3748     max = 1;
   3749     ecode++;
   3750     goto REPEATNOTCHAR;
   3751 
   3752     case OP_NOTPOSUPTO:
   3753     case OP_NOTPOSUPTOI:
   3754     possessive = TRUE;
   3755     min = 0;
   3756     max = GET2(ecode, 1);
   3757     ecode += 1 + IMM2_SIZE;
   3758     goto REPEATNOTCHAR;
   3759 
   3760     case OP_NOTSTAR:
   3761     case OP_NOTSTARI:
   3762     case OP_NOTMINSTAR:
   3763     case OP_NOTMINSTARI:
   3764     case OP_NOTPLUS:
   3765     case OP_NOTPLUSI:
   3766     case OP_NOTMINPLUS:
   3767     case OP_NOTMINPLUSI:
   3768     case OP_NOTQUERY:
   3769     case OP_NOTQUERYI:
   3770     case OP_NOTMINQUERY:
   3771     case OP_NOTMINQUERYI:
   3772     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
   3773     minimize = (c & 1) != 0;
   3774     min = rep_min[c];                 /* Pick up values from tables; */
   3775     max = rep_max[c];                 /* zero for max => infinity */
   3776     if (max == 0) max = INT_MAX;
   3777 
   3778     /* Common code for all repeated single-byte matches. */
   3779 
   3780     REPEATNOTCHAR:
   3781     GETCHARINCTEST(fc, ecode);
   3782 
   3783     /* The code is duplicated for the caseless and caseful cases, for speed,
   3784     since matching characters is likely to be quite common. First, ensure the
   3785     minimum number of matches are present. If min = max, continue at the same
   3786     level without recursing. Otherwise, if minimizing, keep trying the rest of
   3787     the expression and advancing one matching character if failing, up to the
   3788     maximum. Alternatively, if maximizing, find the maximum number of
   3789     characters and work backwards. */
   3790 
   3791     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   3792       max, (char *)eptr));
   3793 
   3794     if (op >= OP_NOTSTARI)     /* Caseless */
   3795       {
   3796 #ifdef SUPPORT_UTF
   3797 #ifdef SUPPORT_UCP
   3798       if (utf && fc > 127)
   3799         foc = UCD_OTHERCASE(fc);
   3800 #else
   3801       if (utf && fc > 127)
   3802         foc = fc;
   3803 #endif /* SUPPORT_UCP */
   3804       else
   3805 #endif /* SUPPORT_UTF */
   3806         foc = TABLE_GET(fc, md->fcc, fc);
   3807 
   3808 #ifdef SUPPORT_UTF
   3809       if (utf)
   3810         {
   3811         register pcre_uint32 d;
   3812         for (i = 1; i <= min; i++)
   3813           {
   3814           if (eptr >= md->end_subject)
   3815             {
   3816             SCHECK_PARTIAL();
   3817             RRETURN(MATCH_NOMATCH);
   3818             }
   3819           GETCHARINC(d, eptr);
   3820           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
   3821           }
   3822         }
   3823       else
   3824 #endif  /* SUPPORT_UTF */
   3825       /* Not UTF mode */
   3826         {
   3827         for (i = 1; i <= min; i++)
   3828           {
   3829           if (eptr >= md->end_subject)
   3830             {
   3831             SCHECK_PARTIAL();
   3832             RRETURN(MATCH_NOMATCH);
   3833             }
   3834           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3835           eptr++;
   3836           }
   3837         }
   3838 
   3839       if (min == max) continue;
   3840 
   3841       if (minimize)
   3842         {
   3843 #ifdef SUPPORT_UTF
   3844         if (utf)
   3845           {
   3846           register pcre_uint32 d;
   3847           for (fi = min;; fi++)
   3848             {
   3849             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
   3850             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3851             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3852             if (eptr >= md->end_subject)
   3853               {
   3854               SCHECK_PARTIAL();
   3855               RRETURN(MATCH_NOMATCH);
   3856               }
   3857             GETCHARINC(d, eptr);
   3858             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
   3859             }
   3860           }
   3861         else
   3862 #endif  /*SUPPORT_UTF */
   3863         /* Not UTF mode */
   3864           {
   3865           for (fi = min;; fi++)
   3866             {
   3867             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
   3868             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3869             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3870             if (eptr >= md->end_subject)
   3871               {
   3872               SCHECK_PARTIAL();
   3873               RRETURN(MATCH_NOMATCH);
   3874               }
   3875             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3876             eptr++;
   3877             }
   3878           }
   3879         /* Control never gets here */
   3880         }
   3881 
   3882       /* Maximize case */
   3883 
   3884       else
   3885         {
   3886         pp = eptr;
   3887 
   3888 #ifdef SUPPORT_UTF
   3889         if (utf)
   3890           {
   3891           register pcre_uint32 d;
   3892           for (i = min; i < max; i++)
   3893             {
   3894             int len = 1;
   3895             if (eptr >= md->end_subject)
   3896               {
   3897               SCHECK_PARTIAL();
   3898               break;
   3899               }
   3900             GETCHARLEN(d, eptr, len);
   3901             if (fc == d || (unsigned int)foc == d) break;
   3902             eptr += len;
   3903             }
   3904           if (possessive) continue;    /* No backtracking */
   3905           for(;;)
   3906             {
   3907             if (eptr <= pp) goto TAIL_RECURSE;
   3908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
   3909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3910             eptr--;
   3911             BACKCHAR(eptr);
   3912             }
   3913           }
   3914         else
   3915 #endif  /* SUPPORT_UTF */
   3916         /* Not UTF mode */
   3917           {
   3918           for (i = min; i < max; i++)
   3919             {
   3920             if (eptr >= md->end_subject)
   3921               {
   3922               SCHECK_PARTIAL();
   3923               break;
   3924               }
   3925             if (fc == *eptr || foc == *eptr) break;
   3926             eptr++;
   3927             }
   3928           if (possessive) continue;    /* No backtracking */
   3929           for (;;)
   3930             {
   3931             if (eptr == pp) goto TAIL_RECURSE;
   3932             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
   3933             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3934             eptr--;
   3935             }
   3936           }
   3937         /* Control never gets here */
   3938         }
   3939       }
   3940 
   3941     /* Caseful comparisons */
   3942 
   3943     else
   3944       {
   3945 #ifdef SUPPORT_UTF
   3946       if (utf)
   3947         {
   3948         register pcre_uint32 d;
   3949         for (i = 1; i <= min; i++)
   3950           {
   3951           if (eptr >= md->end_subject)
   3952             {
   3953             SCHECK_PARTIAL();
   3954             RRETURN(MATCH_NOMATCH);
   3955             }
   3956           GETCHARINC(d, eptr);
   3957           if (fc == d) RRETURN(MATCH_NOMATCH);
   3958           }
   3959         }
   3960       else
   3961 #endif
   3962       /* Not UTF mode */
   3963         {
   3964         for (i = 1; i <= min; i++)
   3965           {
   3966           if (eptr >= md->end_subject)
   3967             {
   3968             SCHECK_PARTIAL();
   3969             RRETURN(MATCH_NOMATCH);
   3970             }
   3971           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   3972           }
   3973         }
   3974 
   3975       if (min == max) continue;
   3976 
   3977       if (minimize)
   3978         {
   3979 #ifdef SUPPORT_UTF
   3980         if (utf)
   3981           {
   3982           register pcre_uint32 d;
   3983           for (fi = min;; fi++)
   3984             {
   3985             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
   3986             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3987             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3988             if (eptr >= md->end_subject)
   3989               {
   3990               SCHECK_PARTIAL();
   3991               RRETURN(MATCH_NOMATCH);
   3992               }
   3993             GETCHARINC(d, eptr);
   3994             if (fc == d) RRETURN(MATCH_NOMATCH);
   3995             }
   3996           }
   3997         else
   3998 #endif
   3999         /* Not UTF mode */
   4000           {
   4001           for (fi = min;; fi++)
   4002             {
   4003             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
   4004             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4005             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4006             if (eptr >= md->end_subject)
   4007               {
   4008               SCHECK_PARTIAL();
   4009               RRETURN(MATCH_NOMATCH);
   4010               }
   4011             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   4012             }
   4013           }
   4014         /* Control never gets here */
   4015         }
   4016 
   4017       /* Maximize case */
   4018 
   4019       else
   4020         {
   4021         pp = eptr;
   4022 
   4023 #ifdef SUPPORT_UTF
   4024         if (utf)
   4025           {
   4026           register pcre_uint32 d;
   4027           for (i = min; i < max; i++)
   4028             {
   4029             int len = 1;
   4030             if (eptr >= md->end_subject)
   4031               {
   4032               SCHECK_PARTIAL();
   4033               break;
   4034               }
   4035             GETCHARLEN(d, eptr, len);
   4036             if (fc == d) break;
   4037             eptr += len;
   4038             }
   4039           if (possessive) continue;    /* No backtracking */
   4040           for(;;)
   4041             {
   4042             if (eptr <= pp) goto TAIL_RECURSE;
   4043             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
   4044             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4045             eptr--;
   4046             BACKCHAR(eptr);
   4047             }
   4048           }
   4049         else
   4050 #endif
   4051         /* Not UTF mode */
   4052           {
   4053           for (i = min; i < max; i++)
   4054             {
   4055             if (eptr >= md->end_subject)
   4056               {
   4057               SCHECK_PARTIAL();
   4058               break;
   4059               }
   4060             if (fc == *eptr) break;
   4061             eptr++;
   4062             }
   4063           if (possessive) continue;    /* No backtracking */
   4064           for (;;)
   4065             {
   4066             if (eptr == pp) goto TAIL_RECURSE;
   4067             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
   4068             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4069             eptr--;
   4070             }
   4071           }
   4072         /* Control never gets here */
   4073         }
   4074       }
   4075     /* Control never gets here */
   4076 
   4077     /* Match a single character type repeatedly; several different opcodes
   4078     share code. This is very similar to the code for single characters, but we
   4079     repeat it in the interests of efficiency. */
   4080 
   4081     case OP_TYPEEXACT:
   4082     min = max = GET2(ecode, 1);
   4083     minimize = TRUE;
   4084     ecode += 1 + IMM2_SIZE;
   4085     goto REPEATTYPE;
   4086 
   4087     case OP_TYPEUPTO:
   4088     case OP_TYPEMINUPTO:
   4089     min = 0;
   4090     max = GET2(ecode, 1);
   4091     minimize = *ecode == OP_TYPEMINUPTO;
   4092     ecode += 1 + IMM2_SIZE;
   4093     goto REPEATTYPE;
   4094 
   4095     case OP_TYPEPOSSTAR:
   4096     possessive = TRUE;
   4097     min = 0;
   4098     max = INT_MAX;
   4099     ecode++;
   4100     goto REPEATTYPE;
   4101 
   4102     case OP_TYPEPOSPLUS:
   4103     possessive = TRUE;
   4104     min = 1;
   4105     max = INT_MAX;
   4106     ecode++;
   4107     goto REPEATTYPE;
   4108 
   4109     case OP_TYPEPOSQUERY:
   4110     possessive = TRUE;
   4111     min = 0;
   4112     max = 1;
   4113     ecode++;
   4114     goto REPEATTYPE;
   4115 
   4116     case OP_TYPEPOSUPTO:
   4117     possessive = TRUE;
   4118     min = 0;
   4119     max = GET2(ecode, 1);
   4120     ecode += 1 + IMM2_SIZE;
   4121     goto REPEATTYPE;
   4122 
   4123     case OP_TYPESTAR:
   4124     case OP_TYPEMINSTAR:
   4125     case OP_TYPEPLUS:
   4126     case OP_TYPEMINPLUS:
   4127     case OP_TYPEQUERY:
   4128     case OP_TYPEMINQUERY:
   4129     c = *ecode++ - OP_TYPESTAR;
   4130     minimize = (c & 1) != 0;
   4131     min = rep_min[c];                 /* Pick up values from tables; */
   4132     max = rep_max[c];                 /* zero for max => infinity */
   4133     if (max == 0) max = INT_MAX;
   4134 
   4135     /* Common code for all repeated single character type matches. Note that
   4136     in UTF mode OP_ANY and others (e.g. OP_ANYNL, OP_HSPACE, OP_NOT_DIGIT) can
   4137     match a multi-unit character; OP_DIGIT, OP_WHITESPACE, OP_WORDCHAR cannot. */
   4138 
   4139     REPEATTYPE:
   4140     ctype = *ecode++;      /* Code for the character type */
   4141 
   4142 #ifdef SUPPORT_UCP
   4143     if (ctype == OP_PROP || ctype == OP_NOTPROP)
   4144       {
   4145       prop_fail_result = ctype == OP_NOTPROP;
   4146       prop_type = *ecode++;
   4147       prop_value = *ecode++;
   4148       }
   4149     else prop_type = -1;
   4150 #endif
   4151 
   4152     /* First, ensure the minimum number of matches are present. Use inline
   4153     code for maximizing the speed, and do the type test once at the start
   4154     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
   4155     is tidier. Also separate the UCP code, which can be the same for both UTF-8
   4156     and single-bytes. */
   4157 
   4158     if (min > 0)
   4159       {
   4160 #ifdef SUPPORT_UCP
   4161       if (prop_type >= 0)
   4162         {
   4163         switch(prop_type)
   4164           {
   4165           case PT_ANY:
   4166           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4167           for (i = 1; i <= min; i++)
   4168             {
   4169             if (eptr >= md->end_subject)
   4170               {
   4171               SCHECK_PARTIAL();
   4172               RRETURN(MATCH_NOMATCH);
   4173               }
   4174             GETCHARINCTEST(c, eptr);
   4175             }
   4176           break;
   4177 
   4178           case PT_LAMP:
   4179           for (i = 1; i <= min; i++)
   4180             {
   4181             int chartype;
   4182             if (eptr >= md->end_subject)
   4183               {
   4184               SCHECK_PARTIAL();
   4185               RRETURN(MATCH_NOMATCH);
   4186               }
   4187             GETCHARINCTEST(c, eptr);
   4188             chartype = UCD_CHARTYPE(c);
   4189             if ((chartype == ucp_Lu ||
   4190                  chartype == ucp_Ll ||
   4191                  chartype == ucp_Lt) == prop_fail_result)
   4192               RRETURN(MATCH_NOMATCH);
   4193             }
   4194           break;
   4195 
   4196           case PT_GC:
   4197           for (i = 1; i <= min; i++)
   4198             {
   4199             if (eptr >= md->end_subject)
   4200               {
   4201               SCHECK_PARTIAL();
   4202               RRETURN(MATCH_NOMATCH);
   4203               }
   4204             GETCHARINCTEST(c, eptr);
   4205             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   4206               RRETURN(MATCH_NOMATCH);
   4207             }
   4208           break;
   4209 
   4210           case PT_PC:
   4211           for (i = 1; i <= min; i++)
   4212             {
   4213             if (eptr >= md->end_subject)
   4214               {
   4215               SCHECK_PARTIAL();
   4216               RRETURN(MATCH_NOMATCH);
   4217               }
   4218             GETCHARINCTEST(c, eptr);
   4219             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   4220               RRETURN(MATCH_NOMATCH);
   4221             }
   4222           break;
   4223 
   4224           case PT_SC:
   4225           for (i = 1; i <= min; i++)
   4226             {
   4227             if (eptr >= md->end_subject)
   4228               {
   4229               SCHECK_PARTIAL();
   4230               RRETURN(MATCH_NOMATCH);
   4231               }
   4232             GETCHARINCTEST(c, eptr);
   4233             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   4234               RRETURN(MATCH_NOMATCH);
   4235             }
   4236           break;
   4237 
   4238           case PT_ALNUM:
   4239           for (i = 1; i <= min; i++)
   4240             {
   4241             int category;
   4242             if (eptr >= md->end_subject)
   4243               {
   4244               SCHECK_PARTIAL();
   4245               RRETURN(MATCH_NOMATCH);
   4246               }
   4247             GETCHARINCTEST(c, eptr);
   4248             category = UCD_CATEGORY(c);
   4249             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   4250               RRETURN(MATCH_NOMATCH);
   4251             }
   4252           break;
   4253 
   4254           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   4255           which means that Perl space and POSIX space are now identical. PCRE
   4256           was changed at release 8.34. */
   4257 
   4258           case PT_SPACE:    /* Perl space */
   4259           case PT_PXSPACE:  /* POSIX space */
   4260           for (i = 1; i <= min; i++)
   4261             {
   4262             if (eptr >= md->end_subject)
   4263               {
   4264               SCHECK_PARTIAL();
   4265               RRETURN(MATCH_NOMATCH);
   4266               }
   4267             GETCHARINCTEST(c, eptr);
   4268             switch(c)
   4269               {
   4270               HSPACE_CASES:
   4271               VSPACE_CASES:
   4272               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4273               break;
   4274 
   4275               default:
   4276               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   4277                 RRETURN(MATCH_NOMATCH);
   4278               break;
   4279               }
   4280             }
   4281           break;
   4282 
   4283           case PT_WORD:
   4284           for (i = 1; i <= min; i++)
   4285             {
   4286             int category;
   4287             if (eptr >= md->end_subject)
   4288               {
   4289               SCHECK_PARTIAL();
   4290               RRETURN(MATCH_NOMATCH);
   4291               }
   4292             GETCHARINCTEST(c, eptr);
   4293             category = UCD_CATEGORY(c);
   4294             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
   4295                    == prop_fail_result)
   4296               RRETURN(MATCH_NOMATCH);
   4297             }
   4298           break;
   4299 
   4300           case PT_CLIST:
   4301           for (i = 1; i <= min; i++)
   4302             {
   4303             const pcre_uint32 *cp;
   4304             if (eptr >= md->end_subject)
   4305               {
   4306               SCHECK_PARTIAL();
   4307               RRETURN(MATCH_NOMATCH);
   4308               }
   4309             GETCHARINCTEST(c, eptr);
   4310             cp = PRIV(ucd_caseless_sets) + prop_value;
   4311             for (;;)
   4312               {
   4313               if (c < *cp)
   4314                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   4315               if (c == *cp++)
   4316                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   4317               }
   4318             }
   4319           break;
   4320 
   4321           case PT_UCNC:
   4322           for (i = 1; i <= min; i++)
   4323             {
   4324             if (eptr >= md->end_subject)
   4325               {
   4326               SCHECK_PARTIAL();
   4327               RRETURN(MATCH_NOMATCH);
   4328               }
   4329             GETCHARINCTEST(c, eptr);
   4330             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   4331                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   4332                  c >= 0xe000) == prop_fail_result)
   4333               RRETURN(MATCH_NOMATCH);
   4334             }
   4335           break;
   4336 
   4337           /* This should not occur */
   4338 
   4339           default:
   4340           RRETURN(PCRE_ERROR_INTERNAL);
   4341           }
   4342         }
   4343 
   4344       /* Match extended Unicode sequences. We will get here only if the
   4345       support is in the binary; otherwise a compile-time error occurs. */
   4346 
   4347       else if (ctype == OP_EXTUNI)
   4348         {
   4349         for (i = 1; i <= min; i++)
   4350           {
   4351           if (eptr >= md->end_subject)
   4352             {
   4353             SCHECK_PARTIAL();
   4354             RRETURN(MATCH_NOMATCH);
   4355             }
   4356           else
   4357             {
   4358             int lgb, rgb;
   4359             GETCHARINCTEST(c, eptr);
   4360             lgb = UCD_GRAPHBREAK(c);
   4361            while (eptr < md->end_subject)
   4362               {
   4363               int len = 1;
   4364               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   4365               rgb = UCD_GRAPHBREAK(c);
   4366               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   4367               lgb = rgb;
   4368               eptr += len;
   4369               }
   4370             }
   4371           CHECK_PARTIAL();
   4372           }
   4373         }
   4374 
   4375       else
   4376 #endif     /* SUPPORT_UCP */
   4377 
   4378 /* Handle all other cases when the coding is UTF-8 */
   4379 
   4380 #ifdef SUPPORT_UTF
   4381       if (utf) switch(ctype)
   4382         {
   4383         case OP_ANY:
   4384         for (i = 1; i <= min; i++)
   4385           {
   4386           if (eptr >= md->end_subject)
   4387             {
   4388             SCHECK_PARTIAL();
   4389             RRETURN(MATCH_NOMATCH);
   4390             }
   4391           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4392           if (md->partial != 0 &&
   4393               eptr + 1 >= md->end_subject &&
   4394               NLBLOCK->nltype == NLTYPE_FIXED &&
   4395               NLBLOCK->nllen == 2 &&
   4396               UCHAR21(eptr) == NLBLOCK->nl[0])
   4397             {
   4398             md->hitend = TRUE;
   4399             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   4400             }
   4401           eptr++;
   4402           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4403           }
   4404         break;
   4405 
   4406         case OP_ALLANY:
   4407         for (i = 1; i <= min; i++)
   4408           {
   4409           if (eptr >= md->end_subject)
   4410             {
   4411             SCHECK_PARTIAL();
   4412             RRETURN(MATCH_NOMATCH);
   4413             }
   4414           eptr++;
   4415           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4416           }
   4417         break;
   4418 
   4419         case OP_ANYBYTE:
   4420         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
   4421         eptr += min;
   4422         break;
   4423 
   4424         case OP_ANYNL:
   4425         for (i = 1; i <= min; i++)
   4426           {
   4427           if (eptr >= md->end_subject)
   4428             {
   4429             SCHECK_PARTIAL();
   4430             RRETURN(MATCH_NOMATCH);
   4431             }
   4432           GETCHARINC(c, eptr);
   4433           switch(c)
   4434             {
   4435             default: RRETURN(MATCH_NOMATCH);
   4436 
   4437             case CHAR_CR:
   4438             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   4439             break;
   4440 
   4441             case CHAR_LF:
   4442             break;
   4443 
   4444             case CHAR_VT:
   4445             case CHAR_FF:
   4446             case CHAR_NEL:
   4447 #ifndef EBCDIC
   4448             case 0x2028:
   4449             case 0x2029:
   4450 #endif  /* Not EBCDIC */
   4451             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   4452             break;
   4453             }
   4454           }
   4455         break;
   4456 
   4457         case OP_NOT_HSPACE:
   4458         for (i = 1; i <= min; i++)
   4459           {
   4460           if (eptr >= md->end_subject)
   4461             {
   4462             SCHECK_PARTIAL();
   4463             RRETURN(MATCH_NOMATCH);
   4464             }
   4465           GETCHARINC(c, eptr);
   4466           switch(c)
   4467             {
   4468             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   4469             default: break;
   4470             }
   4471           }
   4472         break;
   4473 
   4474         case OP_HSPACE:
   4475         for (i = 1; i <= min; i++)
   4476           {
   4477           if (eptr >= md->end_subject)
   4478             {
   4479             SCHECK_PARTIAL();
   4480             RRETURN(MATCH_NOMATCH);
   4481             }
   4482           GETCHARINC(c, eptr);
   4483           switch(c)
   4484             {
   4485             HSPACE_CASES: break;  /* Byte and multibyte cases */
   4486             default: RRETURN(MATCH_NOMATCH);
   4487             }
   4488           }
   4489         break;
   4490 
   4491         case OP_NOT_VSPACE:
   4492         for (i = 1; i <= min; i++)
   4493           {
   4494           if (eptr >= md->end_subject)
   4495             {
   4496             SCHECK_PARTIAL();
   4497             RRETURN(MATCH_NOMATCH);
   4498             }
   4499           GETCHARINC(c, eptr);
   4500           switch(c)
   4501             {
   4502             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   4503             default: break;
   4504             }
   4505           }
   4506         break;
   4507 
   4508         case OP_VSPACE:
   4509         for (i = 1; i <= min; i++)
   4510           {
   4511           if (eptr >= md->end_subject)
   4512             {
   4513             SCHECK_PARTIAL();
   4514             RRETURN(MATCH_NOMATCH);
   4515             }
   4516           GETCHARINC(c, eptr);
   4517           switch(c)
   4518             {
   4519             VSPACE_CASES: break;
   4520             default: RRETURN(MATCH_NOMATCH);
   4521             }
   4522           }
   4523         break;
   4524 
   4525         case OP_NOT_DIGIT:
   4526         for (i = 1; i <= min; i++)
   4527           {
   4528           if (eptr >= md->end_subject)
   4529             {
   4530             SCHECK_PARTIAL();
   4531             RRETURN(MATCH_NOMATCH);
   4532             }
   4533           GETCHARINC(c, eptr);
   4534           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
   4535             RRETURN(MATCH_NOMATCH);
   4536           }
   4537         break;
   4538 
   4539         case OP_DIGIT:
   4540         for (i = 1; i <= min; i++)
   4541           {
   4542           pcre_uint32 cc;
   4543           if (eptr >= md->end_subject)
   4544             {
   4545             SCHECK_PARTIAL();
   4546             RRETURN(MATCH_NOMATCH);
   4547             }
   4548           cc = UCHAR21(eptr);
   4549           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
   4550             RRETURN(MATCH_NOMATCH);
   4551           eptr++;
   4552           /* No need to skip more bytes - we know it's a 1-byte character */
   4553           }
   4554         break;
   4555 
   4556         case OP_NOT_WHITESPACE:
   4557         for (i = 1; i <= min; i++)
   4558           {
   4559           pcre_uint32 cc;
   4560           if (eptr >= md->end_subject)
   4561             {
   4562             SCHECK_PARTIAL();
   4563             RRETURN(MATCH_NOMATCH);
   4564             }
   4565           cc = UCHAR21(eptr);
   4566           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
   4567             RRETURN(MATCH_NOMATCH);
   4568           eptr++;
   4569           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4570           }
   4571         break;
   4572 
   4573         case OP_WHITESPACE:
   4574         for (i = 1; i <= min; i++)
   4575           {
   4576           pcre_uint32 cc;
   4577           if (eptr >= md->end_subject)
   4578             {
   4579             SCHECK_PARTIAL();
   4580             RRETURN(MATCH_NOMATCH);
   4581             }
   4582           cc = UCHAR21(eptr);
   4583           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
   4584             RRETURN(MATCH_NOMATCH);
   4585           eptr++;
   4586           /* No need to skip more bytes - we know it's a 1-byte character */
   4587           }
   4588         break;
   4589 
   4590         case OP_NOT_WORDCHAR:
   4591         for (i = 1; i <= min; i++)
   4592           {
   4593           pcre_uint32 cc;
   4594           if (eptr >= md->end_subject)
   4595             {
   4596             SCHECK_PARTIAL();
   4597             RRETURN(MATCH_NOMATCH);
   4598             }
   4599           cc = UCHAR21(eptr);
   4600           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
   4601             RRETURN(MATCH_NOMATCH);
   4602           eptr++;
   4603           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4604           }
   4605         break;
   4606 
   4607         case OP_WORDCHAR:
   4608         for (i = 1; i <= min; i++)
   4609           {
   4610           pcre_uint32 cc;
   4611           if (eptr >= md->end_subject)
   4612             {
   4613             SCHECK_PARTIAL();
   4614             RRETURN(MATCH_NOMATCH);
   4615             }
   4616           cc = UCHAR21(eptr);
   4617           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
   4618             RRETURN(MATCH_NOMATCH);
   4619           eptr++;
   4620           /* No need to skip more bytes - we know it's a 1-byte character */
   4621           }
   4622         break;
   4623 
   4624         default:
   4625         RRETURN(PCRE_ERROR_INTERNAL);
   4626         }  /* End switch(ctype) */
   4627 
   4628       else
   4629 #endif     /* SUPPORT_UTF */
   4630 
   4631       /* Code for the non-UTF-8 case for minimum matching of operators other
   4632       than OP_PROP and OP_NOTPROP. */
   4633 
   4634       switch(ctype)
   4635         {
   4636         case OP_ANY:
   4637         for (i = 1; i <= min; i++)
   4638           {
   4639           if (eptr >= md->end_subject)
   4640             {
   4641             SCHECK_PARTIAL();
   4642             RRETURN(MATCH_NOMATCH);
   4643             }
   4644           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4645           if (md->partial != 0 &&
   4646               eptr + 1 >= md->end_subject &&
   4647               NLBLOCK->nltype == NLTYPE_FIXED &&
   4648               NLBLOCK->nllen == 2 &&
   4649               *eptr == NLBLOCK->nl[0])
   4650             {
   4651             md->hitend = TRUE;
   4652             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   4653             }
   4654           eptr++;
   4655           }
   4656         break;
   4657 
   4658         case OP_ALLANY:
   4659         if (eptr > md->end_subject - min)
   4660           {
   4661           SCHECK_PARTIAL();
   4662           RRETURN(MATCH_NOMATCH);
   4663           }
   4664         eptr += min;
   4665         break;
   4666 
   4667         case OP_ANYBYTE:
   4668         if (eptr > md->end_subject - min)
   4669           {
   4670           SCHECK_PARTIAL();
   4671           RRETURN(MATCH_NOMATCH);
   4672           }
   4673         eptr += min;
   4674         break;
   4675 
   4676         case OP_ANYNL:
   4677         for (i = 1; i <= min; i++)
   4678           {
   4679           if (eptr >= md->end_subject)
   4680             {
   4681             SCHECK_PARTIAL();
   4682             RRETURN(MATCH_NOMATCH);
   4683             }
   4684           switch(*eptr++)
   4685             {
   4686             default: RRETURN(MATCH_NOMATCH);
   4687 
   4688             case CHAR_CR:
   4689             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
   4690             break;
   4691 
   4692             case CHAR_LF:
   4693             break;
   4694 
   4695             case CHAR_VT:
   4696             case CHAR_FF:
   4697             case CHAR_NEL:
   4698 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4699             case 0x2028:
   4700             case 0x2029:
   4701 #endif
   4702             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   4703             break;
   4704             }
   4705           }
   4706         break;
   4707 
   4708         case OP_NOT_HSPACE:
   4709         for (i = 1; i <= min; i++)
   4710           {
   4711           if (eptr >= md->end_subject)
   4712             {
   4713             SCHECK_PARTIAL();
   4714             RRETURN(MATCH_NOMATCH);
   4715             }
   4716           switch(*eptr++)
   4717             {
   4718             default: break;
   4719             HSPACE_BYTE_CASES:
   4720 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4721             HSPACE_MULTIBYTE_CASES:
   4722 #endif
   4723             RRETURN(MATCH_NOMATCH);
   4724             }
   4725           }
   4726         break;
   4727 
   4728         case OP_HSPACE:
   4729         for (i = 1; i <= min; i++)
   4730           {
   4731           if (eptr >= md->end_subject)
   4732             {
   4733             SCHECK_PARTIAL();
   4734             RRETURN(MATCH_NOMATCH);
   4735             }
   4736           switch(*eptr++)
   4737             {
   4738             default: RRETURN(MATCH_NOMATCH);
   4739             HSPACE_BYTE_CASES:
   4740 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4741             HSPACE_MULTIBYTE_CASES:
   4742 #endif
   4743             break;
   4744             }
   4745           }
   4746         break;
   4747 
   4748         case OP_NOT_VSPACE:
   4749         for (i = 1; i <= min; i++)
   4750           {
   4751           if (eptr >= md->end_subject)
   4752             {
   4753             SCHECK_PARTIAL();
   4754             RRETURN(MATCH_NOMATCH);
   4755             }
   4756           switch(*eptr++)
   4757             {
   4758             VSPACE_BYTE_CASES:
   4759 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4760             VSPACE_MULTIBYTE_CASES:
   4761 #endif
   4762             RRETURN(MATCH_NOMATCH);
   4763             default: break;
   4764             }
   4765           }
   4766         break;
   4767 
   4768         case OP_VSPACE:
   4769         for (i = 1; i <= min; i++)
   4770           {
   4771           if (eptr >= md->end_subject)
   4772             {
   4773             SCHECK_PARTIAL();
   4774             RRETURN(MATCH_NOMATCH);
   4775             }
   4776           switch(*eptr++)
   4777             {
   4778             default: RRETURN(MATCH_NOMATCH);
   4779             VSPACE_BYTE_CASES:
   4780 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4781             VSPACE_MULTIBYTE_CASES:
   4782 #endif
   4783             break;
   4784             }
   4785           }
   4786         break;
   4787 
   4788         case OP_NOT_DIGIT:
   4789         for (i = 1; i <= min; i++)
   4790           {
   4791           if (eptr >= md->end_subject)
   4792             {
   4793             SCHECK_PARTIAL();
   4794             RRETURN(MATCH_NOMATCH);
   4795             }
   4796           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
   4797             RRETURN(MATCH_NOMATCH);
   4798           eptr++;
   4799           }
   4800         break;
   4801 
   4802         case OP_DIGIT:
   4803         for (i = 1; i <= min; i++)
   4804           {
   4805           if (eptr >= md->end_subject)
   4806             {
   4807             SCHECK_PARTIAL();
   4808             RRETURN(MATCH_NOMATCH);
   4809             }
   4810           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
   4811             RRETURN(MATCH_NOMATCH);
   4812           eptr++;
   4813           }
   4814         break;
   4815 
   4816         case OP_NOT_WHITESPACE:
   4817         for (i = 1; i <= min; i++)
   4818           {
   4819           if (eptr >= md->end_subject)
   4820             {
   4821             SCHECK_PARTIAL();
   4822             RRETURN(MATCH_NOMATCH);
   4823             }
   4824           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
   4825             RRETURN(MATCH_NOMATCH);
   4826           eptr++;
   4827           }
   4828         break;
   4829 
   4830         case OP_WHITESPACE:
   4831         for (i = 1; i <= min; i++)
   4832           {
   4833           if (eptr >= md->end_subject)
   4834             {
   4835             SCHECK_PARTIAL();
   4836             RRETURN(MATCH_NOMATCH);
   4837             }
   4838           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
   4839             RRETURN(MATCH_NOMATCH);
   4840           eptr++;
   4841           }
   4842         break;
   4843 
   4844         case OP_NOT_WORDCHAR:
   4845         for (i = 1; i <= min; i++)
   4846           {
   4847           if (eptr >= md->end_subject)
   4848             {
   4849             SCHECK_PARTIAL();
   4850             RRETURN(MATCH_NOMATCH);
   4851             }
   4852           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
   4853             RRETURN(MATCH_NOMATCH);
   4854           eptr++;
   4855           }
   4856         break;
   4857 
   4858         case OP_WORDCHAR:
   4859         for (i = 1; i <= min; i++)
   4860           {
   4861           if (eptr >= md->end_subject)
   4862             {
   4863             SCHECK_PARTIAL();
   4864             RRETURN(MATCH_NOMATCH);
   4865             }
   4866           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
   4867             RRETURN(MATCH_NOMATCH);
   4868           eptr++;
   4869           }
   4870         break;
   4871 
   4872         default:
   4873         RRETURN(PCRE_ERROR_INTERNAL);
   4874         }
   4875       }
   4876 
   4877     /* If min = max, continue at the same level without recursing */
   4878 
   4879     if (min == max) continue;
   4880 
   4881     /* If minimizing, we have to test the rest of the pattern before each
   4882     subsequent match. Again, separate the UTF-8 case for speed, and also
   4883     separate the UCP cases. */
   4884 
   4885     if (minimize)
   4886       {
   4887 #ifdef SUPPORT_UCP
   4888       if (prop_type >= 0)
   4889         {
   4890         switch(prop_type)
   4891           {
   4892           case PT_ANY:
   4893           for (fi = min;; fi++)
   4894             {
   4895             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
   4896             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4897             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4898             if (eptr >= md->end_subject)
   4899               {
   4900               SCHECK_PARTIAL();
   4901               RRETURN(MATCH_NOMATCH);
   4902               }
   4903             GETCHARINCTEST(c, eptr);
   4904             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4905             }
   4906           /* Control never gets here */
   4907 
   4908           case PT_LAMP:
   4909           for (fi = min;; fi++)
   4910             {
   4911             int chartype;
   4912             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
   4913             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4914             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4915             if (eptr >= md->end_subject)
   4916               {
   4917               SCHECK_PARTIAL();
   4918               RRETURN(MATCH_NOMATCH);
   4919               }
   4920             GETCHARINCTEST(c, eptr);
   4921             chartype = UCD_CHARTYPE(c);
   4922             if ((chartype == ucp_Lu ||
   4923                  chartype == ucp_Ll ||
   4924                  chartype == ucp_Lt) == prop_fail_result)
   4925               RRETURN(MATCH_NOMATCH);
   4926             }
   4927           /* Control never gets here */
   4928 
   4929           case PT_GC:
   4930           for (fi = min;; fi++)
   4931             {
   4932             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
   4933             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4934             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4935             if (eptr >= md->end_subject)
   4936               {
   4937               SCHECK_PARTIAL();
   4938               RRETURN(MATCH_NOMATCH);
   4939               }
   4940             GETCHARINCTEST(c, eptr);
   4941             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   4942               RRETURN(MATCH_NOMATCH);
   4943             }
   4944           /* Control never gets here */
   4945 
   4946           case PT_PC:
   4947           for (fi = min;; fi++)
   4948             {
   4949             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
   4950             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4951             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4952             if (eptr >= md->end_subject)
   4953               {
   4954               SCHECK_PARTIAL();
   4955               RRETURN(MATCH_NOMATCH);
   4956               }
   4957             GETCHARINCTEST(c, eptr);
   4958             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   4959               RRETURN(MATCH_NOMATCH);
   4960             }
   4961           /* Control never gets here */
   4962 
   4963           case PT_SC:
   4964           for (fi = min;; fi++)
   4965             {
   4966             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
   4967             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4968             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4969             if (eptr >= md->end_subject)
   4970               {
   4971               SCHECK_PARTIAL();
   4972               RRETURN(MATCH_NOMATCH);
   4973               }
   4974             GETCHARINCTEST(c, eptr);
   4975             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   4976               RRETURN(MATCH_NOMATCH);
   4977             }
   4978           /* Control never gets here */
   4979 
   4980           case PT_ALNUM:
   4981           for (fi = min;; fi++)
   4982             {
   4983             int category;
   4984             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
   4985             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4986             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4987             if (eptr >= md->end_subject)
   4988               {
   4989               SCHECK_PARTIAL();
   4990               RRETURN(MATCH_NOMATCH);
   4991               }
   4992             GETCHARINCTEST(c, eptr);
   4993             category = UCD_CATEGORY(c);
   4994             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   4995               RRETURN(MATCH_NOMATCH);
   4996             }
   4997           /* Control never gets here */
   4998 
   4999           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   5000           which means that Perl space and POSIX space are now identical. PCRE
   5001           was changed at release 8.34. */
   5002 
   5003           case PT_SPACE:    /* Perl space */
   5004           case PT_PXSPACE:  /* POSIX space */
   5005           for (fi = min;; fi++)
   5006             {
   5007             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
   5008             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5009             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5010             if (eptr >= md->end_subject)
   5011               {
   5012               SCHECK_PARTIAL();
   5013               RRETURN(MATCH_NOMATCH);
   5014               }
   5015             GETCHARINCTEST(c, eptr);
   5016             switch(c)
   5017               {
   5018               HSPACE_CASES:
   5019               VSPACE_CASES:
   5020               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   5021               break;
   5022 
   5023               default:
   5024               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5025                 RRETURN(MATCH_NOMATCH);
   5026               break;
   5027               }
   5028             }
   5029           /* Control never gets here */
   5030 
   5031           case PT_WORD:
   5032           for (fi = min;; fi++)
   5033             {
   5034             int category;
   5035             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
   5036             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5037             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5038             if (eptr >= md->end_subject)
   5039               {
   5040               SCHECK_PARTIAL();
   5041               RRETURN(MATCH_NOMATCH);
   5042               }
   5043             GETCHARINCTEST(c, eptr);
   5044             category = UCD_CATEGORY(c);
   5045             if ((category == ucp_L ||
   5046                  category == ucp_N ||
   5047                  c == CHAR_UNDERSCORE)
   5048                    == prop_fail_result)
   5049               RRETURN(MATCH_NOMATCH);
   5050             }
   5051           /* Control never gets here */
   5052 
   5053           case PT_CLIST:
   5054           for (fi = min;; fi++)
   5055             {
   5056             const pcre_uint32 *cp;
   5057             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
   5058             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5059             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5060             if (eptr >= md->end_subject)
   5061               {
   5062               SCHECK_PARTIAL();
   5063               RRETURN(MATCH_NOMATCH);
   5064               }
   5065             GETCHARINCTEST(c, eptr);
   5066             cp = PRIV(ucd_caseless_sets) + prop_value;
   5067             for (;;)
   5068               {
   5069               if (c < *cp)
   5070                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   5071               if (c == *cp++)
   5072                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   5073               }
   5074             }
   5075           /* Control never gets here */
   5076 
   5077           case PT_UCNC:
   5078           for (fi = min;; fi++)
   5079             {
   5080             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
   5081             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5082             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5083             if (eptr >= md->end_subject)
   5084               {
   5085               SCHECK_PARTIAL();
   5086               RRETURN(MATCH_NOMATCH);
   5087               }
   5088             GETCHARINCTEST(c, eptr);
   5089             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5090                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5091                  c >= 0xe000) == prop_fail_result)
   5092               RRETURN(MATCH_NOMATCH);
   5093             }
   5094           /* Control never gets here */
   5095 
   5096           /* This should never occur */
   5097           default:
   5098           RRETURN(PCRE_ERROR_INTERNAL);
   5099           }
   5100         }
   5101 
   5102       /* Match extended Unicode sequences. We will get here only if the
   5103       support is in the binary; otherwise a compile-time error occurs. */
   5104 
   5105       else if (ctype == OP_EXTUNI)
   5106         {
   5107         for (fi = min;; fi++)
   5108           {
   5109           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
   5110           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5111           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5112           if (eptr >= md->end_subject)
   5113             {
   5114             SCHECK_PARTIAL();
   5115             RRETURN(MATCH_NOMATCH);
   5116             }
   5117           else
   5118             {
   5119             int lgb, rgb;
   5120             GETCHARINCTEST(c, eptr);
   5121             lgb = UCD_GRAPHBREAK(c);
   5122             while (eptr < md->end_subject)
   5123               {
   5124               int len = 1;
   5125               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5126               rgb = UCD_GRAPHBREAK(c);
   5127               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5128               lgb = rgb;
   5129               eptr += len;
   5130               }
   5131             }
   5132           CHECK_PARTIAL();
   5133           }
   5134         }
   5135       else
   5136 #endif     /* SUPPORT_UCP */
   5137 
   5138 #ifdef SUPPORT_UTF
   5139       if (utf)
   5140         {
   5141         for (fi = min;; fi++)
   5142           {
   5143           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
   5144           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5145           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5146           if (eptr >= md->end_subject)
   5147             {
   5148             SCHECK_PARTIAL();
   5149             RRETURN(MATCH_NOMATCH);
   5150             }
   5151           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5152             RRETURN(MATCH_NOMATCH);
   5153           GETCHARINC(c, eptr);
   5154           switch(ctype)
   5155             {
   5156             case OP_ANY:               /* This is the non-NL case */
   5157             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5158                 eptr >= md->end_subject &&
   5159                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5160                 NLBLOCK->nllen == 2 &&
   5161                 c == NLBLOCK->nl[0])
   5162               {
   5163               md->hitend = TRUE;
   5164               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5165               }
   5166             break;
   5167 
   5168             case OP_ALLANY:
   5169             case OP_ANYBYTE:
   5170             break;
   5171 
   5172             case OP_ANYNL:
   5173             switch(c)
   5174               {
   5175               default: RRETURN(MATCH_NOMATCH);
   5176               case CHAR_CR:
   5177               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   5178               break;
   5179 
   5180               case CHAR_LF:
   5181               break;
   5182 
   5183               case CHAR_VT:
   5184               case CHAR_FF:
   5185               case CHAR_NEL:
   5186 #ifndef EBCDIC
   5187               case 0x2028:
   5188               case 0x2029:
   5189 #endif  /* Not EBCDIC */
   5190               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   5191               break;
   5192               }
   5193             break;
   5194 
   5195             case OP_NOT_HSPACE:
   5196             switch(c)
   5197               {
   5198               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5199               default: break;
   5200               }
   5201             break;
   5202 
   5203             case OP_HSPACE:
   5204             switch(c)
   5205               {
   5206               HSPACE_CASES: break;
   5207               default: RRETURN(MATCH_NOMATCH);
   5208               }
   5209             break;
   5210 
   5211             case OP_NOT_VSPACE:
   5212             switch(c)
   5213               {
   5214               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5215               default: break;
   5216               }
   5217             break;
   5218 
   5219             case OP_VSPACE:
   5220             switch(c)
   5221               {
   5222               VSPACE_CASES: break;
   5223               default: RRETURN(MATCH_NOMATCH);
   5224               }
   5225             break;
   5226 
   5227             case OP_NOT_DIGIT:
   5228             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
   5229               RRETURN(MATCH_NOMATCH);
   5230             break;
   5231 
   5232             case OP_DIGIT:
   5233             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
   5234               RRETURN(MATCH_NOMATCH);
   5235             break;
   5236 
   5237             case OP_NOT_WHITESPACE:
   5238             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
   5239               RRETURN(MATCH_NOMATCH);
   5240             break;
   5241 
   5242             case OP_WHITESPACE:
   5243             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
   5244               RRETURN(MATCH_NOMATCH);
   5245             break;
   5246 
   5247             case OP_NOT_WORDCHAR:
   5248             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
   5249               RRETURN(MATCH_NOMATCH);
   5250             break;
   5251 
   5252             case OP_WORDCHAR:
   5253             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
   5254               RRETURN(MATCH_NOMATCH);
   5255             break;
   5256 
   5257             default:
   5258             RRETURN(PCRE_ERROR_INTERNAL);
   5259             }
   5260           }
   5261         }
   5262       else
   5263 #endif
   5264       /* Not UTF mode */
   5265         {
   5266         for (fi = min;; fi++)
   5267           {
   5268           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
   5269           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5270           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5271           if (eptr >= md->end_subject)
   5272             {
   5273             SCHECK_PARTIAL();
   5274             RRETURN(MATCH_NOMATCH);
   5275             }
   5276           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5277             RRETURN(MATCH_NOMATCH);
   5278           c = *eptr++;
   5279           switch(ctype)
   5280             {
   5281             case OP_ANY:               /* This is the non-NL case */
   5282             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5283                 eptr >= md->end_subject &&
   5284                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5285                 NLBLOCK->nllen == 2 &&
   5286                 c == NLBLOCK->nl[0])
   5287               {
   5288               md->hitend = TRUE;
   5289               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5290               }
   5291             break;
   5292 
   5293             case OP_ALLANY:
   5294             case OP_ANYBYTE:
   5295             break;
   5296 
   5297             case OP_ANYNL:
   5298             switch(c)
   5299               {
   5300               default: RRETURN(MATCH_NOMATCH);
   5301               case CHAR_CR:
   5302               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
   5303               break;
   5304 
   5305               case CHAR_LF:
   5306               break;
   5307 
   5308               case CHAR_VT:
   5309               case CHAR_FF:
   5310               case CHAR_NEL:
   5311 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5312               case 0x2028:
   5313               case 0x2029:
   5314 #endif
   5315               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   5316               break;
   5317               }
   5318             break;
   5319 
   5320             case OP_NOT_HSPACE:
   5321             switch(c)
   5322               {
   5323               default: break;
   5324               HSPACE_BYTE_CASES:
   5325 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5326               HSPACE_MULTIBYTE_CASES:
   5327 #endif
   5328               RRETURN(MATCH_NOMATCH);
   5329               }
   5330             break;
   5331 
   5332             case OP_HSPACE:
   5333             switch(c)
   5334               {
   5335               default: RRETURN(MATCH_NOMATCH);
   5336               HSPACE_BYTE_CASES:
   5337 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5338               HSPACE_MULTIBYTE_CASES:
   5339 #endif
   5340               break;
   5341               }
   5342             break;
   5343 
   5344             case OP_NOT_VSPACE:
   5345             switch(c)
   5346               {
   5347               default: break;
   5348               VSPACE_BYTE_CASES:
   5349 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5350               VSPACE_MULTIBYTE_CASES:
   5351 #endif
   5352               RRETURN(MATCH_NOMATCH);
   5353               }
   5354             break;
   5355 
   5356             case OP_VSPACE:
   5357             switch(c)
   5358               {
   5359               default: RRETURN(MATCH_NOMATCH);
   5360               VSPACE_BYTE_CASES:
   5361 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5362               VSPACE_MULTIBYTE_CASES:
   5363 #endif
   5364               break;
   5365               }
   5366             break;
   5367 
   5368             case OP_NOT_DIGIT:
   5369             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
   5370             break;
   5371 
   5372             case OP_DIGIT:
   5373             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
   5374             break;
   5375 
   5376             case OP_NOT_WHITESPACE:
   5377             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
   5378             break;
   5379 
   5380             case OP_WHITESPACE:
   5381             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
   5382             break;
   5383 
   5384             case OP_NOT_WORDCHAR:
   5385             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
   5386             break;
   5387 
   5388             case OP_WORDCHAR:
   5389             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
   5390             break;
   5391 
   5392             default:
   5393             RRETURN(PCRE_ERROR_INTERNAL);
   5394             }
   5395           }
   5396         }
   5397       /* Control never gets here */
   5398       }
   5399 
   5400     /* If maximizing, it is worth using inline code for speed, doing the type
   5401     test once at the start (i.e. keep it out of the loop). Again, keep the
   5402     UTF-8 and UCP stuff separate. */
   5403 
   5404     else
   5405       {
   5406       pp = eptr;  /* Remember where we started */
   5407 
   5408 #ifdef SUPPORT_UCP
   5409       if (prop_type >= 0)
   5410         {
   5411         switch(prop_type)
   5412           {
   5413           case PT_ANY:
   5414           for (i = min; i < max; i++)
   5415             {
   5416             int len = 1;
   5417             if (eptr >= md->end_subject)
   5418               {
   5419               SCHECK_PARTIAL();
   5420               break;
   5421               }
   5422             GETCHARLENTEST(c, eptr, len);
   5423             if (prop_fail_result) break;
   5424             eptr+= len;
   5425             }
   5426           break;
   5427 
   5428           case PT_LAMP:
   5429           for (i = min; i < max; i++)
   5430             {
   5431             int chartype;
   5432             int len = 1;
   5433             if (eptr >= md->end_subject)
   5434               {
   5435               SCHECK_PARTIAL();
   5436               break;
   5437               }
   5438             GETCHARLENTEST(c, eptr, len);
   5439             chartype = UCD_CHARTYPE(c);
   5440             if ((chartype == ucp_Lu ||
   5441                  chartype == ucp_Ll ||
   5442                  chartype == ucp_Lt) == prop_fail_result)
   5443               break;
   5444             eptr+= len;
   5445             }
   5446           break;
   5447 
   5448           case PT_GC:
   5449           for (i = min; i < max; i++)
   5450             {
   5451             int len = 1;
   5452             if (eptr >= md->end_subject)
   5453               {
   5454               SCHECK_PARTIAL();
   5455               break;
   5456               }
   5457             GETCHARLENTEST(c, eptr, len);
   5458             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
   5459             eptr+= len;
   5460             }
   5461           break;
   5462 
   5463           case PT_PC:
   5464           for (i = min; i < max; i++)
   5465             {
   5466             int len = 1;
   5467             if (eptr >= md->end_subject)
   5468               {
   5469               SCHECK_PARTIAL();
   5470               break;
   5471               }
   5472             GETCHARLENTEST(c, eptr, len);
   5473             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
   5474             eptr+= len;
   5475             }
   5476           break;
   5477 
   5478           case PT_SC:
   5479           for (i = min; i < max; i++)
   5480             {
   5481             int len = 1;
   5482             if (eptr >= md->end_subject)
   5483               {
   5484               SCHECK_PARTIAL();
   5485               break;
   5486               }
   5487             GETCHARLENTEST(c, eptr, len);
   5488             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
   5489             eptr+= len;
   5490             }
   5491           break;
   5492 
   5493           case PT_ALNUM:
   5494           for (i = min; i < max; i++)
   5495             {
   5496             int category;
   5497             int len = 1;
   5498             if (eptr >= md->end_subject)
   5499               {
   5500               SCHECK_PARTIAL();
   5501               break;
   5502               }
   5503             GETCHARLENTEST(c, eptr, len);
   5504             category = UCD_CATEGORY(c);
   5505             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   5506               break;
   5507             eptr+= len;
   5508             }
   5509           break;
   5510 
   5511           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   5512           which means that Perl space and POSIX space are now identical. PCRE
   5513           was changed at release 8.34. */
   5514 
   5515           case PT_SPACE:    /* Perl space */
   5516           case PT_PXSPACE:  /* POSIX space */
   5517           for (i = min; i < max; i++)
   5518             {
   5519             int len = 1;
   5520             if (eptr >= md->end_subject)
   5521               {
   5522               SCHECK_PARTIAL();
   5523               break;
   5524               }
   5525             GETCHARLENTEST(c, eptr, len);
   5526             switch(c)
   5527               {
   5528               HSPACE_CASES:
   5529               VSPACE_CASES:
   5530               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
   5531               break;
   5532 
   5533               default:
   5534               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5535                 goto ENDLOOP99;   /* Break the loop */
   5536               break;
   5537               }
   5538             eptr+= len;
   5539             }
   5540           ENDLOOP99:
   5541           break;
   5542 
   5543           case PT_WORD:
   5544           for (i = min; i < max; i++)
   5545             {
   5546             int category;
   5547             int len = 1;
   5548             if (eptr >= md->end_subject)
   5549               {
   5550               SCHECK_PARTIAL();
   5551               break;
   5552               }
   5553             GETCHARLENTEST(c, eptr, len);
   5554             category = UCD_CATEGORY(c);
   5555             if ((category == ucp_L || category == ucp_N ||
   5556                  c == CHAR_UNDERSCORE) == prop_fail_result)
   5557               break;
   5558             eptr+= len;
   5559             }
   5560           break;
   5561 
   5562           case PT_CLIST:
   5563           for (i = min; i < max; i++)
   5564             {
   5565             const pcre_uint32 *cp;
   5566             int len = 1;
   5567             if (eptr >= md->end_subject)
   5568               {
   5569               SCHECK_PARTIAL();
   5570               break;
   5571               }
   5572             GETCHARLENTEST(c, eptr, len);
   5573             cp = PRIV(ucd_caseless_sets) + prop_value;
   5574             for (;;)
   5575               {
   5576               if (c < *cp)
   5577                 { if (prop_fail_result) break; else goto GOT_MAX; }
   5578               if (c == *cp++)
   5579                 { if (prop_fail_result) goto GOT_MAX; else break; }
   5580               }
   5581             eptr += len;
   5582             }
   5583           GOT_MAX:
   5584           break;
   5585 
   5586           case PT_UCNC:
   5587           for (i = min; i < max; i++)
   5588             {
   5589             int len = 1;
   5590             if (eptr >= md->end_subject)
   5591               {
   5592               SCHECK_PARTIAL();
   5593               break;
   5594               }
   5595             GETCHARLENTEST(c, eptr, len);
   5596             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5597                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5598                  c >= 0xe000) == prop_fail_result)
   5599               break;
   5600             eptr += len;
   5601             }
   5602           break;
   5603 
   5604           default:
   5605           RRETURN(PCRE_ERROR_INTERNAL);
   5606           }
   5607 
   5608         /* eptr is now past the end of the maximum run */
   5609 
   5610         if (possessive) continue;    /* No backtracking */
   5611         for(;;)
   5612           {
   5613           if (eptr <= pp) goto TAIL_RECURSE;
   5614           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
   5615           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5616           eptr--;
   5617           if (utf) BACKCHAR(eptr);
   5618           }
   5619         }
   5620 
   5621       /* Match extended Unicode grapheme clusters. We will get here only if the
   5622       support is in the binary; otherwise a compile-time error occurs. */
   5623 
   5624       else if (ctype == OP_EXTUNI)
   5625         {
   5626         for (i = min; i < max; i++)
   5627           {
   5628           if (eptr >= md->end_subject)
   5629             {
   5630             SCHECK_PARTIAL();
   5631             break;
   5632             }
   5633           else
   5634             {
   5635             int lgb, rgb;
   5636             GETCHARINCTEST(c, eptr);
   5637             lgb = UCD_GRAPHBREAK(c);
   5638             while (eptr < md->end_subject)
   5639               {
   5640               int len = 1;
   5641               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5642               rgb = UCD_GRAPHBREAK(c);
   5643               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5644               lgb = rgb;
   5645               eptr += len;
   5646               }
   5647             }
   5648           CHECK_PARTIAL();
   5649           }
   5650 
   5651         /* eptr is now past the end of the maximum run */
   5652 
   5653         if (possessive) continue;    /* No backtracking */
   5654 
   5655         /* We use <= pp rather than == pp to detect the start of the run while
   5656         backtracking because the use of \C in UTF mode can cause BACKCHAR to
   5657         move back past pp. This is just palliative; the use of \C in UTF mode
   5658         is fraught with danger. */
   5659 
   5660         for(;;)
   5661           {
   5662           int lgb, rgb;
   5663           PCRE_PUCHAR fptr;
   5664 
   5665           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
   5666           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
   5667           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5668 
   5669           /* Backtracking over an extended grapheme cluster involves inspecting
   5670           the previous two characters (if present) to see if a break is
   5671           permitted between them. */
   5672 
   5673           eptr--;
   5674           if (!utf) c = *eptr; else
   5675             {
   5676             BACKCHAR(eptr);
   5677             GETCHAR(c, eptr);
   5678             }
   5679           rgb = UCD_GRAPHBREAK(c);
   5680 
   5681           for (;;)
   5682             {
   5683             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
   5684             fptr = eptr - 1;
   5685             if (!utf) c = *fptr; else
   5686               {
   5687               BACKCHAR(fptr);
   5688               GETCHAR(c, fptr);
   5689               }
   5690             lgb = UCD_GRAPHBREAK(c);
   5691             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5692             eptr = fptr;
   5693             rgb = lgb;
   5694             }
   5695           }
   5696         }
   5697 
   5698       else
   5699 #endif   /* SUPPORT_UCP */
   5700 
   5701 #ifdef SUPPORT_UTF
   5702       if (utf)
   5703         {
   5704         switch(ctype)
   5705           {
   5706           case OP_ANY:
   5707           for (i = min; i < max; i++)
   5708             {
   5709             if (eptr >= md->end_subject)
   5710               {
   5711               SCHECK_PARTIAL();
   5712               break;
   5713               }
   5714             if (IS_NEWLINE(eptr)) break;
   5715             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5716                 eptr + 1 >= md->end_subject &&
   5717                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5718                 NLBLOCK->nllen == 2 &&
   5719                 UCHAR21(eptr) == NLBLOCK->nl[0])
   5720               {
   5721               md->hitend = TRUE;
   5722               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5723               }
   5724             eptr++;
   5725             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   5726             }
   5727           break;
   5728 
   5729           case OP_ALLANY:
   5730           if (max < INT_MAX)
   5731             {
   5732             for (i = min; i < max; i++)
   5733               {
   5734               if (eptr >= md->end_subject)
   5735                 {
   5736                 SCHECK_PARTIAL();
   5737                 break;
   5738                 }
   5739               eptr++;
   5740               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   5741               }
   5742             }
   5743           else
   5744             {
   5745             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
   5746             SCHECK_PARTIAL();
   5747             }
   5748           break;
   5749 
   5750           /* The byte case is the same as non-UTF8 */
   5751 
   5752           case OP_ANYBYTE:
   5753           c = max - min;
   5754           if (c > (unsigned int)(md->end_subject - eptr))
   5755             {
   5756             eptr = md->end_subject;
   5757             SCHECK_PARTIAL();
   5758             }
   5759           else eptr += c;
   5760           break;
   5761 
   5762           case OP_ANYNL:
   5763           for (i = min; i < max; i++)
   5764             {
   5765             int len = 1;
   5766             if (eptr >= md->end_subject)
   5767               {
   5768               SCHECK_PARTIAL();
   5769               break;
   5770               }
   5771             GETCHARLEN(c, eptr, len);
   5772             if (c == CHAR_CR)
   5773               {
   5774               if (++eptr >= md->end_subject) break;
   5775               if (UCHAR21(eptr) == CHAR_LF) eptr++;
   5776               }
   5777             else
   5778               {
   5779               if (c != CHAR_LF &&
   5780                   (md->bsr_anycrlf ||
   5781                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   5782 #ifndef EBCDIC
   5783                     && c != 0x2028 && c != 0x2029
   5784 #endif  /* Not EBCDIC */
   5785                     )))
   5786                 break;
   5787               eptr += len;
   5788               }
   5789             }
   5790           break;
   5791 
   5792           case OP_NOT_HSPACE:
   5793           case OP_HSPACE:
   5794           for (i = min; i < max; i++)
   5795             {
   5796             BOOL gotspace;
   5797             int len = 1;
   5798             if (eptr >= md->end_subject)
   5799               {
   5800               SCHECK_PARTIAL();
   5801               break;
   5802               }
   5803             GETCHARLEN(c, eptr, len);
   5804             switch(c)
   5805               {
   5806               HSPACE_CASES: gotspace = TRUE; break;
   5807               default: gotspace = FALSE; break;
   5808               }
   5809             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
   5810             eptr += len;
   5811             }
   5812           break;
   5813 
   5814           case OP_NOT_VSPACE:
   5815           case OP_VSPACE:
   5816           for (i = min; i < max; i++)
   5817             {
   5818             BOOL gotspace;
   5819             int len = 1;
   5820             if (eptr >= md->end_subject)
   5821               {
   5822               SCHECK_PARTIAL();
   5823               break;
   5824               }
   5825             GETCHARLEN(c, eptr, len);
   5826             switch(c)
   5827               {
   5828               VSPACE_CASES: gotspace = TRUE; break;
   5829               default: gotspace = FALSE; break;
   5830               }
   5831             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
   5832             eptr += len;
   5833             }
   5834           break;
   5835 
   5836           case OP_NOT_DIGIT:
   5837           for (i = min; i < max; i++)
   5838             {
   5839             int len = 1;
   5840             if (eptr >= md->end_subject)
   5841               {
   5842               SCHECK_PARTIAL();
   5843               break;
   5844               }
   5845             GETCHARLEN(c, eptr, len);
   5846             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
   5847             eptr+= len;
   5848             }
   5849           break;
   5850 
   5851           case OP_DIGIT:
   5852           for (i = min; i < max; i++)
   5853             {
   5854             int len = 1;
   5855             if (eptr >= md->end_subject)
   5856               {
   5857               SCHECK_PARTIAL();
   5858               break;
   5859               }
   5860             GETCHARLEN(c, eptr, len);
   5861             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
   5862             eptr+= len;
   5863             }
   5864           break;
   5865 
   5866           case OP_NOT_WHITESPACE:
   5867           for (i = min; i < max; i++)
   5868             {
   5869             int len = 1;
   5870             if (eptr >= md->end_subject)
   5871               {
   5872               SCHECK_PARTIAL();
   5873               break;
   5874               }
   5875             GETCHARLEN(c, eptr, len);
   5876             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
   5877             eptr+= len;
   5878             }
   5879           break;
   5880 
   5881           case OP_WHITESPACE:
   5882           for (i = min; i < max; i++)
   5883             {
   5884             int len = 1;
   5885             if (eptr >= md->end_subject)
   5886               {
   5887               SCHECK_PARTIAL();
   5888               break;
   5889               }
   5890             GETCHARLEN(c, eptr, len);
   5891             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
   5892             eptr+= len;
   5893             }
   5894           break;
   5895 
   5896           case OP_NOT_WORDCHAR:
   5897           for (i = min; i < max; i++)
   5898             {
   5899             int len = 1;
   5900             if (eptr >= md->end_subject)
   5901               {
   5902               SCHECK_PARTIAL();
   5903               break;
   5904               }
   5905             GETCHARLEN(c, eptr, len);
   5906             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
   5907             eptr+= len;
   5908             }
   5909           break;
   5910 
   5911           case OP_WORDCHAR:
   5912           for (i = min; i < max; i++)
   5913             {
   5914             int len = 1;
   5915             if (eptr >= md->end_subject)
   5916               {
   5917               SCHECK_PARTIAL();
   5918               break;
   5919               }
   5920             GETCHARLEN(c, eptr, len);
   5921             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
   5922             eptr+= len;
   5923             }
   5924           break;
   5925 
   5926           default:
   5927           RRETURN(PCRE_ERROR_INTERNAL);
   5928           }
   5929 
   5930         if (possessive) continue;    /* No backtracking */
   5931         for(;;)
   5932           {
   5933           if (eptr <= pp) goto TAIL_RECURSE;
   5934           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
   5935           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5936           eptr--;
   5937           BACKCHAR(eptr);
   5938           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
   5939               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
   5940           }
   5941         }
   5942       else
   5943 #endif  /* SUPPORT_UTF */
   5944       /* Not UTF mode */
   5945         {
   5946         switch(ctype)
   5947           {
   5948           case OP_ANY:
   5949           for (i = min; i < max; i++)
   5950             {
   5951             if (eptr >= md->end_subject)
   5952               {
   5953               SCHECK_PARTIAL();
   5954               break;
   5955               }
   5956             if (IS_NEWLINE(eptr)) break;
   5957             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5958                 eptr + 1 >= md->end_subject &&
   5959                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5960                 NLBLOCK->nllen == 2 &&
   5961                 *eptr == NLBLOCK->nl[0])
   5962               {
   5963               md->hitend = TRUE;
   5964               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5965               }
   5966             eptr++;
   5967             }
   5968           break;
   5969 
   5970           case OP_ALLANY:
   5971           case OP_ANYBYTE:
   5972           c = max - min;
   5973           if (c > (unsigned int)(md->end_subject - eptr))
   5974             {
   5975             eptr = md->end_subject;
   5976             SCHECK_PARTIAL();
   5977             }
   5978           else eptr += c;
   5979           break;
   5980 
   5981           case OP_ANYNL:
   5982           for (i = min; i < max; i++)
   5983             {
   5984             if (eptr >= md->end_subject)
   5985               {
   5986               SCHECK_PARTIAL();
   5987               break;
   5988               }
   5989             c = *eptr;
   5990             if (c == CHAR_CR)
   5991               {
   5992               if (++eptr >= md->end_subject) break;
   5993               if (*eptr == CHAR_LF) eptr++;
   5994               }
   5995             else
   5996               {
   5997               if (c != CHAR_LF && (md->bsr_anycrlf ||
   5998                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   5999 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6000                  && c != 0x2028 && c != 0x2029
   6001 #endif
   6002                  ))) break;
   6003               eptr++;
   6004               }
   6005             }
   6006           break;
   6007 
   6008           case OP_NOT_HSPACE:
   6009           for (i = min; i < max; i++)
   6010             {
   6011             if (eptr >= md->end_subject)
   6012               {
   6013               SCHECK_PARTIAL();
   6014               break;
   6015               }
   6016             switch(*eptr)
   6017               {
   6018               default: eptr++; break;
   6019               HSPACE_BYTE_CASES:
   6020 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6021               HSPACE_MULTIBYTE_CASES:
   6022 #endif
   6023               goto ENDLOOP00;
   6024               }
   6025             }
   6026           ENDLOOP00:
   6027           break;
   6028 
   6029           case OP_HSPACE:
   6030           for (i = min; i < max; i++)
   6031             {
   6032             if (eptr >= md->end_subject)
   6033               {
   6034               SCHECK_PARTIAL();
   6035               break;
   6036               }
   6037             switch(*eptr)
   6038               {
   6039               default: goto ENDLOOP01;
   6040               HSPACE_BYTE_CASES:
   6041 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6042               HSPACE_MULTIBYTE_CASES:
   6043 #endif
   6044               eptr++; break;
   6045               }
   6046             }
   6047           ENDLOOP01:
   6048           break;
   6049 
   6050           case OP_NOT_VSPACE:
   6051           for (i = min; i < max; i++)
   6052             {
   6053             if (eptr >= md->end_subject)
   6054               {
   6055               SCHECK_PARTIAL();
   6056               break;
   6057               }
   6058             switch(*eptr)
   6059               {
   6060               default: eptr++; break;
   6061               VSPACE_BYTE_CASES:
   6062 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6063               VSPACE_MULTIBYTE_CASES:
   6064 #endif
   6065               goto ENDLOOP02;
   6066               }
   6067             }
   6068           ENDLOOP02:
   6069           break;
   6070 
   6071           case OP_VSPACE:
   6072           for (i = min; i < max; i++)
   6073             {
   6074             if (eptr >= md->end_subject)
   6075               {
   6076               SCHECK_PARTIAL();
   6077               break;
   6078               }
   6079             switch(*eptr)
   6080               {
   6081               default: goto ENDLOOP03;
   6082               VSPACE_BYTE_CASES:
   6083 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6084               VSPACE_MULTIBYTE_CASES:
   6085 #endif
   6086               eptr++; break;
   6087               }
   6088             }
   6089           ENDLOOP03:
   6090           break;
   6091 
   6092           case OP_NOT_DIGIT:
   6093           for (i = min; i < max; i++)
   6094             {
   6095             if (eptr >= md->end_subject)
   6096               {
   6097               SCHECK_PARTIAL();
   6098               break;
   6099               }
   6100             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
   6101             eptr++;
   6102             }
   6103           break;
   6104 
   6105           case OP_DIGIT:
   6106           for (i = min; i < max; i++)
   6107             {
   6108             if (eptr >= md->end_subject)
   6109               {
   6110               SCHECK_PARTIAL();
   6111               break;
   6112               }
   6113             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
   6114             eptr++;
   6115             }
   6116           break;
   6117 
   6118           case OP_NOT_WHITESPACE:
   6119           for (i = min; i < max; i++)
   6120             {
   6121             if (eptr >= md->end_subject)
   6122               {
   6123               SCHECK_PARTIAL();
   6124               break;
   6125               }
   6126             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
   6127             eptr++;
   6128             }
   6129           break;
   6130 
   6131           case OP_WHITESPACE:
   6132           for (i = min; i < max; i++)
   6133             {
   6134             if (eptr >= md->end_subject)
   6135               {
   6136               SCHECK_PARTIAL();
   6137               break;
   6138               }
   6139             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
   6140             eptr++;
   6141             }
   6142           break;
   6143 
   6144           case OP_NOT_WORDCHAR:
   6145           for (i = min; i < max; i++)
   6146             {
   6147             if (eptr >= md->end_subject)
   6148               {
   6149               SCHECK_PARTIAL();
   6150               break;
   6151               }
   6152             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
   6153             eptr++;
   6154             }
   6155           break;
   6156 
   6157           case OP_WORDCHAR:
   6158           for (i = min; i < max; i++)
   6159             {
   6160             if (eptr >= md->end_subject)
   6161               {
   6162               SCHECK_PARTIAL();
   6163               break;
   6164               }
   6165             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
   6166             eptr++;
   6167             }
   6168           break;
   6169 
   6170           default:
   6171           RRETURN(PCRE_ERROR_INTERNAL);
   6172           }
   6173 
   6174         if (possessive) continue;    /* No backtracking */
   6175         for (;;)
   6176           {
   6177           if (eptr == pp) goto TAIL_RECURSE;
   6178           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
   6179           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   6180           eptr--;
   6181           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
   6182               eptr[-1] == CHAR_CR) eptr--;
   6183           }
   6184         }
   6185 
   6186       /* Control never gets here */
   6187       }
   6188 
   6189     /* There's been some horrible disaster. Arrival here can only mean there is
   6190     something seriously wrong in the code above or the OP_xxx definitions. */
   6191 
   6192     default:
   6193     DPRINTF(("Unknown opcode %d\n", *ecode));
   6194     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
   6195     }
   6196 
   6197   /* Do not stick any code in here without much thought; it is assumed
   6198   that "continue" in the code above comes out to here to repeat the main
   6199   loop. */
   6200 
   6201   }             /* End of main loop */
   6202 /* Control never reaches here */
   6203 
   6204 
   6205 /* When compiling to use the heap rather than the stack for recursive calls to
   6206 match(), the RRETURN() macro jumps here. The number that is saved in
   6207 frame->Xwhere indicates which label we actually want to return to. */
   6208 
   6209 #ifdef NO_RECURSE
   6210 #define LBL(val) case val: goto L_RM##val;
   6211 HEAP_RETURN:
   6212 switch (frame->Xwhere)
   6213   {
   6214   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
   6215   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   6216   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   6217   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
   6218   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
   6219   LBL(65) LBL(66)
   6220 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   6221   LBL(20) LBL(21)
   6222 #endif
   6223 #ifdef SUPPORT_UTF
   6224   LBL(16) LBL(18)
   6225   LBL(22) LBL(23) LBL(28) LBL(30)
   6226   LBL(32) LBL(34) LBL(42) LBL(46)
   6227 #ifdef SUPPORT_UCP
   6228   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
   6229   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
   6230 #endif  /* SUPPORT_UCP */
   6231 #endif  /* SUPPORT_UTF */
   6232   default:
   6233   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   6234   return PCRE_ERROR_INTERNAL;
   6235   }
   6236 #undef LBL
   6237 #endif  /* NO_RECURSE */
   6238 }
   6239 
   6240 
   6241 /***************************************************************************
   6242 ****************************************************************************
   6243                    RECURSION IN THE match() FUNCTION
   6244 
   6245 Undefine all the macros that were defined above to handle this. */
   6246 
   6247 #ifdef NO_RECURSE
   6248 #undef eptr
   6249 #undef ecode
   6250 #undef mstart
   6251 #undef offset_top
   6252 #undef eptrb
   6253 #undef flags
   6254 
   6255 #undef callpat
   6256 #undef charptr
   6257 #undef data
   6258 #undef next
   6259 #undef pp
   6260 #undef prev
   6261 #undef saved_eptr
   6262 
   6263 #undef new_recursive
   6264 
   6265 #undef cur_is_word
   6266 #undef condition
   6267 #undef prev_is_word
   6268 
   6269 #undef ctype
   6270 #undef length
   6271 #undef max
   6272 #undef min
   6273 #undef number
   6274 #undef offset
   6275 #undef op
   6276 #undef save_capture_last
   6277 #undef save_offset1
   6278 #undef save_offset2
   6279 #undef save_offset3
   6280 #undef stacksave
   6281 
   6282 #undef newptrb
   6283 
   6284 #endif
   6285 
   6286 /* These two are defined as macros in both cases */
   6287 
   6288 #undef fc
   6289 #undef fi
   6290 
   6291 /***************************************************************************
   6292 ***************************************************************************/
   6293 
   6294 
   6295 #ifdef NO_RECURSE
   6296 /*************************************************
   6297 *          Release allocated heap frames         *
   6298 *************************************************/
   6299 
   6300 /* This function releases all the allocated frames. The base frame is on the
   6301 machine stack, and so must not be freed.
   6302 
   6303 Argument: the address of the base frame
   6304 Returns:  nothing
   6305 */
   6306 
   6307 static void
   6308 release_match_heapframes (heapframe *frame_base)
   6309 {
   6310 heapframe *nextframe = frame_base->Xnextframe;
   6311 while (nextframe != NULL)
   6312   {
   6313   heapframe *oldframe = nextframe;
   6314   nextframe = nextframe->Xnextframe;
   6315   (PUBL(stack_free))(oldframe);
   6316   }
   6317 }
   6318 #endif
   6319 
   6320 
   6321 /*************************************************
   6322 *         Execute a Regular Expression           *
   6323 *************************************************/
   6324 
   6325 /* This function applies a compiled re to a subject string and picks out
   6326 portions of the string if it matches. Two elements in the vector are set for
   6327 each substring: the offsets to the start and end of the substring.
   6328 
   6329 Arguments:
   6330   argument_re     points to the compiled expression
   6331   extra_data      points to extra data or is NULL
   6332   subject         points to the subject string
   6333   length          length of subject string (may contain binary zeros)
   6334   start_offset    where to start in the subject string
   6335   options         option bits
   6336   offsets         points to a vector of ints to be filled in with offsets
   6337   offsetcount     the number of elements in the vector
   6338 
   6339 Returns:          > 0 => success; value is the number of elements filled in
   6340                   = 0 => success, but offsets is not big enough
   6341                    -1 => failed to match
   6342                  < -1 => some kind of unexpected problem
   6343 */
   6344 
   6345 #if defined COMPILE_PCRE8
   6346 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6347 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   6348   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   6349   int offsetcount)
   6350 #elif defined COMPILE_PCRE16
   6351 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6352 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
   6353   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
   6354   int offsetcount)
   6355 #elif defined COMPILE_PCRE32
   6356 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6357 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
   6358   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
   6359   int offsetcount)
   6360 #endif
   6361 {
   6362 int rc, ocount, arg_offset_max;
   6363 int newline;
   6364 BOOL using_temporary_offsets = FALSE;
   6365 BOOL anchored;
   6366 BOOL startline;
   6367 BOOL firstline;
   6368 BOOL utf;
   6369 BOOL has_first_char = FALSE;
   6370 BOOL has_req_char = FALSE;
   6371 pcre_uchar first_char = 0;
   6372 pcre_uchar first_char2 = 0;
   6373 pcre_uchar req_char = 0;
   6374 pcre_uchar req_char2 = 0;
   6375 match_data match_block;
   6376 match_data *md = &match_block;
   6377 const pcre_uint8 *tables;
   6378 const pcre_uint8 *start_bits = NULL;
   6379 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
   6380 PCRE_PUCHAR end_subject;
   6381 PCRE_PUCHAR start_partial = NULL;
   6382 PCRE_PUCHAR match_partial = NULL;
   6383 PCRE_PUCHAR req_char_ptr = start_match - 1;
   6384 
   6385 const pcre_study_data *study;
   6386 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
   6387 
   6388 #ifdef NO_RECURSE
   6389 heapframe frame_zero;
   6390 frame_zero.Xprevframe = NULL;            /* Marks the top level */
   6391 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
   6392 md->match_frames_base = &frame_zero;
   6393 #endif
   6394 
   6395 /* Check for the special magic call that measures the size of the stack used
   6396 per recursive call of match(). Without the funny casting for sizeof, a Windows
   6397 compiler gave this error: "unary minus operator applied to unsigned type,
   6398 result still unsigned". Hopefully the cast fixes that. */
   6399 
   6400 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
   6401     start_offset == -999)
   6402 #ifdef NO_RECURSE
   6403   return -((int)sizeof(heapframe));
   6404 #else
   6405   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
   6406 #endif
   6407 
   6408 /* Plausibility checks */
   6409 
   6410 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
   6411 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
   6412   return PCRE_ERROR_NULL;
   6413 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
   6414 if (length < 0) return PCRE_ERROR_BADLENGTH;
   6415 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
   6416 
   6417 /* Check that the first field in the block is the magic number. If it is not,
   6418 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
   6419 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
   6420 means that the pattern is likely compiled with different endianness. */
   6421 
   6422 if (re->magic_number != MAGIC_NUMBER)
   6423   return re->magic_number == REVERSED_MAGIC_NUMBER?
   6424     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
   6425 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   6426 
   6427 /* These two settings are used in the code for checking a UTF-8 string that
   6428 follows immediately afterwards. Other values in the md block are used only
   6429 during "normal" pcre_exec() processing, not when the JIT support is in use,
   6430 so they are set up later. */
   6431 
   6432 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
   6433 utf = md->utf = (re->options & PCRE_UTF8) != 0;
   6434 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
   6435               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
   6436 
   6437 /* Check a UTF-8 string if required. Pass back the character offset and error
   6438 code for an invalid string if a results vector is available. */
   6439 
   6440 #ifdef SUPPORT_UTF
   6441 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   6442   {
   6443   int erroroffset;
   6444   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
   6445   if (errorcode != 0)
   6446     {
   6447     if (offsetcount >= 2)
   6448       {
   6449       offsets[0] = erroroffset;
   6450       offsets[1] = errorcode;
   6451       }
   6452 #if defined COMPILE_PCRE8
   6453     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
   6454       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
   6455 #elif defined COMPILE_PCRE16
   6456     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
   6457       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
   6458 #elif defined COMPILE_PCRE32
   6459     return PCRE_ERROR_BADUTF32;
   6460 #endif
   6461     }
   6462 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
   6463   /* Check that a start_offset points to the start of a UTF character. */
   6464   if (start_offset > 0 && start_offset < length &&
   6465       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
   6466     return PCRE_ERROR_BADUTF8_OFFSET;
   6467 #endif
   6468   }
   6469 #endif
   6470 
   6471 /* If the pattern was successfully studied with JIT support, run the JIT
   6472 executable instead of the rest of this function. Most options must be set at
   6473 compile time for the JIT code to be usable. Fall back to the normal code path if
   6474 an unsupported flag is set. */
   6475 
   6476 #ifdef SUPPORT_JIT
   6477 if (extra_data != NULL
   6478     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
   6479                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
   6480     && extra_data->executable_jit != NULL
   6481     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
   6482   {
   6483   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
   6484        start_offset, options, offsets, offsetcount);
   6485 
   6486   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
   6487   matching mode is not compiled. In this case we fall back to the interpreter. */
   6488 
   6489   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
   6490   }
   6491 #endif
   6492 
   6493 /* Carry on with non-JIT matching. This information is for finding all the
   6494 numbers associated with a given name, for condition testing. */
   6495 
   6496 md->name_table = (pcre_uchar *)re + re->name_table_offset;
   6497 md->name_count = re->name_count;
   6498 md->name_entry_size = re->name_entry_size;
   6499 
   6500 /* Fish out the optional data from the extra_data structure, first setting
   6501 the default values. */
   6502 
   6503 study = NULL;
   6504 md->match_limit = MATCH_LIMIT;
   6505 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
   6506 md->callout_data = NULL;
   6507 
   6508 /* The table pointer is always in native byte order. */
   6509 
   6510 tables = re->tables;
   6511 
   6512 /* The two limit values override the defaults, whatever their value. */
   6513 
   6514 if (extra_data != NULL)
   6515   {
   6516   unsigned long int flags = extra_data->flags;
   6517   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
   6518     study = (const pcre_study_data *)extra_data->study_data;
   6519   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
   6520     md->match_limit = extra_data->match_limit;
   6521   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
   6522     md->match_limit_recursion = extra_data->match_limit_recursion;
   6523   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
   6524     md->callout_data = extra_data->callout_data;
   6525   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
   6526   }
   6527 
   6528 /* Limits in the regex override only if they are smaller. */
   6529 
   6530 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
   6531   md->match_limit = re->limit_match;
   6532 
   6533 if ((re->flags & PCRE_RLSET) != 0 &&
   6534     re->limit_recursion < md->match_limit_recursion)
   6535   md->match_limit_recursion = re->limit_recursion;
   6536 
   6537 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
   6538 is a feature that makes it possible to save compiled regex and re-use them
   6539 in other programs later. */
   6540 
   6541 if (tables == NULL) tables = PRIV(default_tables);
   6542 
   6543 /* Set up other data */
   6544 
   6545 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
   6546 startline = (re->flags & PCRE_STARTLINE) != 0;
   6547 firstline = (re->options & PCRE_FIRSTLINE) != 0;
   6548 
   6549 /* The code starts after the real_pcre block and the capture name table. */
   6550 
   6551 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
   6552   re->name_count * re->name_entry_size;
   6553 
   6554 md->start_subject = (PCRE_PUCHAR)subject;
   6555 md->start_offset = start_offset;
   6556 md->end_subject = md->start_subject + length;
   6557 end_subject = md->end_subject;
   6558 
   6559 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
   6560 md->use_ucp = (re->options & PCRE_UCP) != 0;
   6561 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
   6562 md->ignore_skip_arg = 0;
   6563 
   6564 /* Some options are unpacked into BOOL variables in the hope that testing
   6565 them will be faster than individual option bits. */
   6566 
   6567 md->notbol = (options & PCRE_NOTBOL) != 0;
   6568 md->noteol = (options & PCRE_NOTEOL) != 0;
   6569 md->notempty = (options & PCRE_NOTEMPTY) != 0;
   6570 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
   6571 
   6572 md->hitend = FALSE;
   6573 md->mark = md->nomatch_mark = NULL;     /* In case never set */
   6574 
   6575 md->recursive = NULL;                   /* No recursion at top level */
   6576 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
   6577 
   6578 md->lcc = tables + lcc_offset;
   6579 md->fcc = tables + fcc_offset;
   6580 md->ctypes = tables + ctypes_offset;
   6581 
   6582 /* Handle different \R options. */
   6583 
   6584 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
   6585   {
   6586   case 0:
   6587   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
   6588     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
   6589   else
   6590 #ifdef BSR_ANYCRLF
   6591   md->bsr_anycrlf = TRUE;
   6592 #else
   6593   md->bsr_anycrlf = FALSE;
   6594 #endif
   6595   break;
   6596 
   6597   case PCRE_BSR_ANYCRLF:
   6598   md->bsr_anycrlf = TRUE;
   6599   break;
   6600 
   6601   case PCRE_BSR_UNICODE:
   6602   md->bsr_anycrlf = FALSE;
   6603   break;
   6604 
   6605   default: return PCRE_ERROR_BADNEWLINE;
   6606   }
   6607 
   6608 /* Handle different types of newline. The three bits give eight cases. If
   6609 nothing is set at run time, whatever was used at compile time applies. */
   6610 
   6611 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
   6612         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
   6613   {
   6614   case 0: newline = NEWLINE; break;   /* Compile-time default */
   6615   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
   6616   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   6617   case PCRE_NEWLINE_CR+
   6618        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   6619   case PCRE_NEWLINE_ANY: newline = -1; break;
   6620   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   6621   default: return PCRE_ERROR_BADNEWLINE;
   6622   }
   6623 
   6624 if (newline == -2)
   6625   {
   6626   md->nltype = NLTYPE_ANYCRLF;
   6627   }
   6628 else if (newline < 0)
   6629   {
   6630   md->nltype = NLTYPE_ANY;
   6631   }
   6632 else
   6633   {
   6634   md->nltype = NLTYPE_FIXED;
   6635   if (newline > 255)
   6636     {
   6637     md->nllen = 2;
   6638     md->nl[0] = (newline >> 8) & 255;
   6639     md->nl[1] = newline & 255;
   6640     }
   6641   else
   6642     {
   6643     md->nllen = 1;
   6644     md->nl[0] = newline;
   6645     }
   6646   }
   6647 
   6648 /* Partial matching was originally supported only for a restricted set of
   6649 regexes; from release 8.00 there are no restrictions, but the bits are still
   6650 defined (though never set). So there's no harm in leaving this code. */
   6651 
   6652 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
   6653   return PCRE_ERROR_BADPARTIAL;
   6654 
   6655 /* If the expression has got more back references than the offsets supplied can
   6656 hold, we get a temporary chunk of working store to use during the matching.
   6657 Otherwise, we can use the vector supplied, rounding down its size to a multiple
   6658 of 3. */
   6659 
   6660 ocount = offsetcount - (offsetcount % 3);
   6661 arg_offset_max = (2*ocount)/3;
   6662 
   6663 if (re->top_backref > 0 && re->top_backref >= ocount/3)
   6664   {
   6665   ocount = re->top_backref * 3 + 3;
   6666   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
   6667   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
   6668   using_temporary_offsets = TRUE;
   6669   DPRINTF(("Got memory to hold back references\n"));
   6670   }
   6671 else md->offset_vector = offsets;
   6672 md->offset_end = ocount;
   6673 md->offset_max = (2*ocount)/3;
   6674 md->capture_last = 0;
   6675 
   6676 /* Reset the working variable associated with each extraction. These should
   6677 never be used unless previously set, but they get saved and restored, and so we
   6678 initialize them to avoid reading uninitialized locations. Also, unset the
   6679 offsets for the matched string. This is really just for tidiness with callouts,
   6680 in case they inspect these fields. */
   6681 
   6682 if (md->offset_vector != NULL)
   6683   {
   6684   register int *iptr = md->offset_vector + ocount;
   6685   register int *iend = iptr - re->top_bracket;
   6686   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
   6687   while (--iptr >= iend) *iptr = -1;
   6688   if (offsetcount > 0) md->offset_vector[0] = -1;
   6689   if (offsetcount > 1) md->offset_vector[1] = -1;
   6690   }
   6691 
   6692 /* Set up the first character to match, if available. The first_char value is
   6693 never set for an anchored regular expression, but the anchoring may be forced
   6694 at run time, so we have to test for anchoring. The first char may be unset for
   6695 an unanchored pattern, of course. If there's no first char and the pattern was
   6696 studied, there may be a bitmap of possible first characters. */
   6697 
   6698 if (!anchored)
   6699   {
   6700   if ((re->flags & PCRE_FIRSTSET) != 0)
   6701     {
   6702     has_first_char = TRUE;
   6703     first_char = first_char2 = (pcre_uchar)(re->first_char);
   6704     if ((re->flags & PCRE_FCH_CASELESS) != 0)
   6705       {
   6706       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
   6707 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
   6708       if (utf && first_char > 127)
   6709         first_char2 = UCD_OTHERCASE(first_char);
   6710 #endif
   6711       }
   6712     }
   6713   else
   6714     if (!startline && study != NULL &&
   6715       (study->flags & PCRE_STUDY_MAPPED) != 0)
   6716         start_bits = study->start_bits;
   6717   }
   6718 
   6719 /* For anchored or unanchored matches, there may be a "last known required
   6720 character" set. */
   6721 
   6722 if ((re->flags & PCRE_REQCHSET) != 0)
   6723   {
   6724   has_req_char = TRUE;
   6725   req_char = req_char2 = (pcre_uchar)(re->req_char);
   6726   if ((re->flags & PCRE_RCH_CASELESS) != 0)
   6727     {
   6728     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
   6729 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
   6730     if (utf && req_char > 127)
   6731       req_char2 = UCD_OTHERCASE(req_char);
   6732 #endif
   6733     }
   6734   }
   6735 
   6736 
   6737 /* ==========================================================================*/
   6738 
   6739 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
   6740 the loop runs just once. */
   6741 
   6742 for(;;)
   6743   {
   6744   PCRE_PUCHAR save_end_subject = end_subject;
   6745   PCRE_PUCHAR new_start_match;
   6746 
   6747   /* If firstline is TRUE, the start of the match is constrained to the first
   6748   line of a multiline string. That is, the match must be before or at the first
   6749   newline. Implement this by temporarily adjusting end_subject so that we stop
   6750   scanning at a newline. If the match fails at the newline, later code breaks
   6751   this loop. */
   6752 
   6753   if (firstline)
   6754     {
   6755     PCRE_PUCHAR t = start_match;
   6756 #ifdef SUPPORT_UTF
   6757     if (utf)
   6758       {
   6759       while (t < md->end_subject && !IS_NEWLINE(t))
   6760         {
   6761         t++;
   6762         ACROSSCHAR(t < end_subject, *t, t++);
   6763         }
   6764       }
   6765     else
   6766 #endif
   6767     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
   6768     end_subject = t;
   6769     }
   6770 
   6771   /* There are some optimizations that avoid running the match if a known
   6772   starting point is not found, or if a known later character is not present.
   6773   However, there is an option that disables these, for testing and for ensuring
   6774   that all callouts do actually occur. The option can be set in the regex by
   6775   (*NO_START_OPT) or passed in match-time options. */
   6776 
   6777   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
   6778     {
   6779     /* Advance to a unique first char if there is one. */
   6780 
   6781     if (has_first_char)
   6782       {
   6783       pcre_uchar smc;
   6784 
   6785       if (first_char != first_char2)
   6786         while (start_match < end_subject &&
   6787           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
   6788           start_match++;
   6789       else
   6790         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
   6791           start_match++;
   6792       }
   6793 
   6794     /* Or to just after a linebreak for a multiline match */
   6795 
   6796     else if (startline)
   6797       {
   6798       if (start_match > md->start_subject + start_offset)
   6799         {
   6800 #ifdef SUPPORT_UTF
   6801         if (utf)
   6802           {
   6803           while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6804             {
   6805             start_match++;
   6806             ACROSSCHAR(start_match < end_subject, *start_match,
   6807               start_match++);
   6808             }
   6809           }
   6810         else
   6811 #endif
   6812         while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6813           start_match++;
   6814 
   6815         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
   6816         and we are now at a LF, advance the match position by one more character.
   6817         */
   6818 
   6819         if (start_match[-1] == CHAR_CR &&
   6820              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
   6821              start_match < end_subject &&
   6822              UCHAR21TEST(start_match) == CHAR_NL)
   6823           start_match++;
   6824         }
   6825       }
   6826 
   6827     /* Or to a non-unique first byte after study */
   6828 
   6829     else if (start_bits != NULL)
   6830       {
   6831       while (start_match < end_subject)
   6832         {
   6833         register pcre_uint32 c = UCHAR21TEST(start_match);
   6834 #ifndef COMPILE_PCRE8
   6835         if (c > 255) c = 255;
   6836 #endif
   6837         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
   6838         start_match++;
   6839         }
   6840       }
   6841     }   /* Starting optimizations */
   6842 
   6843   /* Restore fudged end_subject */
   6844 
   6845   end_subject = save_end_subject;
   6846 
   6847   /* The following two optimizations are disabled for partial matching or if
   6848   disabling is explicitly requested. */
   6849 
   6850   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
   6851     {
   6852     /* If the pattern was studied, a minimum subject length may be set. This is
   6853     a lower bound; no actual string of that length may actually match the
   6854     pattern. Although the value is, strictly, in characters, we treat it as
   6855     bytes to avoid spending too much time in this optimization. */
   6856 
   6857     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
   6858         (pcre_uint32)(end_subject - start_match) < study->minlength)
   6859       {
   6860       rc = MATCH_NOMATCH;
   6861       break;
   6862       }
   6863 
   6864     /* If req_char is set, we know that that character must appear in the
   6865     subject for the match to succeed. If the first character is set, req_char
   6866     must be later in the subject; otherwise the test starts at the match point.
   6867     This optimization can save a huge amount of backtracking in patterns with
   6868     nested unlimited repeats that aren't going to match. Writing separate code
   6869     for cased/caseless versions makes it go faster, as does using an
   6870     autoincrement and backing off on a match.
   6871 
   6872     HOWEVER: when the subject string is very, very long, searching to its end
   6873     can take a long time, and give bad performance on quite ordinary patterns.
   6874     This showed up when somebody was matching something like /^\d+C/ on a
   6875     32-megabyte string... so we don't do this when the string is sufficiently
   6876     long. */
   6877 
   6878     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
   6879       {
   6880       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
   6881 
   6882       /* We don't need to repeat the search if we haven't yet reached the
   6883       place we found it at last time. */
   6884 
   6885       if (p > req_char_ptr)
   6886         {
   6887         if (req_char != req_char2)
   6888           {
   6889           while (p < end_subject)
   6890             {
   6891             register pcre_uint32 pp = UCHAR21INCTEST(p);
   6892             if (pp == req_char || pp == req_char2) { p--; break; }
   6893             }
   6894           }
   6895         else
   6896           {
   6897           while (p < end_subject)
   6898             {
   6899             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
   6900             }
   6901           }
   6902 
   6903         /* If we can't find the required character, break the matching loop,
   6904         forcing a match failure. */
   6905 
   6906         if (p >= end_subject)
   6907           {
   6908           rc = MATCH_NOMATCH;
   6909           break;
   6910           }
   6911 
   6912         /* If we have found the required character, save the point where we
   6913         found it, so that we don't search again next time round the loop if
   6914         the start hasn't passed this character yet. */
   6915 
   6916         req_char_ptr = p;
   6917         }
   6918       }
   6919     }
   6920 
   6921 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
   6922   printf(">>>> Match against: ");
   6923   pchars(start_match, end_subject - start_match, TRUE, md);
   6924   printf("\n");
   6925 #endif
   6926 
   6927   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
   6928   first starting point for which a partial match was found. */
   6929 
   6930   md->start_match_ptr = start_match;
   6931   md->start_used_ptr = start_match;
   6932   md->match_call_count = 0;
   6933   md->match_function_type = 0;
   6934   md->end_offset_top = 0;
   6935   md->skip_arg_count = 0;
   6936   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
   6937   if (md->hitend && start_partial == NULL)
   6938     {
   6939     start_partial = md->start_used_ptr;
   6940     match_partial = start_match;
   6941     }
   6942 
   6943   switch(rc)
   6944     {
   6945     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
   6946     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
   6947     entirely. The only way we can do that is to re-do the match at the same
   6948     point, with a flag to force SKIP with an argument to be ignored. Just
   6949     treating this case as NOMATCH does not work because it does not check other
   6950     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
   6951 
   6952     case MATCH_SKIP_ARG:
   6953     new_start_match = start_match;
   6954     md->ignore_skip_arg = md->skip_arg_count;
   6955     break;
   6956 
   6957     /* SKIP passes back the next starting point explicitly, but if it is no
   6958     greater than the match we have just done, treat it as NOMATCH. */
   6959 
   6960     case MATCH_SKIP:
   6961     if (md->start_match_ptr > start_match)
   6962       {
   6963       new_start_match = md->start_match_ptr;
   6964       break;
   6965       }
   6966     /* Fall through */
   6967 
   6968     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
   6969     exactly like PRUNE. Unset ignore SKIP-with-argument. */
   6970 
   6971     case MATCH_NOMATCH:
   6972     case MATCH_PRUNE:
   6973     case MATCH_THEN:
   6974     md->ignore_skip_arg = 0;
   6975     new_start_match = start_match + 1;
   6976 #ifdef SUPPORT_UTF
   6977     if (utf)
   6978       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
   6979         new_start_match++);
   6980 #endif
   6981     break;
   6982 
   6983     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
   6984 
   6985     case MATCH_COMMIT:
   6986     rc = MATCH_NOMATCH;
   6987     goto ENDLOOP;
   6988 
   6989     /* Any other return is either a match, or some kind of error. */
   6990 
   6991     default:
   6992     goto ENDLOOP;
   6993     }
   6994 
   6995   /* Control reaches here for the various types of "no match at this point"
   6996   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
   6997 
   6998   rc = MATCH_NOMATCH;
   6999 
   7000   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
   7001   newline in the subject (though it may continue over the newline). Therefore,
   7002   if we have just failed to match, starting at a newline, do not continue. */
   7003 
   7004   if (firstline && IS_NEWLINE(start_match)) break;
   7005 
   7006   /* Advance to new matching position */
   7007 
   7008   start_match = new_start_match;
   7009 
   7010   /* Break the loop if the pattern is anchored or if we have passed the end of
   7011   the subject. */
   7012 
   7013   if (anchored || start_match > end_subject) break;
   7014 
   7015   /* If we have just passed a CR and we are now at a LF, and the pattern does
   7016   not contain any explicit matches for \r or \n, and the newline option is CRLF
   7017   or ANY or ANYCRLF, advance the match position by one more character. In
   7018   normal matching start_match will always be greater than the first position at
   7019   this stage, but a failed *SKIP can cause a return at the same point, which is
   7020   why the first test exists. */
   7021 
   7022   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
   7023       start_match[-1] == CHAR_CR &&
   7024       start_match < end_subject &&
   7025       *start_match == CHAR_NL &&
   7026       (re->flags & PCRE_HASCRORLF) == 0 &&
   7027         (md->nltype == NLTYPE_ANY ||
   7028          md->nltype == NLTYPE_ANYCRLF ||
   7029          md->nllen == 2))
   7030     start_match++;
   7031 
   7032   md->mark = NULL;   /* Reset for start of next match attempt */
   7033   }                  /* End of for(;;) "bumpalong" loop */
   7034 
   7035 /* ==========================================================================*/
   7036 
   7037 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
   7038 conditions is true:
   7039 
   7040 (1) The pattern is anchored or the match was failed by (*COMMIT);
   7041 
   7042 (2) We are past the end of the subject;
   7043 
   7044 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
   7045     this option requests that a match occur at or before the first newline in
   7046     the subject.
   7047 
   7048 When we have a match and the offset vector is big enough to deal with any
   7049 backreferences, captured substring offsets will already be set up. In the case
   7050 where we had to get some local store to hold offsets for backreference
   7051 processing, copy those that we can. In this case there need not be overflow if
   7052 certain parts of the pattern were not used, even though there are more
   7053 capturing parentheses than vector slots. */
   7054 
   7055 ENDLOOP:
   7056 
   7057 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   7058   {
   7059   if (using_temporary_offsets)
   7060     {
   7061     if (arg_offset_max >= 4)
   7062       {
   7063       memcpy(offsets + 2, md->offset_vector + 2,
   7064         (arg_offset_max - 2) * sizeof(int));
   7065       DPRINTF(("Copied offsets from temporary memory\n"));
   7066       }
   7067     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
   7068     DPRINTF(("Freeing temporary memory\n"));
   7069     (PUBL(free))(md->offset_vector);
   7070     }
   7071 
   7072   /* Set the return code to the number of captured strings, or 0 if there were
   7073   too many to fit into the vector. */
   7074 
   7075   rc = ((md->capture_last & OVFLBIT) != 0 &&
   7076          md->end_offset_top >= arg_offset_max)?
   7077     0 : md->end_offset_top/2;
   7078 
   7079   /* If there is space in the offset vector, set any unused pairs at the end of
   7080   the pattern to -1 for backwards compatibility. It is documented that this
   7081   happens. In earlier versions, the whole set of potential capturing offsets
   7082   was set to -1 each time round the loop, but this is handled differently now.
   7083   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
   7084   those at the end that need unsetting here. We can't just unset them all at
   7085   the start of the whole thing because they may get set in one branch that is
   7086   not the final matching branch. */
   7087 
   7088   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
   7089     {
   7090     register int *iptr, *iend;
   7091     int resetcount = 2 + re->top_bracket * 2;
   7092     if (resetcount > offsetcount) resetcount = offsetcount;
   7093     iptr = offsets + md->end_offset_top;
   7094     iend = offsets + resetcount;
   7095     while (iptr < iend) *iptr++ = -1;
   7096     }
   7097 
   7098   /* If there is space, set up the whole thing as substring 0. The value of
   7099   md->start_match_ptr might be modified if \K was encountered on the successful
   7100   matching path. */
   7101 
   7102   if (offsetcount < 2) rc = 0; else
   7103     {
   7104     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
   7105     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
   7106     }
   7107 
   7108   /* Return MARK data if requested */
   7109 
   7110   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
   7111     *(extra_data->mark) = (pcre_uchar *)md->mark;
   7112   DPRINTF((">>>> returning %d\n", rc));
   7113 #ifdef NO_RECURSE
   7114   release_match_heapframes(&frame_zero);
   7115 #endif
   7116   return rc;
   7117   }
   7118 
   7119 /* Control gets here if there has been an error, or if the overall match
   7120 attempt has failed at all permitted starting positions. */
   7121 
   7122 if (using_temporary_offsets)
   7123   {
   7124   DPRINTF(("Freeing temporary memory\n"));
   7125   (PUBL(free))(md->offset_vector);
   7126   }
   7127 
   7128 /* For anything other than nomatch or partial match, just return the code. */
   7129 
   7130 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   7131   {
   7132   DPRINTF((">>>> error: returning %d\n", rc));
   7133 #ifdef NO_RECURSE
   7134   release_match_heapframes(&frame_zero);
   7135 #endif
   7136   return rc;
   7137   }
   7138 
   7139 /* Handle partial matches - disable any mark data */
   7140 
   7141 if (match_partial != NULL)
   7142   {
   7143   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
   7144   md->mark = NULL;
   7145   if (offsetcount > 1)
   7146     {
   7147     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
   7148     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
   7149     if (offsetcount > 2)
   7150       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
   7151     }
   7152   rc = PCRE_ERROR_PARTIAL;
   7153   }
   7154 
   7155 /* This is the classic nomatch case */
   7156 
   7157 else
   7158   {
   7159   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
   7160   rc = PCRE_ERROR_NOMATCH;
   7161   }
   7162 
   7163 /* Return the MARK data if it has been requested. */
   7164 
   7165 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
   7166   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
   7167 #ifdef NO_RECURSE
   7168   release_match_heapframes(&frame_zero);
   7169 #endif
   7170 return rc;
   7171 }
   7172 
   7173 /* End of pcre_exec.c */
   7174