Home | History | Annotate | Download | only in dist
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9            Copyright (c) 1997-2014 University of Cambridge
     10 
     11 -----------------------------------------------------------------------------
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions are met:
     14 
     15     * Redistributions of source code must retain the above copyright notice,
     16       this list of conditions and the following disclaimer.
     17 
     18     * Redistributions in binary form must reproduce the above copyright
     19       notice, this list of conditions and the following disclaimer in the
     20       documentation and/or other materials provided with the distribution.
     21 
     22     * Neither the name of the University of Cambridge nor the names of its
     23       contributors may be used to endorse or promote products derived from
     24       this software without specific prior written permission.
     25 
     26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36 POSSIBILITY OF SUCH DAMAGE.
     37 -----------------------------------------------------------------------------
     38 */
     39 
     40 /* This module contains pcre_exec(), the externally visible function that does
     41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
     42 possible. There are also some static supporting functions. */
     43 
     44 #ifdef HAVE_CONFIG_H
     45 #include "config.h"
     46 #endif
     47 
     48 #define NLBLOCK md             /* Block containing newline information */
     49 #define PSSTART start_subject  /* Field containing processed string start */
     50 #define PSEND   end_subject    /* Field containing processed string end */
     51 
     52 #include "pcre_internal.h"
     53 
     54 /* Undefine some potentially clashing cpp symbols */
     55 
     56 #undef min
     57 #undef max
     58 
     59 /* The md->capture_last field uses the lower 16 bits for the last captured
     60 substring (which can never be greater than 65535) and a bit in the top half
     61 to mean "capture vector overflowed". This odd way of doing things was
     62 implemented when it was realized that preserving and restoring the overflow bit
     63 whenever the last capture number was saved/restored made for a neater
     64 interface, and doing it this way saved on (a) another variable, which would
     65 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
     66 separate set of save/restore instructions. The following defines are used in
     67 implementing this. */
     68 
     69 #define CAPLMASK    0x0000ffff    /* Low 16 bits: number of the last captured substring */
     70 #define OVFLMASK    0xffff0000    /* High 16 bits: the half that holds the overflow flag */
     71 #define OVFLBIT     0x00010000    /* Lowest bit of OVFLMASK: set when the capture vector overflowed */
     72 
     73 /* Values for setting in md->match_function_type to indicate two special types
     74 of call to match(). We do it this way to save on using another stack variable,
     75 as stack usage is to be discouraged. */
     76 
     77 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
     78 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group: match() chains eptr onto eptrb and resets the type to 0 */
     79 
     80 /* Non-error returns from the match() function. Error returns are externally
     81 defined PCRE_ERROR_xxx codes, which are all negative. */
     82 
     83 #define MATCH_MATCH        1   /* match() found a match */
     84 #define MATCH_NOMATCH      0   /* match() failed to match */
     85 
     86 /* Special internal returns from the match() function. Make them sufficiently
     87 negative to avoid the external error codes. */
     88 
     89 #define MATCH_ACCEPT       (-999)
     90 #define MATCH_KETRPOS      (-998)
     91 #define MATCH_ONCE         (-997)
     92 /* The next 5 must be kept together and in sequence so that a test that checks
     93 for any one of them can use a range. */
     94 #define MATCH_COMMIT       (-996)   /* Returned by OP_COMMIT when the rest of the pattern fails */
     95 #define MATCH_PRUNE        (-995)   /* Returned by OP_PRUNE and OP_PRUNE_ARG when the rest fails */
     96 #define MATCH_SKIP         (-994)   /* Also what OP_MARK returns when a MATCH_SKIP_ARG name matches its own */
     97 #define MATCH_SKIP_ARG     (-993)   /* Matching failed at a SKIP that has an argument */
     98 #define MATCH_THEN         (-992)
     99 #define MATCH_BACKTRACK_MAX MATCH_THEN     /* Upper end of the range of the 5 codes above */
    100 #define MATCH_BACKTRACK_MIN MATCH_COMMIT   /* Lower end of the range of the 5 codes above */
    101 
    102 /* Maximum number of ints of offset to save on the stack for recursive calls.
    103 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
    104 because the offset vector is always a multiple of 3 long. */
    105 
    106 #define REC_STACK_SAVE_MAX 30   /* 10 triples; also the size of the stacksave[] array used by match() */
    107 
    108 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
    109 
    110 static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
    111 static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
        /* The two tables are parallel (11 entries each): one index selects the
        minimum from rep_min and the matching maximum from rep_max.
        NOTE(review): the index is presumably an opcode offset computed by the
        repeat-handling code in match() - confirm against the users. */
    112 
    113 #ifdef PCRE_DEBUG
    114 /*************************************************
    115 *        Debugging function to print chars       *
    116 *************************************************/
    117 
    118 /* Print a sequence of chars in printable format, stopping at the end of the
    119 subject if requested.
    120 
    121 Arguments:
    122   p           points to characters
    123   length      number to print
    124   is_subject  TRUE if printing from within md->start_subject
    125   md          pointer to matching data block, if is_subject is TRUE
    126 
    127 Returns:     nothing
    128 */
    129 
    130 static void
    131 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
    132 {
    133 pcre_uint32 c;
        /* md is documented as required only when is_subject is TRUE, so do not
        dereference it blindly. NOTE(review): utf is not named below; it is
        presumably read by UCHAR21INCTEST() in some builds - confirm. */
    134 BOOL utf = (md != NULL)? md->utf : FALSE;
        /* Clamp the count so that subject text is never read past end_subject. */
    135 if (is_subject && length > md->end_subject - p) length = (int)(md->end_subject - p);
        /* isprint() is defined only for values representable as unsigned char (or
        EOF), so any wider code unit is always shown in \x{..} form. */
    136 while (length-- > 0)
    137   if ((c = UCHAR21INCTEST(p)) < 256 && isprint(c)) printf("%c", (char)c); else printf("\\x{%02x}", c);
    138 }
    139 #endif
    140 
    141 
    142 
    143 /*************************************************
    144 *          Match a back-reference                *
    145 *************************************************/
    146 
    147 /* Normally, if a back reference hasn't been set, the length that is passed is
    148 negative, so the match always fails. However, in JavaScript compatibility mode,
    149 the length passed is zero. Note that in caseless UTF-8 mode, the number of
    150 subject bytes matched may be different to the number of reference bytes.
    151 
    152 Arguments:
    153   offset      index into the offset vector
    154   eptr        pointer into the subject
    155   length      length of reference to be matched (number of bytes)
    156   md          points to match data block
    157   caseless    TRUE if caseless
    158 
    159 Returns:      >= 0 the number of subject bytes matched
    160               -1 no match
    161               -2 partial match; always given if at end subject
    162 */
    163 
    164 static int
    165 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
    166   BOOL caseless)
    167 {
    168 PCRE_PUCHAR eptr_start = eptr;   /* Kept so the consumed length can be returned */
    169 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];   /* Start of the referenced text */
    170 #if defined SUPPORT_UTF && defined SUPPORT_UCP
    171 BOOL utf = md->utf;
    172 #endif
    173 
        /* The debugging trace runs before the length check below; pchars() prints
        nothing when the length is negative. */
    174 #ifdef PCRE_DEBUG
    175 if (eptr >= md->end_subject)
    176   printf("matching subject <null>");
    177 else
    178   {
    179   printf("matching subject ");
    180   pchars(eptr, length, TRUE, md);
    181   }
    182 printf(" against backref ");
    183 pchars(p, length, FALSE, md);
    184 printf("\n");
    185 #endif
    186 
    187 /* Always fail if reference not set (and not JavaScript compatible - in that
    188 case the length is passed as zero). */
    189 
    190 if (length < 0) return -1;
    191 
    192 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
    193 properly if Unicode properties are supported. Otherwise, we can check only
    194 ASCII characters. */
    195 
    196 if (caseless)
    197   {
    198 #if defined SUPPORT_UTF && defined SUPPORT_UCP
    199   if (utf)
    200     {
    201     /* Match characters up to the end of the reference. NOTE: the number of
    202     data units matched may differ, because in UTF-8 there are some characters
    203     whose upper and lower case versions have different numbers of bytes.
    204     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
    205     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
    206     sequence of two of the latter. It is important, therefore, to check the
    207     length along the reference, not along the subject (earlier code did this
    208     wrong). */
    209 
    210     PCRE_PUCHAR endptr = p + length;
    211     while (p < endptr)
    212       {
    213       pcre_uint32 c, d;
    214       const ucd_record *ur;
    215       if (eptr >= md->end_subject) return -2;   /* Partial match */
    216       GETCHARINC(c, eptr);
    217       GETCHARINC(d, p);
    218       ur = GET_UCD(d);
              /* c is neither d itself nor d plus its other_case offset, so fall back
              to the caseless set that the record for d points at. */
    219       if (c != d && c != d + ur->other_case)
    220         {
    221         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
                /* Walk the set: fail as soon as an entry exceeds c, succeed on an
                exact hit. NOTE(review): this relies on each set being in ascending
                order and ending with a value larger than any character - confirm
                against the table. */
    222         for (;;)
    223           {
    224           if (c < *pp) return -1;
    225           if (c == *pp++) break;
    226           }
    227         }
    228       }
    229     }
    230   else
    231 #endif
    232 
    233   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
    234   is no UCP support. Each pair of data units is compared after mapping both
    235   through the md->lcc table. */
    235     {
    236     while (length-- > 0)
    237       {
    238       pcre_uint32 cc, cp;
    239       if (eptr >= md->end_subject) return -2;   /* Partial match */
    240       cc = UCHAR21TEST(eptr);
    241       cp = UCHAR21TEST(p);
    242       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
    243       p++;
    244       eptr++;
    245       }
    246     }
    247   }
    248 
    249 /* In the caseful case, we can just compare the bytes, whether or not we
    250 are in UTF-8 mode. */
    251 
    252 else
    253   {
    254   while (length-- > 0)
    255     {
    256     if (eptr >= md->end_subject) return -2;   /* Partial match */
    257     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
    258     }
    259   }
    260 
        /* The result is how far eptr advanced along the subject, which in the
        caseless UTF case need not equal the reference length. */
    261 return (int)(eptr - eptr_start);
    262 }
    263 
    264 
    265 
    266 /***************************************************************************
    267 ****************************************************************************
    268                    RECURSION IN THE match() FUNCTION
    269 
    270 The match() function is highly recursive, though not every recursive call
    271 increases the recursive depth. Nevertheless, some regular expressions can cause
    272 it to recurse to a great depth. I was writing for Unix, so I just let it call
    273 itself recursively. This uses the stack for saving everything that has to be
    274 saved for a recursive call. On Unix, the stack can be large, and this works
    275 fine.
    276 
    277 It turns out that on some non-Unix-like systems there are problems with
    278 programs that use a lot of stack. (This despite the fact that every last chip
    279 has oodles of memory these days, and techniques for extending the stack have
    280 been known for decades.) So....
    281 
    282 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
    283 calls by keeping local variables that need to be preserved in blocks of memory
    284 obtained from malloc() instead of on the stack. Macros are used to
    285 achieve this so that the actual code doesn't look very different to what it
    286 always used to.
    287 
    288 The original heap-recursive code used longjmp(). However, it seems that this
    289 can be very slow on some operating systems. Following a suggestion from Stan
    290 Switzer, the use of longjmp() has been abolished, at the cost of having to
    291 provide a unique number for each call to RMATCH. There is no way of generating
    292 a sequence of numbers at compile time in C. I have given them names, to make
    293 them stand out more clearly.
    294 
    295 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
    296 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
    297 tests. Furthermore, not using longjmp() means that local dynamic variables
    298 don't have indeterminate values; this has meant that the frame size can be
    299 reduced because the result can be "passed back" by straight setting of the
    300 variable instead of being passed in the frame.
    301 ****************************************************************************
    302 ***************************************************************************/
    303 
    304 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
    305 below must be updated in sync.  */
    306 
    307 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
    308        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
    309        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
    310        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
    311        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
    312        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
    313        RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
        /* RM1 is 1 and every later name is one more than its predecessor, so RM67
        is 67. One of these names is given as the last (rw) argument of an RMATCH()
        call; the heap version of RMATCH() pastes it into the label L_<name>. */
    314 
    315 /* These versions of the macros use the stack, as normal. There are debugging
    316 versions and production versions. Note that the "rw" argument of RMATCH isn't
    317 actually used in this definition. */
    318 
    319 #ifndef NO_RECURSE
    320 #define REGISTER register   /* Applied to the eptr and ecode parameters of match() */
    321 
    322 #ifdef PCRE_DEBUG
        /* Debugging versions: each call and each return is traced with printf(). */
    323 #define RMATCH(ra,rb,rc,rd,re,rw) \
    324   { \
    325   printf("match() called in line %d\n", __LINE__); \
    326   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
    327   printf("to line %d\n", __LINE__); \
    328   }
    329 #define RRETURN(ra) \
    330   { \
    331   printf("match() returned %d from line %d\n", ra, __LINE__); \
    332   return ra; \
    333   }
    334 #else
        /* Production versions: a plain recursive call whose result is left in rrc.
        mstart and rdepth are not macro arguments; they are taken from the scope in
        which the macro is expanded. */
    335 #define RMATCH(ra,rb,rc,rd,re,rw) \
    336   rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
    337 #define RRETURN(ra) return ra
    338 #endif
    339 
    340 #else
    341 
    342 
    343 /* These versions of the macros manage a private stack on the heap. Note that
    344 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
    345 argument of match(), which never changes. */
    346 
    347 #define REGISTER   /* Expands to nothing in the heap-frame build */
    348 
    349 #define RMATCH(ra,rb,rc,rd,re,rw)\
    350   {\
    351   heapframe *newframe = frame->Xnextframe;   /* Reuse a frame already linked after this one, if any */\
    352   if (newframe == NULL)\
    353     {\
    354     newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    355     if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    356     newframe->Xnextframe = NULL;\
    357     frame->Xnextframe = newframe;   /* Keep it linked so that a later call can reuse it */\
    358     }\
    359   frame->Xwhere = rw;   /* Remember which call site to come back to */\
    360   newframe->Xeptr = ra;\
    361   newframe->Xecode = rb;\
    362   newframe->Xmstart = mstart;\
    363   newframe->Xoffset_top = rc;\
    364   newframe->Xeptrb = re;\
    365   newframe->Xrdepth = frame->Xrdepth + 1;\
    366   newframe->Xprevframe = frame;\
    367   frame = newframe;   /* From here on the argument and local aliases refer to the new frame */\
    368   DPRINTF(("restarting from line %d\n", __LINE__));\
    369   goto HEAP_RECURSE;\
    370   L_##rw:   /* Resume point for this call site; rrc has been set by RRETURN() */\
    371   DPRINTF(("jumped back to line %d\n", __LINE__));\
    372   }
    373 
    374 #define RRETURN(ra)\
    375   {\
    376   heapframe *oldframe = frame;\
    377   frame = oldframe->Xprevframe;   /* Step back to the calling frame; oldframe is not freed here */\
    378   if (frame != NULL)\
    379     {\
    380     rrc = ra;   /* Hand the result to the "caller" through rrc */\
    381     goto HEAP_RETURN;\
    382     }\
    383   return ra;   /* No calling frame: this is a genuine return from match() */\
    384   }
    385 
    386 
    387 /* Structure for remembering the local variables in a private frame */
    388 
    389 typedef struct heapframe {
    390   struct heapframe *Xprevframe;   /* Calling frame; RRETURN() does a real return when this is NULL */
    391   struct heapframe *Xnextframe;   /* Frame obtained by an earlier RMATCH() and kept for reuse, or NULL */
    392 
    393   /* Function arguments that may change. Inside match() these are reached
    394   through #define aliases with the same names minus the X prefix. */
    394 
    395   PCRE_PUCHAR Xeptr;
    396   const pcre_uchar *Xecode;
    397   PCRE_PUCHAR Xmstart;
    398   int Xoffset_top;
    399   eptrblock *Xeptrb;
    400   unsigned int Xrdepth;
    401 
    402   /* Function local variables. Several are given second names in match() to
    403   save space; those are noted on the fields concerned. */
    403 
    404   PCRE_PUCHAR Xcallpat;
    405 #ifdef SUPPORT_UTF
    406   PCRE_PUCHAR Xcharptr;
    407 #endif
    408   PCRE_PUCHAR Xdata;             /* Also used under the name save_mark */
    409   PCRE_PUCHAR Xnext;
    410   PCRE_PUCHAR Xpp;
    411   PCRE_PUCHAR Xprev;
    412   PCRE_PUCHAR Xsaved_eptr;
    413 
    414   recursion_info Xnew_recursive;
    415 
    416   BOOL Xcur_is_word;             /* Also used under the name allow_zero */
    417   BOOL Xcondition;               /* Also used under the names cbegroup and condassert */
    418   BOOL Xprev_is_word;            /* Also used under the name matched_once */
    419 
    420 #ifdef SUPPORT_UCP
    421   int Xprop_type;
    422   unsigned int Xprop_value;
    423   int Xprop_fail_result;
    424   int Xoclength;
    425   pcre_uchar Xocchars[6];
    426 #endif
    427 
    428   int Xcodelink;                 /* Also used under the name code_offset */
    429   int Xctype;
    430   unsigned int Xfc;              /* In the recursive build fc is simply an alias for c */
    431   int Xfi;                       /* In the recursive build fi is simply an alias for i */
    432   int Xlength;
    433   int Xmax;
    434   int Xmin;
    435   unsigned int Xnumber;          /* Also used under the name foc */
    436   int Xoffset;
    437   unsigned int Xop;
    438   pcre_int32 Xsave_capture_last;
    439   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
    440   int Xstacksave[REC_STACK_SAVE_MAX];
    441 
    442   eptrblock Xnewptrb;
    443 
    444   /* Where to jump back to: the RMn value stored by RMATCH() before it
    445   switches to the next frame. */
    445 
    446   int Xwhere;
    447 
    448 } heapframe;
    449 
    450 #endif
    451 
    452 
    453 /***************************************************************************
    454 ***************************************************************************/
    455 
    456 
    457 
    458 /*************************************************
    459 *         Match from current position            *
    460 *************************************************/
    461 
    462 /* This function is called recursively in many circumstances. Whenever it
    463 returns a negative (error) response, the outer incarnation must also return the
    464 same response. */
    465 
    466 /* These macros pack up tests that are used for partial matching, and which
    467 appear several times in the code. We set the "hit end" flag if the pointer is
    468 at the end of the subject and also past the start of the subject (i.e.
    469 something has been matched). For hard partial matching, we then return
    470 immediately. The second one is used when we already know we are past the end of
    471 the subject. */
    472 
    473 #define CHECK_PARTIAL()\
    474   if (md->partial != 0 && eptr >= md->end_subject && \
    475       eptr > md->start_used_ptr) \
    476     { \
    477     md->hitend = TRUE;   /* Note that the end of the subject was reached */ \
    478     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);   /* Hard partial matching: give up at once */ \
    479     }
    480 
    481 #define SCHECK_PARTIAL()\
    482   if (md->partial != 0 && eptr > md->start_used_ptr) \
    483     { \
    484     md->hitend = TRUE;   /* No end_subject test here: the caller already knows the end was passed */ \
    485     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);   /* Hard partial matching: give up at once */ \
    486     }
    487 
    488 
    489 /* Performance note: It might be tempting to extract commonly used fields from
    490 the md structure (e.g. utf, end_subject) into individual variables to improve
    491 performance. Tests using gcc on a SPARC disproved this; in the first case, it
    492 made performance worse.
    493 
    494 Arguments:
    495    eptr        pointer to current character in subject
    496    ecode       pointer to current position in compiled code
    497    mstart      pointer to the current match start position (can be modified
    498                  by encountering \K)
    499    offset_top  current top pointer
    500    md          pointer to "static" info for the match
    501    eptrb       pointer to chain of blocks containing eptr at start of
    502                  brackets - for testing for empty matches
    503    rdepth      the recursion depth
    504 
    505 Returns:       MATCH_MATCH if matched            )  these values are >= 0
    506                MATCH_NOMATCH if failed to match  )
    507                a negative MATCH_xxx value for PRUNE, SKIP, etc
    508                a negative PCRE_ERROR_xxx value if aborted by an error condition
    509                  (e.g. stopped by repeated call or recursion limit)
    510 */
    511 
    512 static int
    513 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
    514   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
    515   unsigned int rdepth)
    516 {
    517 /* These variables do not need to be preserved over recursion in this function,
    518 so they can be ordinary variables in all cases. Mark some of them with
    519 "register" because they are used a lot in loops. */
    520 
    521 register int  rrc;         /* Returns from recursive calls */
    522 register int  i;           /* Used for loops not involving calls to RMATCH() */
    523 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
    524 register BOOL utf;         /* Local copy of UTF flag for speed */
    525 
    526 BOOL minimize, possessive; /* Quantifier options */
    527 BOOL caseless;
    528 int condcode;
    529 
    530 /* When recursion is not being used, all "local" variables that have to be
    531 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
    532 frame on the stack here; subsequent instantiations are obtained from the heap
    533 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
    534 the top-level on the stack rather than malloc-ing them all gives a performance
    535 boost in many cases where there is not much "recursion". */
    536 
    537 #ifdef NO_RECURSE
    538 heapframe *frame = (heapframe *)md->match_frames_base;
    539 
    540 /* Copy in the original argument variables */
    541 
    542 frame->Xeptr = eptr;
    543 frame->Xecode = ecode;
    544 frame->Xmstart = mstart;
    545 frame->Xoffset_top = offset_top;
    546 frame->Xeptrb = eptrb;
    547 frame->Xrdepth = rdepth;
    548 
    549 /* This is where control jumps back to to effect "recursion" */
    550 
    551 HEAP_RECURSE:
    552 
    553 /* Macros make the argument variables come from the current frame */
    554 
    555 #define eptr               frame->Xeptr
    556 #define ecode              frame->Xecode
    557 #define mstart             frame->Xmstart
    558 #define offset_top         frame->Xoffset_top
    559 #define eptrb              frame->Xeptrb
    560 #define rdepth             frame->Xrdepth
    561 
    562 /* Ditto for the local variables */
    563 
    564 #ifdef SUPPORT_UTF
    565 #define charptr            frame->Xcharptr
    566 #endif
    567 #define callpat            frame->Xcallpat
    568 #define codelink           frame->Xcodelink
    569 #define data               frame->Xdata
    570 #define next               frame->Xnext
    571 #define pp                 frame->Xpp
    572 #define prev               frame->Xprev
    573 #define saved_eptr         frame->Xsaved_eptr
    574 
    575 #define new_recursive      frame->Xnew_recursive
    576 
    577 #define cur_is_word        frame->Xcur_is_word
    578 #define condition          frame->Xcondition
    579 #define prev_is_word       frame->Xprev_is_word
    580 
    581 #ifdef SUPPORT_UCP
    582 #define prop_type          frame->Xprop_type
    583 #define prop_value         frame->Xprop_value
    584 #define prop_fail_result   frame->Xprop_fail_result
    585 #define oclength           frame->Xoclength
    586 #define occhars            frame->Xocchars
    587 #endif
    588 
    589 #define ctype              frame->Xctype
    590 #define fc                 frame->Xfc
    591 #define fi                 frame->Xfi
    592 #define length             frame->Xlength
    593 #define max                frame->Xmax
    594 #define min                frame->Xmin
    595 #define number             frame->Xnumber
    596 #define offset             frame->Xoffset
    597 #define op                 frame->Xop
    598 #define save_capture_last  frame->Xsave_capture_last
    599 #define save_offset1       frame->Xsave_offset1
    600 #define save_offset2       frame->Xsave_offset2
    601 #define save_offset3       frame->Xsave_offset3
    602 #define stacksave          frame->Xstacksave
    603 
    604 #define newptrb            frame->Xnewptrb
    605 
    606 /* When recursion is being used, local variables are allocated on the stack and
    607 get preserved during recursion in the normal way. In this environment, fi and
    608 i, and fc and c, can be the same variables. */
    609 
    610 #else         /* NO_RECURSE not defined */
    611 #define fi i
    612 #define fc c
    613 
    614 /* Many of the following variables are used only in small blocks of the code.
    615 My normal style of coding would have declared them within each of those blocks.
    616 However, in order to accommodate the version of this code that uses an external
    617 "stack" implemented on the heap, it is easier to declare them all here, so the
    618 declarations can be cut out in a block. The only declarations within blocks
    619 below are for variables that do not have to be preserved over a recursive call
    620 to RMATCH(). */
    621 
    622 #ifdef SUPPORT_UTF
    623 const pcre_uchar *charptr;
    624 #endif
    625 const pcre_uchar *callpat;
    626 const pcre_uchar *data;
    627 const pcre_uchar *next;
    628 PCRE_PUCHAR       pp;
    629 const pcre_uchar *prev;
    630 PCRE_PUCHAR       saved_eptr;
    631 
    632 recursion_info new_recursive;
    633 
    634 BOOL cur_is_word;
    635 BOOL condition;
    636 BOOL prev_is_word;
    637 
    638 #ifdef SUPPORT_UCP
    639 int prop_type;
    640 unsigned int prop_value;
    641 int prop_fail_result;
    642 int oclength;
    643 pcre_uchar occhars[6];
    644 #endif
    645 
    646 int codelink;
    647 int ctype;
    648 int length;
    649 int max;
    650 int min;
    651 unsigned int number;
    652 int offset;
    653 unsigned int op;
    654 pcre_int32 save_capture_last;
    655 int save_offset1, save_offset2, save_offset3;
    656 int stacksave[REC_STACK_SAVE_MAX];
    657 
    658 eptrblock newptrb;
    659 
    660 /* There is a special fudge for calling match() in a way that causes it to
    661 measure the size of its basic stack frame when the stack is being used for
    662 recursion. The second argument (ecode) being NULL triggers this behaviour. It
    663 cannot normally ever be NULL. The return is the negated value of the frame
    664 size. */
    665 
    666 if (ecode == NULL)
    667   {
    668   if (rdepth == 0)
    669     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
    670   else
    671     {
    672     int len = (char *)&rdepth - (char *)eptr;
    673     return (len > 0)? -len : len;
    674     }
    675   }
    676 #endif     /* NO_RECURSE */
    677 
    678 /* To save space on the stack and in the heap frame, I have doubled up on some
    679 of the local variables that are used only in localised parts of the code, but
    680 still need to be preserved over recursive calls of match(). These macros define
    681 the alternative names that are used. */
    682 
    683 #define allow_zero    cur_is_word
    684 #define cbegroup      condition
    685 #define code_offset   codelink
    686 #define condassert    condition
    687 #define matched_once  prev_is_word
    688 #define foc           number
    689 #define save_mark     data
    690 
    691 /* These statements are here to stop the compiler complaining about uninitialized
    692 variables. */
    693 
    694 #ifdef SUPPORT_UCP
    695 prop_value = 0;
    696 prop_fail_result = 0;
    697 #endif
    698 
    699 
    700 /* This label is used for tail recursion, which is used in a few cases even
    701 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
    702 used. Thanks to Ian Taylor for noticing this possibility and sending the
    703 original patch. */
    704 
    705 TAIL_RECURSE:
    706 
    707 /* OK, now we can get on with the real code of the function. Recursive calls
    708 are specified by the macro RMATCH and RRETURN is used to return. When
    709 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
    710 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
    711 defined). However, RMATCH isn't like a function call because it's quite a
    712 complicated macro. It has to be used in one particular way. This shouldn't,
    713 however, impact performance when true recursion is being used. */
    714 
    715 #ifdef SUPPORT_UTF
    716 utf = md->utf;       /* Local copy of the flag */
    717 #else
    718 utf = FALSE;
    719 #endif
    720 
    721 /* First check that we haven't called match() too many times, or that we
    722 haven't exceeded the recursive call limit. */
    723 
    724 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
    725 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
    726 
    727 /* At the start of a group with an unlimited repeat that may match an empty
    728 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
    729 done this way to save having to use another function argument, which would take
    730 up space on the stack. See also MATCH_CONDASSERT below.
    731 
    732 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
    733 such remembered pointers, to be checked when we hit the closing ket, in order
    734 to break infinite loops that match no characters. When match() is called in
    735 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
    736 NOT be used with tail recursion, because the memory block that is used is on
    737 the stack, so a new one may be required for each match(). */
    738 
    739 if (md->match_function_type == MATCH_CBEGROUP)
    740   {
    741   newptrb.epb_saved_eptr = eptr;
    742   newptrb.epb_prev = eptrb;
    743   eptrb = &newptrb;
    744   md->match_function_type = 0;
    745   }
    746 
    747 /* Now start processing the opcodes. */
    748 
    749 for (;;)
    750   {
    751   minimize = possessive = FALSE;
    752   op = *ecode;
    753 
    754   switch(op)
    755     {
    756     case OP_MARK:
    757     md->nomatch_mark = ecode + 2;
    758     md->mark = NULL;    /* In case previously set by assertion */
    759     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    760       eptrb, RM55);
    761     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    762          md->mark == NULL) md->mark = ecode + 2;
    763 
    764     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
    765     argument, and we must check whether that argument matches this MARK's
    766     argument. It is passed back in md->start_match_ptr (an overloading of that
    767     variable). If it does match, we reset that variable to the current subject
    768     position and return MATCH_SKIP. Otherwise, pass back the return code
    769     unaltered. */
    770 
    771     else if (rrc == MATCH_SKIP_ARG &&
    772         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
    773       {
    774       md->start_match_ptr = eptr;
    775       RRETURN(MATCH_SKIP);
    776       }
    777     RRETURN(rrc);
    778 
    779     case OP_FAIL:
    780     RRETURN(MATCH_NOMATCH);
    781 
    782     case OP_COMMIT:
    783     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    784       eptrb, RM52);
    785     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    786     RRETURN(MATCH_COMMIT);
    787 
    788     case OP_PRUNE:
    789     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    790       eptrb, RM51);
    791     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    792     RRETURN(MATCH_PRUNE);
    793 
    794     case OP_PRUNE_ARG:
    795     md->nomatch_mark = ecode + 2;
    796     md->mark = NULL;    /* In case previously set by assertion */
    797     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    798       eptrb, RM56);
    799     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    800          md->mark == NULL) md->mark = ecode + 2;
    801     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    802     RRETURN(MATCH_PRUNE);
    803 
    804     case OP_SKIP:
    805     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    806       eptrb, RM53);
    807     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    808     md->start_match_ptr = eptr;   /* Pass back current position */
    809     RRETURN(MATCH_SKIP);
    810 
    811     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
    812     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
    813     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
    814     that failed and any that precede it (either they also failed, or were not
    815     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
    816     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
    817     set to the count of the one that failed. */
    818 
    819     case OP_SKIP_ARG:
    820     md->skip_arg_count++;
    821     if (md->skip_arg_count <= md->ignore_skip_arg)
    822       {
    823       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
    824       break;
    825       }
    826     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
    827       eptrb, RM57);
    828     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    829 
    830     /* Pass back the current skip name by overloading md->start_match_ptr and
    831     returning the special MATCH_SKIP_ARG return code. This will either be
    832     caught by a matching MARK, or get to the top, where it causes a rematch
    833     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
    834 
    835     md->start_match_ptr = ecode + 2;
    836     RRETURN(MATCH_SKIP_ARG);
    837 
    838     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
    839     the branch in which it occurs can be determined. Overload the start of
    840     match pointer to do this. */
    841 
    842     case OP_THEN:
    843     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    844       eptrb, RM54);
    845     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    846     md->start_match_ptr = ecode;
    847     RRETURN(MATCH_THEN);
    848 
    849     case OP_THEN_ARG:
    850     md->nomatch_mark = ecode + 2;
    851     md->mark = NULL;    /* In case previously set by assertion */
    852     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
    853       md, eptrb, RM58);
    854     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    855          md->mark == NULL) md->mark = ecode + 2;
    856     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    857     md->start_match_ptr = ecode;
    858     RRETURN(MATCH_THEN);
    859 
    860     /* Handle an atomic group that does not contain any capturing parentheses.
    861     This can be handled like an assertion. Prior to 8.13, all atomic groups
    862     were handled this way. In 8.13, the code was changed as below for ONCE, so
    863     that backups pass through the group and thereby reset captured values.
    864     However, this uses a lot more stack, so in 8.20, atomic groups that do not
    865     contain any captures generate OP_ONCE_NC, which can be handled in the old,
    866     less stack intensive way.
    867 
    868     Check the alternative branches in turn - the matching won't pass the KET
    869     for this kind of subpattern. If any one branch matches, we carry on as at
    870     the end of a normal bracket, leaving the subject pointer, but resetting
    871     the start-of-match value in case it was changed by \K. */
    872 
    873     case OP_ONCE_NC:
    874     prev = ecode;
    875     saved_eptr = eptr;
    876     save_mark = md->mark;
    877     do
    878       {
    879       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
    880       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
    881         {
    882         mstart = md->start_match_ptr;
    883         break;
    884         }
    885       if (rrc == MATCH_THEN)
    886         {
    887         next = ecode + GET(ecode,1);
    888         if (md->start_match_ptr < next &&
    889             (*ecode == OP_ALT || *next == OP_ALT))
    890           rrc = MATCH_NOMATCH;
    891         }
    892 
    893       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    894       ecode += GET(ecode,1);
    895       md->mark = save_mark;
    896       }
    897     while (*ecode == OP_ALT);
    898 
    899     /* If we hit the end of the group (which could be repeated), fail */
    900 
    901     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
    902 
    903     /* Continue as from after the group, updating the offsets high water
    904     mark, since extracts may have been taken. */
    905 
    906     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
    907 
    908     offset_top = md->end_offset_top;
    909     eptr = md->end_match_ptr;
    910 
    911     /* For a non-repeating ket, just continue at this level. This also
    912     happens for a repeating ket if no characters were matched in the group.
    913     This is the forcible breaking of infinite loops as implemented in Perl
    914     5.005. */
    915 
    916     if (*ecode == OP_KET || eptr == saved_eptr)
    917       {
    918       ecode += 1+LINK_SIZE;
    919       break;
    920       }
    921 
    922     /* The repeating kets try the rest of the pattern or restart from the
    923     preceding bracket, in the appropriate order. The second "call" of match()
    924     uses tail recursion, to avoid using another stack frame. */
    925 
    926     if (*ecode == OP_KETRMIN)
    927       {
    928       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
    929       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    930       ecode = prev;
    931       goto TAIL_RECURSE;
    932       }
    933     else  /* OP_KETRMAX */
    934       {
    935       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
    936       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    937       ecode += 1 + LINK_SIZE;
    938       goto TAIL_RECURSE;
    939       }
    940     /* Control never gets here */
    941 
    942     /* Handle a capturing bracket, other than those that are possessive with an
    943     unlimited repeat. If there is space in the offset vector, save the current
    944     subject position in the working slot at the top of the vector. We mustn't
    945     change the current values of the data slot, because they may be set from a
    946     previous iteration of this group, and be referred to by a reference inside
    947     the group. A failure to match might occur after the group has succeeded,
    948     if something later on doesn't match. For this reason, we need to restore
    949     the working value and also the values of the final offsets, in case they
    950     were set by a previous iteration of the same bracket.
    951 
    952     If there isn't enough space in the offset vector, treat this as if it were
    953     a non-capturing bracket. Don't worry about setting the flag for the error
    954     case here; that is handled in the code for KET. */
    955 
    956     case OP_CBRA:
    957     case OP_SCBRA:
    958     number = GET2(ecode, 1+LINK_SIZE);
    959     offset = number << 1;
    960 
    961 #ifdef PCRE_DEBUG
    962     printf("start bracket %d\n", number);
    963     printf("subject=");
    964     pchars(eptr, 16, TRUE, md);
    965     printf("\n");
    966 #endif
    967 
    968     if (offset < md->offset_max)
    969       {
    970       save_offset1 = md->offset_vector[offset];
    971       save_offset2 = md->offset_vector[offset+1];
    972       save_offset3 = md->offset_vector[md->offset_end - number];
    973       save_capture_last = md->capture_last;
    974       save_mark = md->mark;
    975 
    976       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
    977       md->offset_vector[md->offset_end - number] =
    978         (int)(eptr - md->start_subject);
    979 
    980       for (;;)
    981         {
    982         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
    983         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
    984           eptrb, RM1);
    985         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
    986 
    987         /* If we backed up to a THEN, check whether it is within the current
    988         branch by comparing the address of the THEN that is passed back with
    989         the end of the branch. If it is within the current branch, and the
    990         branch is one of two or more alternatives (it either starts or ends
    991         with OP_ALT), we have reached the limit of THEN's action, so convert
    992         the return code to NOMATCH, which will cause normal backtracking to
    993         happen from now on. Otherwise, THEN is passed back to an outer
    994         alternative. This implements Perl's treatment of parenthesized groups,
    995         where a group not containing | does not affect the current alternative,
    996         that is, (X) is NOT the same as (X|(*F)). */
    997 
    998         if (rrc == MATCH_THEN)
    999           {
   1000           next = ecode + GET(ecode,1);
   1001           if (md->start_match_ptr < next &&
   1002               (*ecode == OP_ALT || *next == OP_ALT))
   1003             rrc = MATCH_NOMATCH;
   1004           }
   1005 
   1006         /* Anything other than NOMATCH is passed back. */
   1007 
   1008         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1009         md->capture_last = save_capture_last;
   1010         ecode += GET(ecode, 1);
   1011         md->mark = save_mark;
   1012         if (*ecode != OP_ALT) break;
   1013         }
   1014 
   1015       DPRINTF(("bracket %d failed\n", number));
   1016       md->offset_vector[offset] = save_offset1;
   1017       md->offset_vector[offset+1] = save_offset2;
   1018       md->offset_vector[md->offset_end - number] = save_offset3;
   1019 
   1020       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
   1021 
   1022       RRETURN(rrc);
   1023       }
   1024 
   1025     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
   1026     as a non-capturing bracket. */
   1027 
   1028     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1029     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1030 
   1031     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
   1032 
   1033     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1034     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1035 
   1036     /* Non-capturing or atomic group, except for possessive with unlimited
   1037     repeat and ONCE group with no captures. Loop for all the alternatives.
   1038 
   1039     When we get to the final alternative within the brackets, we used to return
   1040     the result of a recursive call to match() whatever happened so it was
   1041     possible to reduce stack usage by turning this into a tail recursion,
   1042     except in the case of a possibly empty group. However, now that there is
   1043     the possibility of (*THEN) occurring in the final alternative, this
   1044     optimization is no longer always possible.
   1045 
   1046     We can optimize if we know there are no (*THEN)s in the pattern; at present
   1047     this is the best that can be done.
   1048 
   1049     MATCH_ONCE is returned when the end of an atomic group is successfully
   1050     reached, but subsequent matching fails. It passes back up the tree (causing
   1051     captured values to be reset) until the original atomic group level is
   1052     reached. This is tested by comparing md->once_target with the start of the
   1053     group. At this point, the return is converted into MATCH_NOMATCH so that
   1054     previous backup points can be taken. */
   1055 
   1056     case OP_ONCE:
   1057     case OP_BRA:
   1058     case OP_SBRA:
   1059     DPRINTF(("start non-capturing bracket\n"));
   1060 
   1061     for (;;)
   1062       {
   1063       if (op >= OP_SBRA || op == OP_ONCE)
   1064         md->match_function_type = MATCH_CBEGROUP;
   1065 
   1066       /* If this is not a possibly empty group, and there are no (*THEN)s in
   1067       the pattern, and this is the final alternative, optimize as described
   1068       above. */
   1069 
   1070       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
   1071         {
   1072         ecode += PRIV(OP_lengths)[*ecode];
   1073         goto TAIL_RECURSE;
   1074         }
   1075 
   1076       /* In all other cases, we have to make another call to match(). */
   1077 
   1078       save_mark = md->mark;
   1079       save_capture_last = md->capture_last;
   1080       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
   1081         RM2);
   1082 
   1083       /* See comment in the code for capturing groups above about handling
   1084       THEN. */
   1085 
   1086       if (rrc == MATCH_THEN)
   1087         {
   1088         next = ecode + GET(ecode,1);
   1089         if (md->start_match_ptr < next &&
   1090             (*ecode == OP_ALT || *next == OP_ALT))
   1091           rrc = MATCH_NOMATCH;
   1092         }
   1093 
   1094       if (rrc != MATCH_NOMATCH)
   1095         {
   1096         if (rrc == MATCH_ONCE)
   1097           {
   1098           const pcre_uchar *scode = ecode;
   1099           if (*scode != OP_ONCE)           /* If not at start, find it */
   1100             {
   1101             while (*scode == OP_ALT) scode += GET(scode, 1);
   1102             scode -= GET(scode, 1);
   1103             }
   1104           if (md->once_target == scode) rrc = MATCH_NOMATCH;
   1105           }
   1106         RRETURN(rrc);
   1107         }
   1108       ecode += GET(ecode, 1);
   1109       md->mark = save_mark;
   1110       if (*ecode != OP_ALT) break;
   1111       md->capture_last = save_capture_last;
   1112       }
   1113 
   1114     RRETURN(MATCH_NOMATCH);
   1115 
   1116     /* Handle possessive capturing brackets with an unlimited repeat. We come
   1117     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
   1118     handled similarly to the normal case above. However, the matching is
   1119     different. The end of these brackets will always be OP_KETRPOS, which
   1120     returns MATCH_KETRPOS without going further in the pattern. By this means
   1121     we can handle the group by iteration rather than recursion, thereby
   1122     reducing the amount of stack needed. */
   1123 
   1124     case OP_CBRAPOS:
   1125     case OP_SCBRAPOS:
   1126     allow_zero = FALSE;
   1127 
   1128     POSSESSIVE_CAPTURE:
   1129     number = GET2(ecode, 1+LINK_SIZE);
   1130     offset = number << 1;
   1131 
   1132 #ifdef PCRE_DEBUG
   1133     printf("start possessive bracket %d\n", number);
   1134     printf("subject=");
   1135     pchars(eptr, 16, TRUE, md);
   1136     printf("\n");
   1137 #endif
   1138 
   1139     if (offset < md->offset_max)
   1140       {
   1141       matched_once = FALSE;
   1142       code_offset = (int)(ecode - md->start_code);
   1143 
   1144       save_offset1 = md->offset_vector[offset];
   1145       save_offset2 = md->offset_vector[offset+1];
   1146       save_offset3 = md->offset_vector[md->offset_end - number];
   1147       save_capture_last = md->capture_last;
   1148 
   1149       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
   1150 
   1151       /* Each time round the loop, save the current subject position for use
   1152       when the group matches. For MATCH_MATCH, the group has matched, so we
   1153       restart it with a new subject starting position, remembering that we had
   1154       at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
   1155       usual. If we haven't matched any alternatives in any iteration, check to
   1156       see if a previous iteration matched. If so, the group has matched;
   1157       continue from afterwards. Otherwise it has failed; restore the previous
   1158       capture values before returning NOMATCH. */
   1159 
   1160       for (;;)
   1161         {
   1162         md->offset_vector[md->offset_end - number] =
   1163           (int)(eptr - md->start_subject);
   1164         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
   1165         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
   1166           eptrb, RM63);
   1167         if (rrc == MATCH_KETRPOS)
   1168           {
   1169           offset_top = md->end_offset_top;
   1170           ecode = md->start_code + code_offset;
   1171           save_capture_last = md->capture_last;
   1172           matched_once = TRUE;
   1173           mstart = md->start_match_ptr;    /* In case \K changed it */
   1174           if (eptr == md->end_match_ptr)   /* Matched an empty string */
   1175             {
   1176             do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1177             break;
   1178             }
   1179           eptr = md->end_match_ptr;
   1180           continue;
   1181           }
   1182 
   1183         /* See comment in the code for capturing groups above about handling
   1184         THEN. */
   1185 
   1186         if (rrc == MATCH_THEN)
   1187           {
   1188           next = ecode + GET(ecode,1);
   1189           if (md->start_match_ptr < next &&
   1190               (*ecode == OP_ALT || *next == OP_ALT))
   1191             rrc = MATCH_NOMATCH;
   1192           }
   1193 
   1194         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1195         md->capture_last = save_capture_last;
   1196         ecode += GET(ecode, 1);
   1197         if (*ecode != OP_ALT) break;
   1198         }
   1199 
   1200       if (!matched_once)
   1201         {
   1202         md->offset_vector[offset] = save_offset1;
   1203         md->offset_vector[offset+1] = save_offset2;
   1204         md->offset_vector[md->offset_end - number] = save_offset3;
   1205         }
   1206 
   1207       if (allow_zero || matched_once)
   1208         {
   1209         ecode += 1 + LINK_SIZE;
   1210         break;
   1211         }
   1212 
   1213       RRETURN(MATCH_NOMATCH);
   1214       }
   1215 
   1216     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
   1217     as a non-capturing bracket. */
   1218 
   1219     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1220     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1221 
   1222     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
   1223 
   1224     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1225     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
   1226 
   1227     /* Non-capturing possessive bracket with unlimited repeat. We come here
   1228     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
   1229     without the capturing complication. It is written out separately for speed
   1230     and cleanliness. */
   1231 
   1232     case OP_BRAPOS:
   1233     case OP_SBRAPOS:
   1234     allow_zero = FALSE;
   1235 
   1236     POSSESSIVE_NON_CAPTURE:
   1237     matched_once = FALSE;
   1238     code_offset = (int)(ecode - md->start_code);
   1239     save_capture_last = md->capture_last;
   1240 
   1241     for (;;)
   1242       {
   1243       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
   1244       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
   1245         eptrb, RM48);
   1246       if (rrc == MATCH_KETRPOS)
   1247         {
   1248         offset_top = md->end_offset_top;
   1249         ecode = md->start_code + code_offset;
   1250         matched_once = TRUE;
   1251         mstart = md->start_match_ptr;   /* In case \K reset it */
   1252         if (eptr == md->end_match_ptr)  /* Matched an empty string */
   1253           {
   1254           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1255           break;
   1256           }
   1257         eptr = md->end_match_ptr;
   1258         continue;
   1259         }
   1260 
   1261       /* See comment in the code for capturing groups above about handling
   1262       THEN. */
   1263 
   1264       if (rrc == MATCH_THEN)
   1265         {
   1266         next = ecode + GET(ecode,1);
   1267         if (md->start_match_ptr < next &&
   1268             (*ecode == OP_ALT || *next == OP_ALT))
   1269           rrc = MATCH_NOMATCH;
   1270         }
   1271 
   1272       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1273       ecode += GET(ecode, 1);
   1274       if (*ecode != OP_ALT) break;
   1275       md->capture_last = save_capture_last;
   1276       }
   1277 
   1278     if (matched_once || allow_zero)
   1279       {
   1280       ecode += 1 + LINK_SIZE;
   1281       break;
   1282       }
   1283     RRETURN(MATCH_NOMATCH);
   1284 
   1285     /* Control never reaches here. */
   1286 
   1287     /* Conditional group: compilation checked that there are no more than two
   1288     branches. If the condition is false, skipping the first branch takes us
   1289     past the end of the item if there is only one branch, but that's exactly
   1290     what we want. */
   1291 
   1292     case OP_COND:
   1293     case OP_SCOND:
   1294 
   1295     /* The variable codelink will be added to ecode when the condition is
   1296     false, to get to the second branch. Setting it to the offset to the ALT
   1297     or KET, then incrementing ecode achieves this effect. We now have ecode
   1298     pointing to the condition or callout. */
   1299 
   1300     codelink = GET(ecode, 1);   /* Offset to the second branch */
   1301     ecode += 1 + LINK_SIZE;     /* From this opcode */
   1302 
   1303     /* Because of the way auto-callout works during compile, a callout item is
   1304     inserted between OP_COND and an assertion condition. */
   1305 
   1306     if (*ecode == OP_CALLOUT)
   1307       {
   1308       if (PUBL(callout) != NULL)
   1309         {
   1310         PUBL(callout_block) cb;
   1311         cb.version          = 2;   /* Version 1 of the callout block */
   1312         cb.callout_number   = ecode[1];
   1313         cb.offset_vector    = md->offset_vector;
   1314 #if defined COMPILE_PCRE8
   1315         cb.subject          = (PCRE_SPTR)md->start_subject;
   1316 #elif defined COMPILE_PCRE16
   1317         cb.subject          = (PCRE_SPTR16)md->start_subject;
   1318 #elif defined COMPILE_PCRE32
   1319         cb.subject          = (PCRE_SPTR32)md->start_subject;
   1320 #endif
   1321         cb.subject_length   = (int)(md->end_subject - md->start_subject);
   1322         cb.start_match      = (int)(mstart - md->start_subject);
   1323         cb.current_position = (int)(eptr - md->start_subject);
   1324         cb.pattern_position = GET(ecode, 2);
   1325         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
   1326         cb.capture_top      = offset_top/2;
   1327         cb.capture_last     = md->capture_last & CAPLMASK;
   1328         /* Internal change requires this for API compatibility. */
   1329         if (cb.capture_last == 0) cb.capture_last = -1;
   1330         cb.callout_data     = md->callout_data;
   1331         cb.mark             = md->nomatch_mark;
   1332         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
   1333         if (rrc < 0) RRETURN(rrc);
   1334         }
   1335 
   1336       /* Advance ecode past the callout, so it now points to the condition. We
   1337       must adjust codelink so that the value of ecode+codelink is unchanged. */
   1338 
   1339       ecode += PRIV(OP_lengths)[OP_CALLOUT];
   1340       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
   1341       }
   1342 
   1343     /* Test the various possible conditions */
   1344 
   1345     condition = FALSE;
   1346     switch(condcode = *ecode)
   1347       {
   1348       case OP_RREF:         /* Numbered group recursion test */
   1349       if (md->recursive != NULL)     /* Not recursing => FALSE */
   1350         {
   1351         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
   1352         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
   1353         }
   1354       break;
   1355 
   1356       case OP_DNRREF:       /* Duplicate named group recursion test */
   1357       if (md->recursive != NULL)
   1358         {
   1359         int count = GET2(ecode, 1 + IMM2_SIZE);
   1360         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   1361         while (count-- > 0)
   1362           {
   1363           unsigned int recno = GET2(slot, 0);
   1364           condition = recno == md->recursive->group_num;
   1365           if (condition) break;
   1366           slot += md->name_entry_size;
   1367           }
   1368         }
   1369       break;
   1370 
   1371       case OP_CREF:         /* Numbered group used test */
   1372       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
   1373       condition = offset < offset_top && md->offset_vector[offset] >= 0;
   1374       break;
   1375 
   1376       case OP_DNCREF:      /* Duplicate named group used test */
   1377         {
   1378         int count = GET2(ecode, 1 + IMM2_SIZE);
   1379         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   1380         while (count-- > 0)
   1381           {
   1382           offset = GET2(slot, 0) << 1;
   1383           condition = offset < offset_top && md->offset_vector[offset] >= 0;
   1384           if (condition) break;
   1385           slot += md->name_entry_size;
   1386           }
   1387         }
   1388       break;
   1389 
   1390       case OP_DEF:     /* DEFINE - always false */
   1391       break;
   1392 
   1393       /* The condition is an assertion. Call match() to evaluate it - setting
   1394       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
   1395       of an assertion. */
   1396 
   1397       default:
   1398       md->match_function_type = MATCH_CONDASSERT;
   1399       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
   1400       if (rrc == MATCH_MATCH)
   1401         {
   1402         if (md->end_offset_top > offset_top)
   1403           offset_top = md->end_offset_top;  /* Captures may have happened */
   1404         condition = TRUE;
   1405 
   1406         /* Advance ecode past the assertion to the start of the first branch,
   1407         but adjust it so that the general choosing code below works. */
   1408 
   1409         ecode += GET(ecode, 1);
   1410         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
   1411         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
   1412         }
   1413 
   1414       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
   1415       assertion; it is therefore treated as NOMATCH. Any other return is an
   1416       error. */
   1417 
   1418       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
   1419         {
   1420         RRETURN(rrc);         /* Need braces because of following else */
   1421         }
   1422       break;
   1423       }
   1424 
   1425     /* Choose branch according to the condition */
   1426 
   1427     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
   1428 
   1429     /* We are now at the branch that is to be obeyed. As there is only one, we
   1430     can use tail recursion to avoid using another stack frame, except when
   1431     there is unlimited repeat of a possibly empty group. In the latter case, a
   1432     recursive call to match() is always required, unless the second alternative
   1433     doesn't exist, in which case we can just plough on. Note that, for
   1434     compatibility with Perl, the | in a conditional group is NOT treated as
   1435     creating two alternatives. If a THEN is encountered in the branch, it
   1436     propagates out to the enclosing alternative (unless nested in a deeper set
   1437     of alternatives, of course). */
   1438 
   1439     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
   1440       {
   1441       if (op != OP_SCOND)
   1442         {
   1443         goto TAIL_RECURSE;
   1444         }
   1445 
   1446       md->match_function_type = MATCH_CBEGROUP;
   1447       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
   1448       RRETURN(rrc);
   1449       }
   1450 
   1451      /* Condition false & no alternative; continue after the group. */
   1452 
   1453     else
   1454       {
   1455       }
   1456     break;
   1457 
   1458 
   1459     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
   1460     to close any currently open capturing brackets. */
   1461 
   1462     case OP_CLOSE:
   1463     number = GET2(ecode, 1);   /* Must be less than 65536 */
   1464     offset = number << 1;
   1465 
   1466 #ifdef PCRE_DEBUG
   1467       printf("end bracket %d at *ACCEPT", number);
   1468       printf("\n");
   1469 #endif
   1470 
   1471     md->capture_last = (md->capture_last & OVFLMASK) | number;
   1472     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
   1473       {
   1474       md->offset_vector[offset] =
   1475         md->offset_vector[md->offset_end - number];
   1476       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1477       if (offset_top <= offset) offset_top = offset + 2;
   1478       }
   1479     ecode += 1 + IMM2_SIZE;
   1480     break;
   1481 
   1482 
   1483     /* End of the pattern, either real or forced. */
   1484 
   1485     case OP_END:
   1486     case OP_ACCEPT:
   1487     case OP_ASSERT_ACCEPT:
   1488 
   1489     /* If we have matched an empty string, fail if not in an assertion and not
   1490     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
   1491     is set and we have matched at the start of the subject. In both cases,
   1492     backtracking will then try other alternatives, if any. */
   1493 
   1494     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
   1495          md->recursive == NULL &&
   1496          (md->notempty ||
   1497            (md->notempty_atstart &&
   1498              mstart == md->start_subject + md->start_offset)))
   1499       RRETURN(MATCH_NOMATCH);
   1500 
   1501     /* Otherwise, we have a match. */
   1502 
   1503     md->end_match_ptr = eptr;           /* Record where we ended */
   1504     md->end_offset_top = offset_top;    /* and how many extracts were taken */
   1505     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
   1506 
   1507     /* For some reason, the macros don't work properly if an expression is
   1508     given as the argument to RRETURN when the heap is in use. */
   1509 
   1510     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
   1511     RRETURN(rrc);
   1512 
   1513     /* Assertion brackets. Check the alternative branches in turn - the
   1514     matching won't pass the KET for an assertion. If any one branch matches,
   1515     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
   1516     start of each branch to move the current point backwards, so the code at
   1517     this level is identical to the lookahead case. When the assertion is part
   1518     of a condition, we want to return immediately afterwards. The caller of
   1519     this incarnation of the match() function will have set MATCH_CONDASSERT in
   1520     md->match_function_type, and one of these opcodes will be the first opcode
   1521     that is processed. We use a local variable that is preserved over calls to
   1522     match() to remember this case. */
   1523 
   1524     case OP_ASSERT:
   1525     case OP_ASSERTBACK:
   1526     save_mark = md->mark;
   1527     if (md->match_function_type == MATCH_CONDASSERT)
   1528       {
   1529       condassert = TRUE;
   1530       md->match_function_type = 0;
   1531       }
   1532     else condassert = FALSE;
   1533 
   1534     /* Loop for each branch */
   1535 
   1536     do
   1537       {
   1538       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
   1539 
   1540       /* A match means that the assertion is true; break out of the loop
   1541       that matches its alternatives. */
   1542 
   1543       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1544         {
   1545         mstart = md->start_match_ptr;   /* In case \K reset it */
   1546         break;
   1547         }
   1548 
   1549       /* If not matched, restore the previous mark setting. */
   1550 
   1551       md->mark = save_mark;
   1552 
   1553       /* See comment in the code for capturing groups above about handling
   1554       THEN. */
   1555 
   1556       if (rrc == MATCH_THEN)
   1557         {
   1558         next = ecode + GET(ecode,1);
   1559         if (md->start_match_ptr < next &&
   1560             (*ecode == OP_ALT || *next == OP_ALT))
   1561           rrc = MATCH_NOMATCH;
   1562         }
   1563 
   1564       /* Anything other than NOMATCH causes the entire assertion to fail,
   1565       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
   1566       uncaptured THEN, which means they take their normal effect. This
   1567       consistent approach does not always have exactly the same effect as in
   1568       Perl. */
   1569 
   1570       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1571       ecode += GET(ecode, 1);
   1572       }
   1573     while (*ecode == OP_ALT);   /* Continue for next alternative */
   1574 
   1575     /* If we have tried all the alternative branches, the assertion has
   1576     failed. If not, we broke out after a match. */
   1577 
   1578     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
   1579 
   1580     /* If checking an assertion for a condition, return MATCH_MATCH. */
   1581 
   1582     if (condassert) RRETURN(MATCH_MATCH);
   1583 
   1584     /* Continue from after a successful assertion, updating the offsets high
   1585     water mark, since extracts may have been taken during the assertion. */
   1586 
   1587     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1588     ecode += 1 + LINK_SIZE;
   1589     offset_top = md->end_offset_top;
   1590     continue;
   1591 
   1592     /* Negative assertion: all branches must fail to match for the assertion to
   1593     succeed. */
   1594 
   1595     case OP_ASSERT_NOT:
   1596     case OP_ASSERTBACK_NOT:
   1597     save_mark = md->mark;
   1598     if (md->match_function_type == MATCH_CONDASSERT)
   1599       {
   1600       condassert = TRUE;
   1601       md->match_function_type = 0;
   1602       }
   1603     else condassert = FALSE;
   1604 
   1605     /* Loop for each alternative branch. */
   1606 
   1607     do
   1608       {
   1609       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
   1610       md->mark = save_mark;   /* Always restore the mark setting */
   1611 
   1612       switch(rrc)
   1613         {
   1614         case MATCH_MATCH:            /* A successful match means */
   1615         case MATCH_ACCEPT:           /* the assertion has failed. */
   1616         RRETURN(MATCH_NOMATCH);
   1617 
   1618         case MATCH_NOMATCH:          /* Carry on with next branch */
   1619         break;
   1620 
   1621         /* See comment in the code for capturing groups above about handling
   1622         THEN. */
   1623 
   1624         case MATCH_THEN:
   1625         next = ecode + GET(ecode,1);
   1626         if (md->start_match_ptr < next &&
   1627             (*ecode == OP_ALT || *next == OP_ALT))
   1628           {
   1629           rrc = MATCH_NOMATCH;
   1630           break;
   1631           }
   1632         /* Otherwise fall through. */
   1633 
   1634         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
   1635         assertion to fail to match, without considering any more alternatives.
   1636         Failing to match means the assertion is true. This is a consistent
   1637         approach, but does not always have the same effect as in Perl. */
   1638 
   1639         case MATCH_COMMIT:
   1640         case MATCH_SKIP:
   1641         case MATCH_SKIP_ARG:
   1642         case MATCH_PRUNE:
   1643         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1644         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
   1645 
   1646         /* Anything else is an error */
   1647 
   1648         default:
   1649         RRETURN(rrc);
   1650         }
   1651 
   1652       /* Continue with next branch */
   1653 
   1654       ecode += GET(ecode,1);
   1655       }
   1656     while (*ecode == OP_ALT);
   1657 
   1658     /* All branches in the assertion failed to match. */
   1659 
   1660     NEG_ASSERT_TRUE:
   1661     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
   1662     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
   1663     continue;
   1664 
   1665     /* Move the subject pointer back. This occurs only at the start of
   1666     each branch of a lookbehind assertion. If we are too close to the start to
   1667     move back, this match function fails. When working with UTF-8 we move
   1668     back a number of characters, not bytes. */
   1669 
   1670     case OP_REVERSE:
   1671 #ifdef SUPPORT_UTF
   1672     if (utf)
   1673       {
   1674       i = GET(ecode, 1);
   1675       while (i-- > 0)
   1676         {
   1677         eptr--;
   1678         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
   1679         BACKCHAR(eptr);
   1680         }
   1681       }
   1682     else
   1683 #endif
   1684 
   1685     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
   1686 
   1687       {
   1688       eptr -= GET(ecode, 1);
   1689       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
   1690       }
   1691 
   1692     /* Save the earliest consulted character, then skip to next op code */
   1693 
   1694     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
   1695     ecode += 1 + LINK_SIZE;
   1696     break;
   1697 
   1698     /* The callout item calls an external function, if one is provided, passing
   1699     details of the match so far. This is mainly for debugging, though the
   1700     function is able to force a failure. */
   1701 
   1702     case OP_CALLOUT:
   1703     if (PUBL(callout) != NULL)
   1704       {
   1705       PUBL(callout_block) cb;
   1706       cb.version          = 2;   /* Version 2 of the callout block */
   1707       cb.callout_number   = ecode[1];
   1708       cb.offset_vector    = md->offset_vector;
   1709 #if defined COMPILE_PCRE8
   1710       cb.subject          = (PCRE_SPTR)md->start_subject;
   1711 #elif defined COMPILE_PCRE16
   1712       cb.subject          = (PCRE_SPTR16)md->start_subject;
   1713 #elif defined COMPILE_PCRE32
   1714       cb.subject          = (PCRE_SPTR32)md->start_subject;
   1715 #endif
   1716       cb.subject_length   = (int)(md->end_subject - md->start_subject);
   1717       cb.start_match      = (int)(mstart - md->start_subject);
   1718       cb.current_position = (int)(eptr - md->start_subject);
   1719       cb.pattern_position = GET(ecode, 2);
   1720       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
   1721       cb.capture_top      = offset_top/2;
   1722       cb.capture_last     = md->capture_last & CAPLMASK;
   1723       /* Internal change requires this for API compatibility. */
   1724       if (cb.capture_last == 0) cb.capture_last = -1;
   1725       cb.callout_data     = md->callout_data;
   1726       cb.mark             = md->nomatch_mark;
   1727       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
   1728       if (rrc < 0) RRETURN(rrc);
   1729       }
   1730     ecode += 2 + 2*LINK_SIZE;
   1731     break;
   1732 
   1733     /* Recursion either matches the current regex, or some subexpression. The
   1734     offset data is the offset to the starting bracket from the start of the
   1735     whole pattern. (This is so that it works from duplicated subpatterns.)
   1736 
   1737     The state of the capturing groups is preserved over recursion, and
   1738     re-instated afterwards. We don't know how many are started and not yet
   1739     finished (offset_top records the completed total) so we just have to save
   1740     all the potential data. There may be up to 65535 such values, which is too
   1741     large to put on the stack, but using malloc for small numbers seems
   1742     expensive. As a compromise, the stack is used when there are no more than
   1743     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
   1744 
   1745     There are also other values that have to be saved. We use a chained
   1746     sequence of blocks that actually live on the stack. Thanks to Robin Houston
   1747     for the original version of this logic. It has, however, been hacked around
   1748     a lot, so he is not to blame for the current way it works. */
   1749 
   1750     case OP_RECURSE:
   1751       {
   1752       recursion_info *ri;
   1753       unsigned int recno;
   1754 
   1755       callpat = md->start_code + GET(ecode, 1);
   1756       recno = (callpat == md->start_code)? 0 :
   1757         GET2(callpat, 1 + LINK_SIZE);
   1758 
   1759       /* Check for repeating a recursion without advancing the subject pointer.
   1760       This should catch convoluted mutual recursions. (Some simple cases are
   1761       caught at compile time.) */
   1762 
   1763       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
   1764         if (recno == ri->group_num && eptr == ri->subject_position)
   1765           RRETURN(PCRE_ERROR_RECURSELOOP);
   1766 
   1767       /* Add to "recursing stack" */
   1768 
   1769       new_recursive.group_num = recno;
   1770       new_recursive.saved_capture_last = md->capture_last;
   1771       new_recursive.subject_position = eptr;
   1772       new_recursive.prevrec = md->recursive;
   1773       md->recursive = &new_recursive;
   1774 
   1775       /* Where to continue from afterwards */
   1776 
   1777       ecode += 1 + LINK_SIZE;
   1778 
   1779       /* Now save the offset data */
   1780 
   1781       new_recursive.saved_max = md->offset_end;
   1782       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
   1783         new_recursive.offset_save = stacksave;
   1784       else
   1785         {
   1786         new_recursive.offset_save =
   1787           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
   1788         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
   1789         }
   1790       memcpy(new_recursive.offset_save, md->offset_vector,
   1791             new_recursive.saved_max * sizeof(int));
   1792 
   1793       /* OK, now we can do the recursion. After processing each alternative,
   1794       restore the offset data and the last captured value. If there were nested
   1795       recursions, md->recursive might be changed, so reset it before looping.
   1796       */
   1797 
   1798       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
   1799       cbegroup = (*callpat >= OP_SBRA);
   1800       do
   1801         {
   1802         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
   1803         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
   1804           md, eptrb, RM6);
   1805         memcpy(md->offset_vector, new_recursive.offset_save,
   1806             new_recursive.saved_max * sizeof(int));
   1807         md->capture_last = new_recursive.saved_capture_last;
   1808         md->recursive = new_recursive.prevrec;
   1809         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1810           {
   1811           DPRINTF(("Recursion matched\n"));
   1812           if (new_recursive.offset_save != stacksave)
   1813             (PUBL(free))(new_recursive.offset_save);
   1814 
   1815           /* Set where we got to in the subject, and reset the start in case
   1816           it was changed by \K. This *is* propagated back out of a recursion,
   1817           for Perl compatibility. */
   1818 
   1819           eptr = md->end_match_ptr;
   1820           mstart = md->start_match_ptr;
   1821           goto RECURSION_MATCHED;        /* Exit loop; end processing */
   1822           }
   1823 
   1824         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
   1825         recursion; they cause a NOMATCH for the entire recursion. These codes
   1826         are defined in a range that can be tested for. */
   1827 
   1828         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
   1829           break;   /* Use the no-match exit below so offset_save is freed */
   1830 
   1831         /* Any return code other than NOMATCH is an error. */
   1832 
   1833         if (rrc != MATCH_NOMATCH)
   1834           {
   1835           DPRINTF(("Recursion gave error %d\n", rrc));
   1836           if (new_recursive.offset_save != stacksave)
   1837             (PUBL(free))(new_recursive.offset_save);
   1838           RRETURN(rrc);
   1839           }
   1840 
   1841         md->recursive = &new_recursive;
   1842         callpat += GET(callpat, 1);
   1843         }
   1844       while (*callpat == OP_ALT);
   1845 
   1846       DPRINTF(("Recursion didn't match\n"));
   1847       md->recursive = new_recursive.prevrec;
   1848       if (new_recursive.offset_save != stacksave)
   1849         (PUBL(free))(new_recursive.offset_save);
   1850       RRETURN(MATCH_NOMATCH);
   1851       }
   1852 
   1853     RECURSION_MATCHED:
   1854     break;
   1855 
   1856     /* An alternation is the end of a branch; scan along to find the end of the
   1857     bracketed group and go to there. */
   1858 
   1859     case OP_ALT:
   1860     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1861     break;
   1862 
   1863     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
   1864     indicating that it may occur zero times. It may repeat infinitely, or not
   1865     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
   1866     with fixed upper repeat limits are compiled as a number of copies, with the
   1867     optional ones preceded by BRAZERO or BRAMINZERO. */
   1868 
   1869     case OP_BRAZERO:
   1870     next = ecode + 1;
   1871     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
   1872     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1873     do next += GET(next, 1); while (*next == OP_ALT);
   1874     ecode = next + 1 + LINK_SIZE;
   1875     break;
   1876 
   1877     case OP_BRAMINZERO:
   1878     next = ecode + 1;
   1879     do next += GET(next, 1); while (*next == OP_ALT);
   1880     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
   1881     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1882     ecode++;
   1883     break;
   1884 
   1885     case OP_SKIPZERO:
   1886     next = ecode+1;
   1887     do next += GET(next,1); while (*next == OP_ALT);
   1888     ecode = next + 1 + LINK_SIZE;
   1889     break;
   1890 
   1891     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
   1892     here; just jump to the group, with allow_zero set TRUE. */
   1893 
   1894     case OP_BRAPOSZERO:
   1895     op = *(++ecode);
   1896     allow_zero = TRUE;
   1897     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
   1898       goto POSSESSIVE_NON_CAPTURE;
   1899 
   1900     /* End of a group, repeated or non-repeating. */
   1901 
   1902     case OP_KET:
   1903     case OP_KETRMIN:
   1904     case OP_KETRMAX:
   1905     case OP_KETRPOS:
   1906     prev = ecode - GET(ecode, 1);
   1907 
   1908     /* If this was a group that remembered the subject start, in order to break
   1909     infinite repeats of empty string matches, retrieve the subject start from
   1910     the chain. Otherwise, set it NULL. */
   1911 
   1912     if (*prev >= OP_SBRA || *prev == OP_ONCE)
   1913       {
   1914       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
   1915       eptrb = eptrb->epb_prev;              /* Backup to previous group */
   1916       }
   1917     else saved_eptr = NULL;
   1918 
   1919     /* If we are at the end of an assertion group or a non-capturing atomic
   1920     group, stop matching and return MATCH_MATCH, but record the current high
   1921     water mark for use by positive assertions. We also need to record the match
   1922     start in case it was changed by \K. */
   1923 
   1924     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
   1925          *prev == OP_ONCE_NC)
   1926       {
   1927       md->end_match_ptr = eptr;      /* For ONCE_NC */
   1928       md->end_offset_top = offset_top;
   1929       md->start_match_ptr = mstart;
   1930       RRETURN(MATCH_MATCH);         /* Sets md->mark */
   1931       }
   1932 
   1933     /* For capturing groups we have to check the group number back at the start
   1934     and if necessary complete handling an extraction by setting the offsets and
   1935     bumping the high water mark. Whole-pattern recursion is coded as a recurse
   1936     into group 0, so it won't be picked up here. Instead, we catch it when the
   1937     OP_END is reached. Other recursion is handled here. We just have to record
   1938     the current subject position and start match pointer and give a MATCH
   1939     return. */
   1940 
   1941     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
   1942         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
   1943       {
   1944       number = GET2(prev, 1+LINK_SIZE);
   1945       offset = number << 1;
   1946 
   1947 #ifdef PCRE_DEBUG
   1948       printf("end bracket %d", number);
   1949       printf("\n");
   1950 #endif
   1951 
   1952       /* Handle a recursively called group. */
   1953 
   1954       if (md->recursive != NULL && md->recursive->group_num == number)
   1955         {
   1956         md->end_match_ptr = eptr;
   1957         md->start_match_ptr = mstart;
   1958         RRETURN(MATCH_MATCH);
   1959         }
   1960 
   1961       /* Deal with capturing */
   1962 
   1963       md->capture_last = (md->capture_last & OVFLMASK) | number;
   1964       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
   1965         {
   1966         /* If offset is greater than offset_top, it means that we are
   1967         "skipping" a capturing group, and that group's offsets must be marked
   1968         unset. In earlier versions of PCRE, all the offsets were unset at the
   1969         start of matching, but this doesn't work because atomic groups and
   1970         assertions can cause a value to be set that should later be unset.
   1971         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
   1972         part of the atomic group, but this is not on the final matching path,
   1973         so must be unset when 2 is set. (If there is no group 2, there is no
   1974         problem, because offset_top will then be 2, indicating no capture.) */
   1975 
   1976         if (offset > offset_top)
   1977           {
   1978           register int *iptr = md->offset_vector + offset_top;
   1979           register int *iend = md->offset_vector + offset;
   1980           while (iptr < iend) *iptr++ = -1;
   1981           }
   1982 
   1983         /* Now make the extraction */
   1984 
   1985         md->offset_vector[offset] =
   1986           md->offset_vector[md->offset_end - number];
   1987         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1988         if (offset_top <= offset) offset_top = offset + 2;
   1989         }
   1990       }
   1991 
   1992     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
   1993     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
   1994     at a time from the outer level, thus saving stack. This must precede the
   1995     empty string test - in this case that test is done at the outer level. */
   1996 
   1997     if (*ecode == OP_KETRPOS)
   1998       {
   1999       md->start_match_ptr = mstart;    /* In case \K reset it */
   2000       md->end_match_ptr = eptr;
   2001       md->end_offset_top = offset_top;
   2002       RRETURN(MATCH_KETRPOS);
   2003       }
   2004 
   2005     /* For an ordinary non-repeating ket, just continue at this level. This
   2006     also happens for a repeating ket if no characters were matched in the
   2007     group. This is the forcible breaking of infinite loops as implemented in
   2008     Perl 5.005. For a non-repeating atomic group that includes captures,
   2009     establish a backup point by processing the rest of the pattern at a lower
   2010     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
   2011     original OP_ONCE level, thereby bypassing intermediate backup points, but
   2012     resetting any captures that happened along the way. */
   2013 
   2014     if (*ecode == OP_KET || eptr == saved_eptr)
   2015       {
   2016       if (*prev == OP_ONCE)
   2017         {
   2018         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
   2019         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2020         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2021         RRETURN(MATCH_ONCE);
   2022         }
   2023       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
   2024       break;
   2025       }
   2026 
   2027     /* The normal repeating kets try the rest of the pattern or restart from
   2028     the preceding bracket, in the appropriate order. In the second case, we can
   2029     use tail recursion to avoid using another stack frame, unless we have an
   2030     atomic group or an unlimited repeat of a group that can match an empty
   2031     string. */
   2032 
   2033     if (*ecode == OP_KETRMIN)
   2034       {
   2035       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
   2036       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2037       if (*prev == OP_ONCE)
   2038         {
   2039         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
   2040         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2041         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
   2042         RRETURN(MATCH_ONCE);
   2043         }
   2044       if (*prev >= OP_SBRA)    /* Could match an empty string */
   2045         {
   2046         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
   2047         RRETURN(rrc);
   2048         }
   2049       ecode = prev;
   2050       goto TAIL_RECURSE;
   2051       }
   2052     else  /* OP_KETRMAX */
   2053       {
   2054       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
   2055       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
   2056       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2057       if (*prev == OP_ONCE)
   2058         {
   2059         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
   2060         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2061         md->once_target = prev;
   2062         RRETURN(MATCH_ONCE);
   2063         }
   2064       ecode += 1 + LINK_SIZE;
   2065       goto TAIL_RECURSE;
   2066       }
   2067     /* Control never gets here */
   2068 
   2069     /* Not multiline mode: start of subject assertion, unless notbol. */
   2070 
   2071     case OP_CIRC:
   2072     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
   2073 
   2074     /* Start of subject assertion */
   2075 
   2076     case OP_SOD:
   2077     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
   2078     ecode++;
   2079     break;
   2080 
   2081     /* Multiline mode: start of subject unless notbol, or after any newline. */
   2082 
   2083     case OP_CIRCM:
   2084     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
   2085     if (eptr != md->start_subject &&
   2086         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
   2087       RRETURN(MATCH_NOMATCH);
   2088     ecode++;
   2089     break;
   2090 
   2091     /* Start of match assertion */
   2092 
   2093     case OP_SOM:
   2094     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
   2095     ecode++;
   2096     break;
   2097 
   2098     /* Reset the start of match point */
   2099 
   2100     case OP_SET_SOM:
   2101     mstart = eptr;
   2102     ecode++;
   2103     break;
   2104 
   2105     /* Multiline mode: assert before any newline, or before end of subject
   2106     unless noteol is set. */
   2107 
   2108     case OP_DOLLM:
   2109     if (eptr < md->end_subject)
   2110       {
   2111       if (!IS_NEWLINE(eptr))
   2112         {
   2113         if (md->partial != 0 &&
   2114             eptr + 1 >= md->end_subject &&
   2115             NLBLOCK->nltype == NLTYPE_FIXED &&
   2116             NLBLOCK->nllen == 2 &&
   2117             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2118           {
   2119           md->hitend = TRUE;
   2120           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2121           }
   2122         RRETURN(MATCH_NOMATCH);
   2123         }
   2124       }
   2125     else
   2126       {
   2127       if (md->noteol) RRETURN(MATCH_NOMATCH);
   2128       SCHECK_PARTIAL();
   2129       }
   2130     ecode++;
   2131     break;
   2132 
   2133     /* Not multiline mode: assert before a terminating newline or before end of
   2134     subject unless noteol is set. */
   2135 
   2136     case OP_DOLL:
   2137     if (md->noteol) RRETURN(MATCH_NOMATCH);
   2138     if (!md->endonly) goto ASSERT_NL_OR_EOS;
   2139 
   2140     /* ... else fall through for endonly */
   2141 
   2142     /* End of subject assertion (\z) */
   2143 
   2144     case OP_EOD:
   2145     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
   2146     SCHECK_PARTIAL();
   2147     ecode++;
   2148     break;
   2149 
   2150     /* End of subject or ending \n assertion (\Z) */
   2151 
   2152     case OP_EODN:
   2153     ASSERT_NL_OR_EOS:
   2154     if (eptr < md->end_subject &&
   2155         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
   2156       {
   2157       if (md->partial != 0 &&
   2158           eptr + 1 >= md->end_subject &&
   2159           NLBLOCK->nltype == NLTYPE_FIXED &&
   2160           NLBLOCK->nllen == 2 &&
   2161           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2162         {
   2163         md->hitend = TRUE;
   2164         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2165         }
   2166       RRETURN(MATCH_NOMATCH);
   2167       }
   2168 
   2169     /* Either at end of string or \n before end. */
   2170 
   2171     SCHECK_PARTIAL();
   2172     ecode++;
   2173     break;
   2174 
   2175     /* Word boundary assertions */
   2176 
   2177     case OP_NOT_WORD_BOUNDARY:
   2178     case OP_WORD_BOUNDARY:
   2179       {
   2180 
   2181       /* Find out if the previous and current characters are "word" characters.
   2182       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
   2183       be "non-word" characters. Remember the earliest consulted character for
   2184       partial matching. */
   2185 
   2186 #ifdef SUPPORT_UTF
   2187       if (utf)
   2188         {
   2189         /* Get status of previous character */
   2190 
   2191         if (eptr == md->start_subject) prev_is_word = FALSE; else
   2192           {
   2193           PCRE_PUCHAR lastptr = eptr - 1;
   2194           BACKCHAR(lastptr);
   2195           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
   2196           GETCHAR(c, lastptr);
   2197 #ifdef SUPPORT_UCP
   2198           if (md->use_ucp)
   2199             {
   2200             if (c == '_') prev_is_word = TRUE; else
   2201               {
   2202               int cat = UCD_CATEGORY(c);
   2203               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2204               }
   2205             }
   2206           else
   2207 #endif
   2208           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   2209           }
   2210 
   2211         /* Get status of next character */
   2212 
   2213         if (eptr >= md->end_subject)
   2214           {
   2215           SCHECK_PARTIAL();
   2216           cur_is_word = FALSE;
   2217           }
   2218         else
   2219           {
   2220           GETCHAR(c, eptr);
   2221 #ifdef SUPPORT_UCP
   2222           if (md->use_ucp)
   2223             {
   2224             if (c == '_') cur_is_word = TRUE; else
   2225               {
   2226               int cat = UCD_CATEGORY(c);
   2227               cur_is_word = (cat == ucp_L || cat == ucp_N);
   2228               }
   2229             }
   2230           else
   2231 #endif
   2232           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   2233           }
   2234         }
   2235       else
   2236 #endif
   2237 
   2238       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
   2239       consistency with the behaviour of \w we do use it in this case. */
   2240 
   2241         {
   2242         /* Get status of previous character */
   2243 
   2244         if (eptr == md->start_subject) prev_is_word = FALSE; else
   2245           {
   2246           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
   2247 #ifdef SUPPORT_UCP
   2248           if (md->use_ucp)
   2249             {
   2250             c = eptr[-1];
   2251             if (c == '_') prev_is_word = TRUE; else
   2252               {
   2253               int cat = UCD_CATEGORY(c);
   2254               prev_is_word = (cat == ucp_L || cat == ucp_N);
   2255               }
   2256             }
   2257           else
   2258 #endif
   2259           prev_is_word = MAX_255(eptr[-1])
   2260             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
   2261           }
   2262 
   2263         /* Get status of next character */
   2264 
   2265         if (eptr >= md->end_subject)
   2266           {
   2267           SCHECK_PARTIAL();
   2268           cur_is_word = FALSE;
   2269           }
   2270         else
   2271 #ifdef SUPPORT_UCP
   2272         if (md->use_ucp)
   2273           {
   2274           c = *eptr;
   2275           if (c == '_') cur_is_word = TRUE; else
   2276             {
   2277             int cat = UCD_CATEGORY(c);
   2278             cur_is_word = (cat == ucp_L || cat == ucp_N);
   2279             }
   2280           }
   2281         else
   2282 #endif
   2283         cur_is_word = MAX_255(*eptr)
   2284           && ((md->ctypes[*eptr] & ctype_word) != 0);
   2285         }
   2286 
   2287       /* Now see if the situation is what we want */
   2288 
   2289       if ((*ecode++ == OP_WORD_BOUNDARY)?
   2290            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
   2291         RRETURN(MATCH_NOMATCH);
   2292       }
   2293     break;
   2294 
   2295     /* Match any single character type except newline; have to take care with
   2296     CRLF newlines and partial matching. */
   2297 
   2298     case OP_ANY:
   2299     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   2300     if (md->partial != 0 &&
   2301         eptr + 1 >= md->end_subject &&
   2302         NLBLOCK->nltype == NLTYPE_FIXED &&
   2303         NLBLOCK->nllen == 2 &&
   2304         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
   2305       {
   2306       md->hitend = TRUE;
   2307       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2308       }
   2309 
   2310     /* Fall through */
   2311 
   2312     /* Match any single character whatsoever. */
   2313 
   2314     case OP_ALLANY:
   2315     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2316       {                            /* not be updated before SCHECK_PARTIAL. */
   2317       SCHECK_PARTIAL();
   2318       RRETURN(MATCH_NOMATCH);
   2319       }
   2320     eptr++;
   2321 #ifdef SUPPORT_UTF
   2322     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   2323 #endif
   2324     ecode++;
   2325     break;
   2326 
   2327     /* Match a single byte, even in UTF-8 mode. This opcode really does match
   2328     any byte, even newline, independent of the setting of PCRE_DOTALL. */
   2329 
   2330     case OP_ANYBYTE:
   2331     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
   2332       {                            /* not be updated before SCHECK_PARTIAL. */
   2333       SCHECK_PARTIAL();
   2334       RRETURN(MATCH_NOMATCH);
   2335       }
   2336     eptr++;
   2337     ecode++;
   2338     break;
   2339 
   2340     case OP_NOT_DIGIT:
   2341     if (eptr >= md->end_subject)
   2342       {
   2343       SCHECK_PARTIAL();
   2344       RRETURN(MATCH_NOMATCH);
   2345       }
   2346     GETCHARINCTEST(c, eptr);
   2347     if (
   2348 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2349        c < 256 &&
   2350 #endif
   2351        (md->ctypes[c] & ctype_digit) != 0
   2352        )
   2353       RRETURN(MATCH_NOMATCH);
   2354     ecode++;
   2355     break;
   2356 
   2357     case OP_DIGIT:
   2358     if (eptr >= md->end_subject)
   2359       {
   2360       SCHECK_PARTIAL();
   2361       RRETURN(MATCH_NOMATCH);
   2362       }
   2363     GETCHARINCTEST(c, eptr);
   2364     if (
   2365 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2366        c > 255 ||
   2367 #endif
   2368        (md->ctypes[c] & ctype_digit) == 0
   2369        )
   2370       RRETURN(MATCH_NOMATCH);
   2371     ecode++;
   2372     break;
   2373 
   2374     case OP_NOT_WHITESPACE:
   2375     if (eptr >= md->end_subject)
   2376       {
   2377       SCHECK_PARTIAL();
   2378       RRETURN(MATCH_NOMATCH);
   2379       }
   2380     GETCHARINCTEST(c, eptr);
   2381     if (
   2382 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2383        c < 256 &&
   2384 #endif
   2385        (md->ctypes[c] & ctype_space) != 0
   2386        )
   2387       RRETURN(MATCH_NOMATCH);
   2388     ecode++;
   2389     break;
   2390 
   2391     case OP_WHITESPACE:
   2392     if (eptr >= md->end_subject)
   2393       {
   2394       SCHECK_PARTIAL();
   2395       RRETURN(MATCH_NOMATCH);
   2396       }
   2397     GETCHARINCTEST(c, eptr);
   2398     if (
   2399 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2400        c > 255 ||
   2401 #endif
   2402        (md->ctypes[c] & ctype_space) == 0
   2403        )
   2404       RRETURN(MATCH_NOMATCH);
   2405     ecode++;
   2406     break;
   2407 
   2408     case OP_NOT_WORDCHAR:
   2409     if (eptr >= md->end_subject)
   2410       {
   2411       SCHECK_PARTIAL();
   2412       RRETURN(MATCH_NOMATCH);
   2413       }
   2414     GETCHARINCTEST(c, eptr);
   2415     if (
   2416 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2417        c < 256 &&
   2418 #endif
   2419        (md->ctypes[c] & ctype_word) != 0
   2420        )
   2421       RRETURN(MATCH_NOMATCH);
   2422     ecode++;
   2423     break;
   2424 
   2425     case OP_WORDCHAR:
   2426     if (eptr >= md->end_subject)
   2427       {
   2428       SCHECK_PARTIAL();
   2429       RRETURN(MATCH_NOMATCH);
   2430       }
   2431     GETCHARINCTEST(c, eptr);
   2432     if (
   2433 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
   2434        c > 255 ||
   2435 #endif
   2436        (md->ctypes[c] & ctype_word) == 0
   2437        )
   2438       RRETURN(MATCH_NOMATCH);
   2439     ecode++;
   2440     break;
   2441 
   2442     case OP_ANYNL:
   2443     if (eptr >= md->end_subject)
   2444       {
   2445       SCHECK_PARTIAL();
   2446       RRETURN(MATCH_NOMATCH);
   2447       }
   2448     GETCHARINCTEST(c, eptr);
   2449     switch(c)
   2450       {
   2451       default: RRETURN(MATCH_NOMATCH);
   2452 
   2453       case CHAR_CR:
   2454       if (eptr >= md->end_subject)
   2455         {
   2456         SCHECK_PARTIAL();
   2457         }
   2458       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
   2459       break;
   2460 
   2461       case CHAR_LF:
   2462       break;
   2463 
   2464       case CHAR_VT:
   2465       case CHAR_FF:
   2466       case CHAR_NEL:
   2467 #ifndef EBCDIC
   2468       case 0x2028:
   2469       case 0x2029:
   2470 #endif  /* Not EBCDIC */
   2471       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   2472       break;
   2473       }
   2474     ecode++;
   2475     break;
   2476 
   2477     case OP_NOT_HSPACE:
   2478     if (eptr >= md->end_subject)
   2479       {
   2480       SCHECK_PARTIAL();
   2481       RRETURN(MATCH_NOMATCH);
   2482       }
   2483     GETCHARINCTEST(c, eptr);
   2484     switch(c)
   2485       {
   2486       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   2487       default: break;
   2488       }
   2489     ecode++;
   2490     break;
   2491 
   2492     case OP_HSPACE:
   2493     if (eptr >= md->end_subject)
   2494       {
   2495       SCHECK_PARTIAL();
   2496       RRETURN(MATCH_NOMATCH);
   2497       }
   2498     GETCHARINCTEST(c, eptr);
   2499     switch(c)
   2500       {
   2501       HSPACE_CASES: break;  /* Byte and multibyte cases */
   2502       default: RRETURN(MATCH_NOMATCH);
   2503       }
   2504     ecode++;
   2505     break;
   2506 
   2507     case OP_NOT_VSPACE:
   2508     if (eptr >= md->end_subject)
   2509       {
   2510       SCHECK_PARTIAL();
   2511       RRETURN(MATCH_NOMATCH);
   2512       }
   2513     GETCHARINCTEST(c, eptr);
   2514     switch(c)
   2515       {
   2516       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   2517       default: break;
   2518       }
   2519     ecode++;
   2520     break;
   2521 
   2522     case OP_VSPACE:
   2523     if (eptr >= md->end_subject)
   2524       {
   2525       SCHECK_PARTIAL();
   2526       RRETURN(MATCH_NOMATCH);
   2527       }
   2528     GETCHARINCTEST(c, eptr);
   2529     switch(c)
   2530       {
   2531       VSPACE_CASES: break;
   2532       default: RRETURN(MATCH_NOMATCH);
   2533       }
   2534     ecode++;
   2535     break;
   2536 
   2537 #ifdef SUPPORT_UCP
   2538     /* Check the next character by Unicode property. We will get here only
   2539     if the support is in the binary; otherwise a compile-time error occurs. */
   2540 
   2541     case OP_PROP:
   2542     case OP_NOTPROP:
   2543     if (eptr >= md->end_subject)
   2544       {
   2545       SCHECK_PARTIAL();
   2546       RRETURN(MATCH_NOMATCH);
   2547       }
   2548     GETCHARINCTEST(c, eptr);
   2549       {
   2550       const pcre_uint32 *cp;
   2551       const ucd_record *prop = GET_UCD(c);
   2552 
   2553       switch(ecode[1])
   2554         {
   2555         case PT_ANY:
   2556         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2557         break;
   2558 
   2559         case PT_LAMP:
   2560         if ((prop->chartype == ucp_Lu ||
   2561              prop->chartype == ucp_Ll ||
   2562              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
   2563           RRETURN(MATCH_NOMATCH);
   2564         break;
   2565 
   2566         case PT_GC:
   2567         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
   2568           RRETURN(MATCH_NOMATCH);
   2569         break;
   2570 
   2571         case PT_PC:
   2572         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
   2573           RRETURN(MATCH_NOMATCH);
   2574         break;
   2575 
   2576         case PT_SC:
   2577         if ((ecode[2] != prop->script) == (op == OP_PROP))
   2578           RRETURN(MATCH_NOMATCH);
   2579         break;
   2580 
   2581         /* These are specials */
   2582 
   2583         case PT_ALNUM:
   2584         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2585              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
   2586           RRETURN(MATCH_NOMATCH);
   2587         break;
   2588 
   2589         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   2590         which means that Perl space and POSIX space are now identical. PCRE
   2591         was changed at release 8.34. */
   2592 
   2593         case PT_SPACE:    /* Perl space */
   2594         case PT_PXSPACE:  /* POSIX space */
   2595         switch(c)
   2596           {
   2597           HSPACE_CASES:
   2598           VSPACE_CASES:
   2599           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
   2600           break;
   2601 
   2602           default:
   2603           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
   2604             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
   2605           break;
   2606           }
   2607         break;
   2608 
   2609         case PT_WORD:
   2610         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   2611              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   2612              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
   2613           RRETURN(MATCH_NOMATCH);
   2614         break;
   2615 
   2616         case PT_CLIST:
   2617         cp = PRIV(ucd_caseless_sets) + ecode[2];
   2618         for (;;)
   2619           {
   2620           if (c < *cp)
   2621             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
   2622           if (c == *cp++)
   2623             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
   2624           }
   2625         break;
   2626 
   2627         case PT_UCNC:
   2628         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   2629              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   2630              c >= 0xe000) == (op == OP_NOTPROP))
   2631           RRETURN(MATCH_NOMATCH);
   2632         break;
   2633 
   2634         /* This should never occur */
   2635 
   2636         default:
   2637         RRETURN(PCRE_ERROR_INTERNAL);
   2638         }
   2639 
   2640       ecode += 3;
   2641       }
   2642     break;
   2643 
   2644     /* Match an extended Unicode sequence. We will get here only if the support
   2645     is in the binary; otherwise a compile-time error occurs. */
   2646 
   2647     case OP_EXTUNI:
   2648     if (eptr >= md->end_subject)
   2649       {
   2650       SCHECK_PARTIAL();
   2651       RRETURN(MATCH_NOMATCH);
   2652       }
   2653     else
   2654       {
   2655       int lgb, rgb;
   2656       GETCHARINCTEST(c, eptr);
   2657       lgb = UCD_GRAPHBREAK(c);
   2658       while (eptr < md->end_subject)
   2659         {
   2660         int len = 1;
   2661         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   2662         rgb = UCD_GRAPHBREAK(c);
   2663         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   2664         lgb = rgb;
   2665         eptr += len;
   2666         }
   2667       }
   2668     CHECK_PARTIAL();
   2669     ecode++;
   2670     break;
   2671 #endif  /* SUPPORT_UCP */
   2672 
   2673 
   2674     /* Match a back reference, possibly repeatedly. Look past the end of the
   2675     item to see if there is repeat information following. The code is similar
   2676     to that for character classes, but repeated for efficiency. Then obey
   2677     similar code to character type repeats - written out again for speed.
   2678     However, if the referenced string is the empty string, always treat
   2679     it as matched, any number of times (otherwise there could be infinite
   2680     loops). If the reference is unset, there are two possibilities:
   2681 
   2682     (a) In the default, Perl-compatible state, set the length negative;
   2683     this ensures that every attempt at a match fails. We can't just fail
   2684     here, because of the possibility of quantifiers with zero minima.
   2685 
   2686     (b) If the JavaScript compatibility flag is set, set the length to zero
   2687     so that the back reference matches an empty string.
   2688 
   2689     Otherwise, set the length to the length of what was matched by the
   2690     referenced subpattern.
   2691 
   2692     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
   2693     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
   2694     and OP_DNREFI are used. In this case we must scan the list of groups to
   2695     which the name refers, and use the first one that is set. */
   2696 
   2697     case OP_DNREF:
   2698     case OP_DNREFI:
   2699     caseless = op == OP_DNREFI;
   2700       {
   2701       int count = GET2(ecode, 1+IMM2_SIZE);
   2702       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
   2703       ecode += 1 + 2*IMM2_SIZE;
   2704 
   2705       /* Setting the default length first and initializing 'offset' avoids
   2706       compiler warnings in the REF_REPEAT code. */
   2707 
   2708       length = (md->jscript_compat)? 0 : -1;
   2709       offset = 0;
   2710 
   2711       while (count-- > 0)
   2712         {
   2713         offset = GET2(slot, 0) << 1;
   2714         if (offset < offset_top && md->offset_vector[offset] >= 0)
   2715           {
   2716           length = md->offset_vector[offset+1] - md->offset_vector[offset];
   2717           break;
   2718           }
   2719         slot += md->name_entry_size;
   2720         }
   2721       }
   2722     goto REF_REPEAT;
   2723 
   2724     case OP_REF:
   2725     case OP_REFI:
   2726     caseless = op == OP_REFI;
   2727     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
   2728     ecode += 1 + IMM2_SIZE;
   2729     if (offset >= offset_top || md->offset_vector[offset] < 0)
   2730       length = (md->jscript_compat)? 0 : -1;
   2731     else
   2732       length = md->offset_vector[offset+1] - md->offset_vector[offset];
   2733 
   2734     /* Set up for repetition, or handle the non-repeated case */
   2735 
   2736     REF_REPEAT:
   2737     switch (*ecode)
   2738       {
   2739       case OP_CRSTAR:
   2740       case OP_CRMINSTAR:
   2741       case OP_CRPLUS:
   2742       case OP_CRMINPLUS:
   2743       case OP_CRQUERY:
   2744       case OP_CRMINQUERY:
   2745       c = *ecode++ - OP_CRSTAR;
   2746       minimize = (c & 1) != 0;
   2747       min = rep_min[c];                 /* Pick up values from tables; */
   2748       max = rep_max[c];                 /* zero for max => infinity */
   2749       if (max == 0) max = INT_MAX;
   2750       break;
   2751 
   2752       case OP_CRRANGE:
   2753       case OP_CRMINRANGE:
   2754       minimize = (*ecode == OP_CRMINRANGE);
   2755       min = GET2(ecode, 1);
   2756       max = GET2(ecode, 1 + IMM2_SIZE);
   2757       if (max == 0) max = INT_MAX;
   2758       ecode += 1 + 2 * IMM2_SIZE;
   2759       break;
   2760 
   2761       default:               /* No repeat follows */
   2762       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
   2763         {
   2764         if (length == -2) eptr = md->end_subject;   /* Partial match */
   2765         CHECK_PARTIAL();
   2766         RRETURN(MATCH_NOMATCH);
   2767         }
   2768       eptr += length;
   2769       continue;              /* With the main loop */
   2770       }
   2771 
   2772     /* Handle repeated back references. If the length of the reference is
   2773     zero, just continue with the main loop. If the length is negative, it
   2774     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
   2775     zero, we can continue at the same level without recursion. For any other
   2776     minimum, carrying on will result in NOMATCH. */
   2777 
   2778     if (length == 0) continue;
   2779     if (length < 0 && min == 0) continue;
   2780 
   2781     /* First, ensure the minimum number of matches are present. We get back
   2782     the length of the reference string explicitly rather than passing the
   2783     address of eptr, so that eptr can be a register variable. */
   2784 
   2785     for (i = 1; i <= min; i++)
   2786       {
   2787       int slength;
   2788       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2789         {
   2790         if (slength == -2) eptr = md->end_subject;   /* Partial match */
   2791         CHECK_PARTIAL();
   2792         RRETURN(MATCH_NOMATCH);
   2793         }
   2794       eptr += slength;
   2795       }
   2796 
   2797     /* If min = max, continue at the same level without recursion.
   2798     They are not both allowed to be zero. */
   2799 
   2800     if (min == max) continue;
   2801 
   2802     /* If minimizing, keep trying and advancing the pointer */
   2803 
   2804     if (minimize)
   2805       {
   2806       for (fi = min;; fi++)
   2807         {
   2808         int slength;
   2809         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
   2810         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2811         if (fi >= max) RRETURN(MATCH_NOMATCH);
   2812         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2813           {
   2814           if (slength == -2) eptr = md->end_subject;   /* Partial match */
   2815           CHECK_PARTIAL();
   2816           RRETURN(MATCH_NOMATCH);
   2817           }
   2818         eptr += slength;
   2819         }
   2820       /* Control never gets here */
   2821       }
   2822 
   2823     /* If maximizing, find the longest string and work backwards */
   2824 
   2825     else
   2826       {
   2827       pp = eptr;
   2828       for (i = min; i < max; i++)
   2829         {
   2830         int slength;
   2831         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
   2832           {
   2833           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
   2834           the soft partial matching case. */
   2835 
   2836           if (slength == -2 && md->partial != 0 &&
   2837               md->end_subject > md->start_used_ptr)
   2838             {
   2839             md->hitend = TRUE;
   2840             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   2841             }
   2842           break;
   2843           }
   2844         eptr += slength;
   2845         }
   2846 
   2847       while (eptr >= pp)
   2848         {
   2849         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
   2850         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2851         eptr -= length;
   2852         }
   2853       RRETURN(MATCH_NOMATCH);
   2854       }
   2855     /* Control never gets here */
   2856 
   2857     /* Match a bit-mapped character class, possibly repeatedly. This op code is
   2858     used when all the characters in the class have values in the range 0-255,
   2859     and either the matching is caseful, or the characters are in the range
   2860     0-127 when UTF-8 processing is enabled. The only difference between
   2861     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
   2862     encountered.
   2863 
   2864     First, look past the end of the item to see if there is repeat information
   2865     following. Then obey similar code to character type repeats - written out
   2866     again for speed. */
   2867 
   2868     case OP_NCLASS:
   2869     case OP_CLASS:
   2870       {
   2871       /* The data variable is saved across frames, so the byte map needs to
   2872       be stored there. */
   2873 #define BYTE_MAP ((pcre_uint8 *)data)
   2874       data = ecode + 1;                /* Save for matching */
   2875       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
   2876 
   2877       switch (*ecode)
   2878         {
   2879         case OP_CRSTAR:
   2880         case OP_CRMINSTAR:
   2881         case OP_CRPLUS:
   2882         case OP_CRMINPLUS:
   2883         case OP_CRQUERY:
   2884         case OP_CRMINQUERY:
   2885         case OP_CRPOSSTAR:
   2886         case OP_CRPOSPLUS:
   2887         case OP_CRPOSQUERY:
   2888         c = *ecode++ - OP_CRSTAR;
   2889         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   2890         else possessive = TRUE;
   2891         min = rep_min[c];                 /* Pick up values from tables; */
   2892         max = rep_max[c];                 /* zero for max => infinity */
   2893         if (max == 0) max = INT_MAX;
   2894         break;
   2895 
   2896         case OP_CRRANGE:
   2897         case OP_CRMINRANGE:
   2898         case OP_CRPOSRANGE:
   2899         minimize = (*ecode == OP_CRMINRANGE);
   2900         possessive = (*ecode == OP_CRPOSRANGE);
   2901         min = GET2(ecode, 1);
   2902         max = GET2(ecode, 1 + IMM2_SIZE);
   2903         if (max == 0) max = INT_MAX;
   2904         ecode += 1 + 2 * IMM2_SIZE;
   2905         break;
   2906 
   2907         default:               /* No repeat follows */
   2908         min = max = 1;
   2909         break;
   2910         }
   2911 
   2912       /* First, ensure the minimum number of matches are present. */
   2913 
   2914 #ifdef SUPPORT_UTF
   2915       if (utf)
   2916         {
   2917         for (i = 1; i <= min; i++)
   2918           {
   2919           if (eptr >= md->end_subject)
   2920             {
   2921             SCHECK_PARTIAL();
   2922             RRETURN(MATCH_NOMATCH);
   2923             }
   2924           GETCHARINC(c, eptr);
   2925           if (c > 255)
   2926             {
   2927             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2928             }
   2929           else
   2930             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2931           }
   2932         }
   2933       else
   2934 #endif
   2935       /* Not UTF mode */
   2936         {
   2937         for (i = 1; i <= min; i++)
   2938           {
   2939           if (eptr >= md->end_subject)
   2940             {
   2941             SCHECK_PARTIAL();
   2942             RRETURN(MATCH_NOMATCH);
   2943             }
   2944           c = *eptr++;
   2945 #ifndef COMPILE_PCRE8
   2946           if (c > 255)
   2947             {
   2948             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2949             }
   2950           else
   2951 #endif
   2952             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2953           }
   2954         }
   2955 
   2956       /* If max == min we can continue with the main loop without the
   2957       need to recurse. */
   2958 
   2959       if (min == max) continue;
   2960 
   2961       /* If minimizing, keep testing the rest of the expression and advancing
   2962       the pointer while it matches the class. */
   2963 
   2964       if (minimize)
   2965         {
   2966 #ifdef SUPPORT_UTF
   2967         if (utf)
   2968           {
   2969           for (fi = min;; fi++)
   2970             {
   2971             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
   2972             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2973             if (fi >= max) RRETURN(MATCH_NOMATCH);
   2974             if (eptr >= md->end_subject)
   2975               {
   2976               SCHECK_PARTIAL();
   2977               RRETURN(MATCH_NOMATCH);
   2978               }
   2979             GETCHARINC(c, eptr);
   2980             if (c > 255)
   2981               {
   2982               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   2983               }
   2984             else
   2985               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   2986             }
   2987           }
   2988         else
   2989 #endif
   2990         /* Not UTF mode */
   2991           {
   2992           for (fi = min;; fi++)
   2993             {
   2994             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
   2995             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2996             if (fi >= max) RRETURN(MATCH_NOMATCH);
   2997             if (eptr >= md->end_subject)
   2998               {
   2999               SCHECK_PARTIAL();
   3000               RRETURN(MATCH_NOMATCH);
   3001               }
   3002             c = *eptr++;
   3003 #ifndef COMPILE_PCRE8
   3004             if (c > 255)
   3005               {
   3006               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
   3007               }
   3008             else
   3009 #endif
   3010               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
   3011             }
   3012           }
   3013         /* Control never gets here */
   3014         }
   3015 
   3016       /* If maximizing, find the longest possible run, then work backwards. */
   3017 
   3018       else
   3019         {
   3020         pp = eptr;
   3021 
   3022 #ifdef SUPPORT_UTF
   3023         if (utf)
   3024           {
   3025           for (i = min; i < max; i++)
   3026             {
   3027             int len = 1;
   3028             if (eptr >= md->end_subject)
   3029               {
   3030               SCHECK_PARTIAL();
   3031               break;
   3032               }
   3033             GETCHARLEN(c, eptr, len);
   3034             if (c > 255)
   3035               {
   3036               if (op == OP_CLASS) break;
   3037               }
   3038             else
   3039               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3040             eptr += len;
   3041             }
   3042 
   3043           if (possessive) continue;    /* No backtracking */
   3044 
   3045           for (;;)
   3046             {
   3047             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
   3048             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3049             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3050             BACKCHAR(eptr);
   3051             }
   3052           }
   3053         else
   3054 #endif
   3055           /* Not UTF mode */
   3056           {
   3057           for (i = min; i < max; i++)
   3058             {
   3059             if (eptr >= md->end_subject)
   3060               {
   3061               SCHECK_PARTIAL();
   3062               break;
   3063               }
   3064             c = *eptr;
   3065 #ifndef COMPILE_PCRE8
   3066             if (c > 255)
   3067               {
   3068               if (op == OP_CLASS) break;
   3069               }
   3070             else
   3071 #endif
   3072               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
   3073             eptr++;
   3074             }
   3075 
   3076           if (possessive) continue;    /* No backtracking */
   3077 
   3078           while (eptr >= pp)
   3079             {
   3080             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
   3081             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3082             eptr--;
   3083             }
   3084           }
   3085 
   3086         RRETURN(MATCH_NOMATCH);
   3087         }
   3088 #undef BYTE_MAP
   3089       }
   3090     /* Control never gets here */
   3091 
   3092 
   3093     /* Match an extended character class. In the 8-bit library, this opcode is
   3094     encountered only when UTF-8 mode is supported. In the 16-bit and
   3095     32-bit libraries, codepoints greater than 255 may be encountered even when
   3096     UTF is not supported. */
   3097 
   3098 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   3099     case OP_XCLASS:
   3100       {
   3101       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
   3102       ecode += GET(ecode, 1);                      /* Advance past the item */
   3103 
   3104       switch (*ecode)
   3105         {
   3106         case OP_CRSTAR:
   3107         case OP_CRMINSTAR:
   3108         case OP_CRPLUS:
   3109         case OP_CRMINPLUS:
   3110         case OP_CRQUERY:
   3111         case OP_CRMINQUERY:
   3112         case OP_CRPOSSTAR:
   3113         case OP_CRPOSPLUS:
   3114         case OP_CRPOSQUERY:
   3115         c = *ecode++ - OP_CRSTAR;
   3116         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
   3117         else possessive = TRUE;
   3118         min = rep_min[c];                 /* Pick up values from tables; */
   3119         max = rep_max[c];                 /* zero for max => infinity */
   3120         if (max == 0) max = INT_MAX;
   3121         break;
   3122 
   3123         case OP_CRRANGE:
   3124         case OP_CRMINRANGE:
   3125         case OP_CRPOSRANGE:
   3126         minimize = (*ecode == OP_CRMINRANGE);
   3127         possessive = (*ecode == OP_CRPOSRANGE);
   3128         min = GET2(ecode, 1);
   3129         max = GET2(ecode, 1 + IMM2_SIZE);
   3130         if (max == 0) max = INT_MAX;
   3131         ecode += 1 + 2 * IMM2_SIZE;
   3132         break;
   3133 
   3134         default:               /* No repeat follows */
   3135         min = max = 1;
   3136         break;
   3137         }
   3138 
   3139       /* First, ensure the minimum number of matches are present. */
   3140 
   3141       for (i = 1; i <= min; i++)
   3142         {
   3143         if (eptr >= md->end_subject)
   3144           {
   3145           SCHECK_PARTIAL();
   3146           RRETURN(MATCH_NOMATCH);
   3147           }
   3148         GETCHARINCTEST(c, eptr);
   3149         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3150         }
   3151 
   3152       /* If max == min we can continue with the main loop without the
   3153       need to recurse. */
   3154 
   3155       if (min == max) continue;
   3156 
   3157       /* If minimizing, keep testing the rest of the expression and advancing
   3158       the pointer while it matches the class. */
   3159 
   3160       if (minimize)
   3161         {
   3162         for (fi = min;; fi++)
   3163           {
   3164           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
   3165           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3166           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3167           if (eptr >= md->end_subject)
   3168             {
   3169             SCHECK_PARTIAL();
   3170             RRETURN(MATCH_NOMATCH);
   3171             }
   3172           GETCHARINCTEST(c, eptr);
   3173           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
   3174           }
   3175         /* Control never gets here */
   3176         }
   3177 
   3178       /* If maximizing, find the longest possible run, then work backwards. */
   3179 
   3180       else
   3181         {
   3182         pp = eptr;
   3183         for (i = min; i < max; i++)
   3184           {
   3185           int len = 1;
   3186           if (eptr >= md->end_subject)
   3187             {
   3188             SCHECK_PARTIAL();
   3189             break;
   3190             }
   3191 #ifdef SUPPORT_UTF
   3192           GETCHARLENTEST(c, eptr, len);
   3193 #else
   3194           c = *eptr;
   3195 #endif
   3196           if (!PRIV(xclass)(c, data, utf)) break;
   3197           eptr += len;
   3198           }
   3199 
   3200         if (possessive) continue;    /* No backtracking */
   3201 
   3202         for(;;)
   3203           {
   3204           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
   3205           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3206           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3207 #ifdef SUPPORT_UTF
   3208           if (utf) BACKCHAR(eptr);
   3209 #endif
   3210           }
   3211         RRETURN(MATCH_NOMATCH);
   3212         }
   3213 
   3214       /* Control never gets here */
   3215       }
   3216 #endif    /* End of XCLASS */
   3217 
   3218     /* Match a single character, casefully */
   3219 
   3220     case OP_CHAR:
   3221 #ifdef SUPPORT_UTF
   3222     if (utf)
   3223       {
   3224       length = 1;
   3225       ecode++;
   3226       GETCHARLEN(fc, ecode, length);
   3227       if (length > md->end_subject - eptr)
   3228         {
   3229         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
   3230         RRETURN(MATCH_NOMATCH);
   3231         }
   3232       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
   3233       }
   3234     else
   3235 #endif
   3236     /* Not UTF mode */
   3237       {
   3238       if (md->end_subject - eptr < 1)
   3239         {
   3240         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
   3241         RRETURN(MATCH_NOMATCH);
   3242         }
   3243       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
   3244       ecode += 2;
   3245       }
   3246     break;
   3247 
   3248     /* Match a single character, caselessly. If we are at the end of the
   3249     subject, give up immediately. */
   3250 
   3251     case OP_CHARI:
   3252     if (eptr >= md->end_subject)
   3253       {
   3254       SCHECK_PARTIAL();
   3255       RRETURN(MATCH_NOMATCH);
   3256       }
   3257 
   3258 #ifdef SUPPORT_UTF
   3259     if (utf)
   3260       {
   3261       length = 1;
   3262       ecode++;
   3263       GETCHARLEN(fc, ecode, length);
   3264 
   3265       /* If the pattern character's value is < 128, we have only one byte, and
   3266       we know that its other case must also be one byte long, so we can use the
   3267       fast lookup table. We know that there is at least one byte left in the
   3268       subject. */
   3269 
   3270       if (fc < 128)
   3271         {
   3272         pcre_uint32 cc = UCHAR21(eptr);
   3273         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
   3274         ecode++;
   3275         eptr++;
   3276         }
   3277 
   3278       /* Otherwise we must pick up the subject character. Note that we cannot
   3279       use the value of "length" to check for sufficient bytes left, because the
   3280       other case of the character may have more or fewer bytes.  */
   3281 
   3282       else
   3283         {
   3284         pcre_uint32 dc;
   3285         GETCHARINC(dc, eptr);
   3286         ecode += length;
   3287 
   3288         /* If we have Unicode property support, we can use it to test the other
   3289         case of the character, if there is one. */
   3290 
   3291         if (fc != dc)
   3292           {
   3293 #ifdef SUPPORT_UCP
   3294           if (dc != UCD_OTHERCASE(fc))
   3295 #endif
   3296             RRETURN(MATCH_NOMATCH);
   3297           }
   3298         }
   3299       }
   3300     else
   3301 #endif   /* SUPPORT_UTF */
   3302 
   3303     /* Not UTF mode */
   3304       {
   3305       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
   3306           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
   3307       eptr++;
   3308       ecode += 2;
   3309       }
   3310     break;
   3311 
   3312     /* Match a single character repeatedly. */
   3313 
   3314     case OP_EXACT:
   3315     case OP_EXACTI:
   3316     min = max = GET2(ecode, 1);
   3317     ecode += 1 + IMM2_SIZE;
   3318     goto REPEATCHAR;
   3319 
   3320     case OP_POSUPTO:
   3321     case OP_POSUPTOI:
   3322     possessive = TRUE;
   3323     /* Fall through */
   3324 
   3325     case OP_UPTO:
   3326     case OP_UPTOI:
   3327     case OP_MINUPTO:
   3328     case OP_MINUPTOI:
   3329     min = 0;
   3330     max = GET2(ecode, 1);
   3331     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
   3332     ecode += 1 + IMM2_SIZE;
   3333     goto REPEATCHAR;
   3334 
   3335     case OP_POSSTAR:
   3336     case OP_POSSTARI:
   3337     possessive = TRUE;
   3338     min = 0;
   3339     max = INT_MAX;
   3340     ecode++;
   3341     goto REPEATCHAR;
   3342 
   3343     case OP_POSPLUS:
   3344     case OP_POSPLUSI:
   3345     possessive = TRUE;
   3346     min = 1;
   3347     max = INT_MAX;
   3348     ecode++;
   3349     goto REPEATCHAR;
   3350 
   3351     case OP_POSQUERY:
   3352     case OP_POSQUERYI:
   3353     possessive = TRUE;
   3354     min = 0;
   3355     max = 1;
   3356     ecode++;
   3357     goto REPEATCHAR;
   3358 
   3359     case OP_STAR:
   3360     case OP_STARI:
   3361     case OP_MINSTAR:
   3362     case OP_MINSTARI:
   3363     case OP_PLUS:
   3364     case OP_PLUSI:
   3365     case OP_MINPLUS:
   3366     case OP_MINPLUSI:
   3367     case OP_QUERY:
   3368     case OP_QUERYI:
   3369     case OP_MINQUERY:
   3370     case OP_MINQUERYI:
   3371     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
   3372     minimize = (c & 1) != 0;
   3373     min = rep_min[c];                 /* Pick up values from tables; */
   3374     max = rep_max[c];                 /* zero for max => infinity */
   3375     if (max == 0) max = INT_MAX;
   3376 
   3377     /* Common code for all repeated single-character matches. We first check
   3378     for the minimum number of characters. If the minimum equals the maximum, we
   3379     are done. Otherwise, if minimizing, check the rest of the pattern for a
   3380     match; if there isn't one, advance up to the maximum, one character at a
   3381     time.
   3382 
   3383     If maximizing, advance up to the maximum number of matching characters,
   3384     until eptr is past the end of the maximum run. If possessive, we are
   3385     then done (no backing up). Otherwise, match at this position; anything
   3386     other than no match is immediately returned. For nomatch, back up one
   3387     character, unless we are matching \R and the last thing matched was
   3388     \r\n, in which case, back up two bytes. When we reach the first optional
   3389     character position, we can save stack by doing a tail recurse.
   3390 
   3391     The various UTF/non-UTF and caseful/caseless cases are handled separately,
   3392     for speed. */
   3393 
   3394     REPEATCHAR:
   3395 #ifdef SUPPORT_UTF
   3396     if (utf)
   3397       {
   3398       length = 1;
   3399       charptr = ecode;
   3400       GETCHARLEN(fc, ecode, length);
   3401       ecode += length;
   3402 
   3403       /* Handle multibyte character matching specially here. There is
   3404       support for caseless matching if UCP support is present. */
   3405 
   3406       if (length > 1)
   3407         {
   3408 #ifdef SUPPORT_UCP
   3409         pcre_uint32 othercase;
   3410         if (op >= OP_STARI &&     /* Caseless */
   3411             (othercase = UCD_OTHERCASE(fc)) != fc)
   3412           oclength = PRIV(ord2utf)(othercase, occhars);
   3413         else oclength = 0;
   3414 #endif  /* SUPPORT_UCP */
   3415 
   3416         for (i = 1; i <= min; i++)
   3417           {
   3418           if (eptr <= md->end_subject - length &&
   3419             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3420 #ifdef SUPPORT_UCP
   3421           else if (oclength > 0 &&
   3422                    eptr <= md->end_subject - oclength &&
   3423                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3424 #endif  /* SUPPORT_UCP */
   3425           else
   3426             {
   3427             CHECK_PARTIAL();
   3428             RRETURN(MATCH_NOMATCH);
   3429             }
   3430           }
   3431 
   3432         if (min == max) continue;
   3433 
   3434         if (minimize)
   3435           {
   3436           for (fi = min;; fi++)
   3437             {
   3438             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
   3439             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3440             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3441             if (eptr <= md->end_subject - length &&
   3442               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3443 #ifdef SUPPORT_UCP
   3444             else if (oclength > 0 &&
   3445                      eptr <= md->end_subject - oclength &&
   3446                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3447 #endif  /* SUPPORT_UCP */
   3448             else
   3449               {
   3450               CHECK_PARTIAL();
   3451               RRETURN(MATCH_NOMATCH);
   3452               }
   3453             }
   3454           /* Control never gets here */
   3455           }
   3456 
   3457         else  /* Maximize */
   3458           {
   3459           pp = eptr;
   3460           for (i = min; i < max; i++)
   3461             {
   3462             if (eptr <= md->end_subject - length &&
   3463                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
   3464 #ifdef SUPPORT_UCP
   3465             else if (oclength > 0 &&
   3466                      eptr <= md->end_subject - oclength &&
   3467                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
   3468 #endif  /* SUPPORT_UCP */
   3469             else
   3470               {
   3471               CHECK_PARTIAL();
   3472               break;
   3473               }
   3474             }
   3475 
   3476           if (possessive) continue;    /* No backtracking */
   3477           for(;;)
   3478             {
   3479             if (eptr == pp) goto TAIL_RECURSE;
   3480             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
   3481             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3482 #ifdef SUPPORT_UCP
   3483             eptr--;
   3484             BACKCHAR(eptr);
   3485 #else   /* without SUPPORT_UCP */
   3486             eptr -= length;
   3487 #endif  /* SUPPORT_UCP */
   3488             }
   3489           }
   3490         /* Control never gets here */
   3491         }
   3492 
   3493       /* If the length of a UTF-8 character is 1, we fall through here, and
   3494       obey the code as for non-UTF-8 characters below, though in this case the
   3495       value of fc will always be < 128. */
   3496       }
   3497     else
   3498 #endif  /* SUPPORT_UTF */
   3499       /* When not in UTF-8 mode, load a single-byte character. */
   3500       fc = *ecode++;
   3501 
   3502     /* The value of fc at this point is always one character, though we may
   3503     or may not be in UTF mode. The code is duplicated for the caseless and
   3504     caseful cases, for speed, since matching characters is likely to be quite
   3505     common. First, ensure the minimum number of matches are present. If min =
   3506     max, continue at the same level without recursing. Otherwise, if
   3507     minimizing, keep trying the rest of the expression and advancing one
   3508     matching character if failing, up to the maximum. Alternatively, if
   3509     maximizing, find the maximum number of characters and work backwards. */
   3510 
   3511     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   3512       max, (char *)eptr));
   3513 
   3514     if (op >= OP_STARI)  /* Caseless */
   3515       {
   3516 #ifdef COMPILE_PCRE8
   3517       /* fc must be < 128 if UTF is enabled. */
   3518       foc = md->fcc[fc];
   3519 #else
   3520 #ifdef SUPPORT_UTF
   3521 #ifdef SUPPORT_UCP
   3522       if (utf && fc > 127)
   3523         foc = UCD_OTHERCASE(fc);
   3524 #else
   3525       if (utf && fc > 127)
   3526         foc = fc;
   3527 #endif /* SUPPORT_UCP */
   3528       else
   3529 #endif /* SUPPORT_UTF */
   3530         foc = TABLE_GET(fc, md->fcc, fc);
   3531 #endif /* COMPILE_PCRE8 */
   3532 
   3533       for (i = 1; i <= min; i++)
   3534         {
   3535         pcre_uint32 cc;                 /* Faster than pcre_uchar */
   3536         if (eptr >= md->end_subject)
   3537           {
   3538           SCHECK_PARTIAL();
   3539           RRETURN(MATCH_NOMATCH);
   3540           }
   3541         cc = UCHAR21TEST(eptr);
   3542         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3543         eptr++;
   3544         }
   3545       if (min == max) continue;
   3546       if (minimize)
   3547         {
   3548         for (fi = min;; fi++)
   3549           {
   3550           pcre_uint32 cc;               /* Faster than pcre_uchar */
   3551           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
   3552           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3553           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3554           if (eptr >= md->end_subject)
   3555             {
   3556             SCHECK_PARTIAL();
   3557             RRETURN(MATCH_NOMATCH);
   3558             }
   3559           cc = UCHAR21TEST(eptr);
   3560           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
   3561           eptr++;
   3562           }
   3563         /* Control never gets here */
   3564         }
   3565       else  /* Maximize */
   3566         {
   3567         pp = eptr;
   3568         for (i = min; i < max; i++)
   3569           {
   3570           pcre_uint32 cc;               /* Faster than pcre_uchar */
   3571           if (eptr >= md->end_subject)
   3572             {
   3573             SCHECK_PARTIAL();
   3574             break;
   3575             }
   3576           cc = UCHAR21TEST(eptr);
   3577           if (fc != cc && foc != cc) break;
   3578           eptr++;
   3579           }
   3580         if (possessive) continue;       /* No backtracking */
   3581         for (;;)
   3582           {
   3583           if (eptr == pp) goto TAIL_RECURSE;
   3584           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
   3585           eptr--;
   3586           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3587           }
   3588         /* Control never gets here */
   3589         }
   3590       }
   3591 
   3592     /* Caseful comparisons (includes all multi-byte characters) */
   3593 
   3594     else
   3595       {
   3596       for (i = 1; i <= min; i++)
   3597         {
   3598         if (eptr >= md->end_subject)
   3599           {
   3600           SCHECK_PARTIAL();
   3601           RRETURN(MATCH_NOMATCH);
   3602           }
   3603         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3604         }
   3605 
   3606       if (min == max) continue;
   3607 
   3608       if (minimize)
   3609         {
   3610         for (fi = min;; fi++)
   3611           {
   3612           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
   3613           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3614           if (fi >= max) RRETURN(MATCH_NOMATCH);
   3615           if (eptr >= md->end_subject)
   3616             {
   3617             SCHECK_PARTIAL();
   3618             RRETURN(MATCH_NOMATCH);
   3619             }
   3620           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
   3621           }
   3622         /* Control never gets here */
   3623         }
   3624       else  /* Maximize */
   3625         {
   3626         pp = eptr;
   3627         for (i = min; i < max; i++)
   3628           {
   3629           if (eptr >= md->end_subject)
   3630             {
   3631             SCHECK_PARTIAL();
   3632             break;
   3633             }
   3634           if (fc != UCHAR21TEST(eptr)) break;
   3635           eptr++;
   3636           }
   3637         if (possessive) continue;    /* No backtracking */
   3638         for (;;)
   3639           {
   3640           if (eptr == pp) goto TAIL_RECURSE;
   3641           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
   3642           eptr--;
   3643           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3644           }
   3645         /* Control never gets here */
   3646         }
   3647       }
   3648     /* Control never gets here */
   3649 
   3650     /* Match a negated single one-byte character. The character we are
   3651     checking can be multibyte. */
   3652 
   3653     case OP_NOT:
   3654     case OP_NOTI:
   3655     if (eptr >= md->end_subject)
   3656       {
   3657       SCHECK_PARTIAL();
   3658       RRETURN(MATCH_NOMATCH);
   3659       }
   3660 #ifdef SUPPORT_UTF
   3661     if (utf)
   3662       {
   3663       register pcre_uint32 ch, och;
   3664 
   3665       ecode++;
   3666       GETCHARINC(ch, ecode);
   3667       GETCHARINC(c, eptr);
   3668 
   3669       if (op == OP_NOT)
   3670         {
   3671         if (ch == c) RRETURN(MATCH_NOMATCH);
   3672         }
   3673       else
   3674         {
   3675 #ifdef SUPPORT_UCP
   3676         if (ch > 127)
   3677           och = UCD_OTHERCASE(ch);
   3678 #else
   3679         if (ch > 127)
   3680           och = ch;
   3681 #endif /* SUPPORT_UCP */
   3682         else
   3683           och = TABLE_GET(ch, md->fcc, ch);
   3684         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
   3685         }
   3686       }
   3687     else
   3688 #endif
   3689       {
   3690       register pcre_uint32 ch = ecode[1];
   3691       c = *eptr++;
   3692       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
   3693         RRETURN(MATCH_NOMATCH);
   3694       ecode += 2;
   3695       }
   3696     break;
   3697 
   3698     /* Match a negated single one-byte character repeatedly. This is almost a
   3699     repeat of the code for a repeated single character, but I haven't found a
   3700     nice way of commoning these up that doesn't require a test of the
   3701     positive/negative option for each character match. Maybe that wouldn't add
   3702     very much to the time taken, but character matching *is* what this is all
   3703     about... */
   3704 
   3705     case OP_NOTEXACT:
   3706     case OP_NOTEXACTI:
   3707     min = max = GET2(ecode, 1);
   3708     ecode += 1 + IMM2_SIZE;
   3709     goto REPEATNOTCHAR;
   3710 
   3711     case OP_NOTUPTO:
   3712     case OP_NOTUPTOI:
   3713     case OP_NOTMINUPTO:
   3714     case OP_NOTMINUPTOI:
   3715     min = 0;
   3716     max = GET2(ecode, 1);
   3717     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
   3718     ecode += 1 + IMM2_SIZE;
   3719     goto REPEATNOTCHAR;
   3720 
   3721     case OP_NOTPOSSTAR:
   3722     case OP_NOTPOSSTARI:
   3723     possessive = TRUE;
   3724     min = 0;
   3725     max = INT_MAX;
   3726     ecode++;
   3727     goto REPEATNOTCHAR;
   3728 
   3729     case OP_NOTPOSPLUS:
   3730     case OP_NOTPOSPLUSI:
   3731     possessive = TRUE;
   3732     min = 1;
   3733     max = INT_MAX;
   3734     ecode++;
   3735     goto REPEATNOTCHAR;
   3736 
   3737     case OP_NOTPOSQUERY:
   3738     case OP_NOTPOSQUERYI:
   3739     possessive = TRUE;
   3740     min = 0;
   3741     max = 1;
   3742     ecode++;
   3743     goto REPEATNOTCHAR;
   3744 
   3745     case OP_NOTPOSUPTO:
   3746     case OP_NOTPOSUPTOI:
   3747     possessive = TRUE;
   3748     min = 0;
   3749     max = GET2(ecode, 1);
   3750     ecode += 1 + IMM2_SIZE;
   3751     goto REPEATNOTCHAR;
   3752 
   3753     case OP_NOTSTAR:
   3754     case OP_NOTSTARI:
   3755     case OP_NOTMINSTAR:
   3756     case OP_NOTMINSTARI:
   3757     case OP_NOTPLUS:
   3758     case OP_NOTPLUSI:
   3759     case OP_NOTMINPLUS:
   3760     case OP_NOTMINPLUSI:
   3761     case OP_NOTQUERY:
   3762     case OP_NOTQUERYI:
   3763     case OP_NOTMINQUERY:
   3764     case OP_NOTMINQUERYI:
   3765     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
   3766     minimize = (c & 1) != 0;
   3767     min = rep_min[c];                 /* Pick up values from tables; */
   3768     max = rep_max[c];                 /* zero for max => infinity */
   3769     if (max == 0) max = INT_MAX;
   3770 
   3771     /* Common code for all repeated single-byte matches. */
   3772 
   3773     REPEATNOTCHAR:
   3774     GETCHARINCTEST(fc, ecode);
   3775 
   3776     /* The code is duplicated for the caseless and caseful cases, for speed,
   3777     since matching characters is likely to be quite common. First, ensure the
   3778     minimum number of matches are present. If min = max, continue at the same
   3779     level without recursing. Otherwise, if minimizing, keep trying the rest of
   3780     the expression and advancing one matching character if failing, up to the
   3781     maximum. Alternatively, if maximizing, find the maximum number of
   3782     characters and work backwards. */
   3783 
   3784     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   3785       max, (char *)eptr));
   3786 
   3787     if (op >= OP_NOTSTARI)     /* Caseless */
   3788       {
   3789 #ifdef SUPPORT_UTF
   3790 #ifdef SUPPORT_UCP
   3791       if (utf && fc > 127)
   3792         foc = UCD_OTHERCASE(fc);
   3793 #else
   3794       if (utf && fc > 127)
   3795         foc = fc;
   3796 #endif /* SUPPORT_UCP */
   3797       else
   3798 #endif /* SUPPORT_UTF */
   3799         foc = TABLE_GET(fc, md->fcc, fc);
   3800 
   3801 #ifdef SUPPORT_UTF
   3802       if (utf)
   3803         {
   3804         register pcre_uint32 d;
   3805         for (i = 1; i <= min; i++)
   3806           {
   3807           if (eptr >= md->end_subject)
   3808             {
   3809             SCHECK_PARTIAL();
   3810             RRETURN(MATCH_NOMATCH);
   3811             }
   3812           GETCHARINC(d, eptr);
   3813           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
   3814           }
   3815         }
   3816       else
   3817 #endif  /* SUPPORT_UTF */
   3818       /* Not UTF mode */
   3819         {
   3820         for (i = 1; i <= min; i++)
   3821           {
   3822           if (eptr >= md->end_subject)
   3823             {
   3824             SCHECK_PARTIAL();
   3825             RRETURN(MATCH_NOMATCH);
   3826             }
   3827           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3828           eptr++;
   3829           }
   3830         }
   3831 
   3832       if (min == max) continue;
   3833 
   3834       if (minimize)
   3835         {
   3836 #ifdef SUPPORT_UTF
   3837         if (utf)
   3838           {
   3839           register pcre_uint32 d;
   3840           for (fi = min;; fi++)
   3841             {
   3842             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
   3843             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3844             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3845             if (eptr >= md->end_subject)
   3846               {
   3847               SCHECK_PARTIAL();
   3848               RRETURN(MATCH_NOMATCH);
   3849               }
   3850             GETCHARINC(d, eptr);
   3851             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
   3852             }
   3853           }
   3854         else
   3855 #endif  /*SUPPORT_UTF */
   3856         /* Not UTF mode */
   3857           {
   3858           for (fi = min;; fi++)
   3859             {
   3860             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
   3861             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3862             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3863             if (eptr >= md->end_subject)
   3864               {
   3865               SCHECK_PARTIAL();
   3866               RRETURN(MATCH_NOMATCH);
   3867               }
   3868             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
   3869             eptr++;
   3870             }
   3871           }
   3872         /* Control never gets here */
   3873         }
   3874 
   3875       /* Maximize case */
   3876 
   3877       else
   3878         {
   3879         pp = eptr;
   3880 
   3881 #ifdef SUPPORT_UTF
   3882         if (utf)
   3883           {
   3884           register pcre_uint32 d;
   3885           for (i = min; i < max; i++)
   3886             {
   3887             int len = 1;
   3888             if (eptr >= md->end_subject)
   3889               {
   3890               SCHECK_PARTIAL();
   3891               break;
   3892               }
   3893             GETCHARLEN(d, eptr, len);
   3894             if (fc == d || (unsigned int)foc == d) break;
   3895             eptr += len;
   3896             }
   3897           if (possessive) continue;    /* No backtracking */
   3898           for(;;)
   3899             {
   3900             if (eptr == pp) goto TAIL_RECURSE;
   3901             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
   3902             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3903             eptr--;
   3904             BACKCHAR(eptr);
   3905             }
   3906           }
   3907         else
   3908 #endif  /* SUPPORT_UTF */
   3909         /* Not UTF mode */
   3910           {
   3911           for (i = min; i < max; i++)
   3912             {
   3913             if (eptr >= md->end_subject)
   3914               {
   3915               SCHECK_PARTIAL();
   3916               break;
   3917               }
   3918             if (fc == *eptr || foc == *eptr) break;
   3919             eptr++;
   3920             }
   3921           if (possessive) continue;    /* No backtracking */
   3922           for (;;)
   3923             {
   3924             if (eptr == pp) goto TAIL_RECURSE;
   3925             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
   3926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3927             eptr--;
   3928             }
   3929           }
   3930         /* Control never gets here */
   3931         }
   3932       }
   3933 
   3934     /* Caseful comparisons */
   3935 
   3936     else
   3937       {
   3938 #ifdef SUPPORT_UTF
   3939       if (utf)
   3940         {
   3941         register pcre_uint32 d;
   3942         for (i = 1; i <= min; i++)
   3943           {
   3944           if (eptr >= md->end_subject)
   3945             {
   3946             SCHECK_PARTIAL();
   3947             RRETURN(MATCH_NOMATCH);
   3948             }
   3949           GETCHARINC(d, eptr);
   3950           if (fc == d) RRETURN(MATCH_NOMATCH);
   3951           }
   3952         }
   3953       else
   3954 #endif
   3955       /* Not UTF mode */
   3956         {
   3957         for (i = 1; i <= min; i++)
   3958           {
   3959           if (eptr >= md->end_subject)
   3960             {
   3961             SCHECK_PARTIAL();
   3962             RRETURN(MATCH_NOMATCH);
   3963             }
   3964           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   3965           }
   3966         }
   3967 
   3968       if (min == max) continue;
   3969 
   3970       if (minimize)
   3971         {
   3972 #ifdef SUPPORT_UTF
   3973         if (utf)
   3974           {
   3975           register pcre_uint32 d;
   3976           for (fi = min;; fi++)
   3977             {
   3978             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
   3979             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3980             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3981             if (eptr >= md->end_subject)
   3982               {
   3983               SCHECK_PARTIAL();
   3984               RRETURN(MATCH_NOMATCH);
   3985               }
   3986             GETCHARINC(d, eptr);
   3987             if (fc == d) RRETURN(MATCH_NOMATCH);
   3988             }
   3989           }
   3990         else
   3991 #endif
   3992         /* Not UTF mode */
   3993           {
   3994           for (fi = min;; fi++)
   3995             {
   3996             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
   3997             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3998             if (fi >= max) RRETURN(MATCH_NOMATCH);
   3999             if (eptr >= md->end_subject)
   4000               {
   4001               SCHECK_PARTIAL();
   4002               RRETURN(MATCH_NOMATCH);
   4003               }
   4004             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
   4005             }
   4006           }
   4007         /* Control never gets here */
   4008         }
   4009 
   4010       /* Maximize case */
   4011 
   4012       else
   4013         {
   4014         pp = eptr;
   4015 
   4016 #ifdef SUPPORT_UTF
   4017         if (utf)
   4018           {
   4019           register pcre_uint32 d;
   4020           for (i = min; i < max; i++)
   4021             {
   4022             int len = 1;
   4023             if (eptr >= md->end_subject)
   4024               {
   4025               SCHECK_PARTIAL();
   4026               break;
   4027               }
   4028             GETCHARLEN(d, eptr, len);
   4029             if (fc == d) break;
   4030             eptr += len;
   4031             }
   4032           if (possessive) continue;    /* No backtracking */
   4033           for(;;)
   4034             {
   4035             if (eptr == pp) goto TAIL_RECURSE;
   4036             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
   4037             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4038             eptr--;
   4039             BACKCHAR(eptr);
   4040             }
   4041           }
   4042         else
   4043 #endif
   4044         /* Not UTF mode */
   4045           {
   4046           for (i = min; i < max; i++)
   4047             {
   4048             if (eptr >= md->end_subject)
   4049               {
   4050               SCHECK_PARTIAL();
   4051               break;
   4052               }
   4053             if (fc == *eptr) break;
   4054             eptr++;
   4055             }
   4056           if (possessive) continue;    /* No backtracking */
   4057           for (;;)
   4058             {
   4059             if (eptr == pp) goto TAIL_RECURSE;
   4060             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
   4061             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4062             eptr--;
   4063             }
   4064           }
   4065         /* Control never gets here */
   4066         }
   4067       }
   4068     /* Control never gets here */
   4069 
   4070     /* Match a single character type repeatedly; several different opcodes
   4071     share code. This is very similar to the code for single characters, but we
   4072     repeat it in the interests of efficiency. */
   4073 
   4074     case OP_TYPEEXACT:
   4075     min = max = GET2(ecode, 1);
   4076     minimize = TRUE;
   4077     ecode += 1 + IMM2_SIZE;
   4078     goto REPEATTYPE;
   4079 
   4080     case OP_TYPEUPTO:
   4081     case OP_TYPEMINUPTO:
   4082     min = 0;
   4083     max = GET2(ecode, 1);
   4084     minimize = *ecode == OP_TYPEMINUPTO;
   4085     ecode += 1 + IMM2_SIZE;
   4086     goto REPEATTYPE;
   4087 
   4088     case OP_TYPEPOSSTAR:
   4089     possessive = TRUE;
   4090     min = 0;
   4091     max = INT_MAX;
   4092     ecode++;
   4093     goto REPEATTYPE;
   4094 
   4095     case OP_TYPEPOSPLUS:
   4096     possessive = TRUE;
   4097     min = 1;
   4098     max = INT_MAX;
   4099     ecode++;
   4100     goto REPEATTYPE;
   4101 
   4102     case OP_TYPEPOSQUERY:
   4103     possessive = TRUE;
   4104     min = 0;
   4105     max = 1;
   4106     ecode++;
   4107     goto REPEATTYPE;
   4108 
   4109     case OP_TYPEPOSUPTO:
   4110     possessive = TRUE;
   4111     min = 0;
   4112     max = GET2(ecode, 1);
   4113     ecode += 1 + IMM2_SIZE;
   4114     goto REPEATTYPE;
   4115 
   4116     case OP_TYPESTAR:
   4117     case OP_TYPEMINSTAR:
   4118     case OP_TYPEPLUS:
   4119     case OP_TYPEMINPLUS:
   4120     case OP_TYPEQUERY:
   4121     case OP_TYPEMINQUERY:
   4122     c = *ecode++ - OP_TYPESTAR;
   4123     minimize = (c & 1) != 0;
   4124     min = rep_min[c];                 /* Pick up values from tables; */
   4125     max = rep_max[c];                 /* zero for max => infinity */
   4126     if (max == 0) max = INT_MAX;
   4127 
   4128     /* Common code for all repeated single character type matches. Note that
   4129     in UTF-8 mode, '.' matches a character of any length, but for the other
   4130     character types, the valid characters are all one-byte long. */
   4131 
   4132     REPEATTYPE:
   4133     ctype = *ecode++;      /* Code for the character type */
   4134 
   4135 #ifdef SUPPORT_UCP
   4136     if (ctype == OP_PROP || ctype == OP_NOTPROP)
   4137       {
   4138       prop_fail_result = ctype == OP_NOTPROP;
   4139       prop_type = *ecode++;
   4140       prop_value = *ecode++;
   4141       }
   4142     else prop_type = -1;
   4143 #endif
   4144 
   4145     /* First, ensure the minimum number of matches are present. Use inline
   4146     code for maximizing the speed, and do the type test once at the start
   4147     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
   4148     is tidier. Also separate the UCP code, which can be the same for both UTF-8
   4149     and single-bytes. */
   4150 
   4151     if (min > 0)
   4152       {
   4153 #ifdef SUPPORT_UCP
   4154       if (prop_type >= 0)
   4155         {
   4156         switch(prop_type)
   4157           {
   4158           case PT_ANY:
   4159           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4160           for (i = 1; i <= min; i++)
   4161             {
   4162             if (eptr >= md->end_subject)
   4163               {
   4164               SCHECK_PARTIAL();
   4165               RRETURN(MATCH_NOMATCH);
   4166               }
   4167             GETCHARINCTEST(c, eptr);
   4168             }
   4169           break;
   4170 
   4171           case PT_LAMP:
   4172           for (i = 1; i <= min; i++)
   4173             {
   4174             int chartype;
   4175             if (eptr >= md->end_subject)
   4176               {
   4177               SCHECK_PARTIAL();
   4178               RRETURN(MATCH_NOMATCH);
   4179               }
   4180             GETCHARINCTEST(c, eptr);
   4181             chartype = UCD_CHARTYPE(c);
   4182             if ((chartype == ucp_Lu ||
   4183                  chartype == ucp_Ll ||
   4184                  chartype == ucp_Lt) == prop_fail_result)
   4185               RRETURN(MATCH_NOMATCH);
   4186             }
   4187           break;
   4188 
   4189           case PT_GC:
   4190           for (i = 1; i <= min; i++)
   4191             {
   4192             if (eptr >= md->end_subject)
   4193               {
   4194               SCHECK_PARTIAL();
   4195               RRETURN(MATCH_NOMATCH);
   4196               }
   4197             GETCHARINCTEST(c, eptr);
   4198             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   4199               RRETURN(MATCH_NOMATCH);
   4200             }
   4201           break;
   4202 
   4203           case PT_PC:
   4204           for (i = 1; i <= min; i++)
   4205             {
   4206             if (eptr >= md->end_subject)
   4207               {
   4208               SCHECK_PARTIAL();
   4209               RRETURN(MATCH_NOMATCH);
   4210               }
   4211             GETCHARINCTEST(c, eptr);
   4212             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   4213               RRETURN(MATCH_NOMATCH);
   4214             }
   4215           break;
   4216 
   4217           case PT_SC:
   4218           for (i = 1; i <= min; i++)
   4219             {
   4220             if (eptr >= md->end_subject)
   4221               {
   4222               SCHECK_PARTIAL();
   4223               RRETURN(MATCH_NOMATCH);
   4224               }
   4225             GETCHARINCTEST(c, eptr);
   4226             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   4227               RRETURN(MATCH_NOMATCH);
   4228             }
   4229           break;
   4230 
   4231           case PT_ALNUM:
   4232           for (i = 1; i <= min; i++)
   4233             {
   4234             int category;
   4235             if (eptr >= md->end_subject)
   4236               {
   4237               SCHECK_PARTIAL();
   4238               RRETURN(MATCH_NOMATCH);
   4239               }
   4240             GETCHARINCTEST(c, eptr);
   4241             category = UCD_CATEGORY(c);
   4242             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   4243               RRETURN(MATCH_NOMATCH);
   4244             }
   4245           break;
   4246 
   4247           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   4248           which means that Perl space and POSIX space are now identical. PCRE
   4249           was changed at release 8.34. */
   4250 
   4251           case PT_SPACE:    /* Perl space */
   4252           case PT_PXSPACE:  /* POSIX space */
   4253           for (i = 1; i <= min; i++)
   4254             {
   4255             if (eptr >= md->end_subject)
   4256               {
   4257               SCHECK_PARTIAL();
   4258               RRETURN(MATCH_NOMATCH);
   4259               }
   4260             GETCHARINCTEST(c, eptr);
   4261             switch(c)
   4262               {
   4263               HSPACE_CASES:
   4264               VSPACE_CASES:
   4265               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4266               break;
   4267 
   4268               default:
   4269               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   4270                 RRETURN(MATCH_NOMATCH);
   4271               break;
   4272               }
   4273             }
   4274           break;
   4275 
   4276           case PT_WORD:
   4277           for (i = 1; i <= min; i++)
   4278             {
   4279             int category;
   4280             if (eptr >= md->end_subject)
   4281               {
   4282               SCHECK_PARTIAL();
   4283               RRETURN(MATCH_NOMATCH);
   4284               }
   4285             GETCHARINCTEST(c, eptr);
   4286             category = UCD_CATEGORY(c);
   4287             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
   4288                    == prop_fail_result)
   4289               RRETURN(MATCH_NOMATCH);
   4290             }
   4291           break;
   4292 
   4293           case PT_CLIST:
   4294           for (i = 1; i <= min; i++)
   4295             {
   4296             const pcre_uint32 *cp;
   4297             if (eptr >= md->end_subject)
   4298               {
   4299               SCHECK_PARTIAL();
   4300               RRETURN(MATCH_NOMATCH);
   4301               }
   4302             GETCHARINCTEST(c, eptr);
   4303             cp = PRIV(ucd_caseless_sets) + prop_value;
   4304             for (;;)
   4305               {
   4306               if (c < *cp)
   4307                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   4308               if (c == *cp++)
   4309                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   4310               }
   4311             }
   4312           break;
   4313 
   4314           case PT_UCNC:
   4315           for (i = 1; i <= min; i++)
   4316             {
   4317             if (eptr >= md->end_subject)
   4318               {
   4319               SCHECK_PARTIAL();
   4320               RRETURN(MATCH_NOMATCH);
   4321               }
   4322             GETCHARINCTEST(c, eptr);
   4323             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   4324                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   4325                  c >= 0xe000) == prop_fail_result)
   4326               RRETURN(MATCH_NOMATCH);
   4327             }
   4328           break;
   4329 
   4330           /* This should not occur */
   4331 
   4332           default:
   4333           RRETURN(PCRE_ERROR_INTERNAL);
   4334           }
   4335         }
   4336 
   4337       /* Match extended Unicode sequences. We will get here only if the
   4338       support is in the binary; otherwise a compile-time error occurs. */
   4339 
   4340       else if (ctype == OP_EXTUNI)
   4341         {
   4342         for (i = 1; i <= min; i++)
   4343           {
   4344           if (eptr >= md->end_subject)
   4345             {
   4346             SCHECK_PARTIAL();
   4347             RRETURN(MATCH_NOMATCH);
   4348             }
   4349           else
   4350             {
   4351             int lgb, rgb;
   4352             GETCHARINCTEST(c, eptr);
   4353             lgb = UCD_GRAPHBREAK(c);
   4354            while (eptr < md->end_subject)
   4355               {
   4356               int len = 1;
   4357               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   4358               rgb = UCD_GRAPHBREAK(c);
   4359               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   4360               lgb = rgb;
   4361               eptr += len;
   4362               }
   4363             }
   4364           CHECK_PARTIAL();
   4365           }
   4366         }
   4367 
   4368       else
   4369 #endif     /* SUPPORT_UCP */
   4370 
   4371 /* Handle all other cases when the coding is UTF-8 */
   4372 
   4373 #ifdef SUPPORT_UTF
   4374       if (utf) switch(ctype)
   4375         {
   4376         case OP_ANY:
   4377         for (i = 1; i <= min; i++)
   4378           {
   4379           if (eptr >= md->end_subject)
   4380             {
   4381             SCHECK_PARTIAL();
   4382             RRETURN(MATCH_NOMATCH);
   4383             }
   4384           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4385           if (md->partial != 0 &&
   4386               eptr + 1 >= md->end_subject &&
   4387               NLBLOCK->nltype == NLTYPE_FIXED &&
   4388               NLBLOCK->nllen == 2 &&
   4389               UCHAR21(eptr) == NLBLOCK->nl[0])
   4390             {
   4391             md->hitend = TRUE;
   4392             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   4393             }
   4394           eptr++;
   4395           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4396           }
   4397         break;
   4398 
   4399         case OP_ALLANY:
   4400         for (i = 1; i <= min; i++)
   4401           {
   4402           if (eptr >= md->end_subject)
   4403             {
   4404             SCHECK_PARTIAL();
   4405             RRETURN(MATCH_NOMATCH);
   4406             }
   4407           eptr++;
   4408           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4409           }
   4410         break;
   4411 
   4412         case OP_ANYBYTE:
   4413         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
   4414         eptr += min;
   4415         break;
   4416 
   4417         case OP_ANYNL:
   4418         for (i = 1; i <= min; i++)
   4419           {
   4420           if (eptr >= md->end_subject)
   4421             {
   4422             SCHECK_PARTIAL();
   4423             RRETURN(MATCH_NOMATCH);
   4424             }
   4425           GETCHARINC(c, eptr);
   4426           switch(c)
   4427             {
   4428             default: RRETURN(MATCH_NOMATCH);
   4429 
   4430             case CHAR_CR:
   4431             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   4432             break;
   4433 
   4434             case CHAR_LF:
   4435             break;
   4436 
   4437             case CHAR_VT:
   4438             case CHAR_FF:
   4439             case CHAR_NEL:
   4440 #ifndef EBCDIC
   4441             case 0x2028:
   4442             case 0x2029:
   4443 #endif  /* Not EBCDIC */
   4444             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   4445             break;
   4446             }
   4447           }
   4448         break;
   4449 
   4450         case OP_NOT_HSPACE:
   4451         for (i = 1; i <= min; i++)
   4452           {
   4453           if (eptr >= md->end_subject)
   4454             {
   4455             SCHECK_PARTIAL();
   4456             RRETURN(MATCH_NOMATCH);
   4457             }
   4458           GETCHARINC(c, eptr);
   4459           switch(c)
   4460             {
   4461             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
   4462             default: break;
   4463             }
   4464           }
   4465         break;
   4466 
   4467         case OP_HSPACE:
   4468         for (i = 1; i <= min; i++)
   4469           {
   4470           if (eptr >= md->end_subject)
   4471             {
   4472             SCHECK_PARTIAL();
   4473             RRETURN(MATCH_NOMATCH);
   4474             }
   4475           GETCHARINC(c, eptr);
   4476           switch(c)
   4477             {
   4478             HSPACE_CASES: break;  /* Byte and multibyte cases */
   4479             default: RRETURN(MATCH_NOMATCH);
   4480             }
   4481           }
   4482         break;
   4483 
   4484         case OP_NOT_VSPACE:
   4485         for (i = 1; i <= min; i++)
   4486           {
   4487           if (eptr >= md->end_subject)
   4488             {
   4489             SCHECK_PARTIAL();
   4490             RRETURN(MATCH_NOMATCH);
   4491             }
   4492           GETCHARINC(c, eptr);
   4493           switch(c)
   4494             {
   4495             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   4496             default: break;
   4497             }
   4498           }
   4499         break;
   4500 
   4501         case OP_VSPACE:
   4502         for (i = 1; i <= min; i++)
   4503           {
   4504           if (eptr >= md->end_subject)
   4505             {
   4506             SCHECK_PARTIAL();
   4507             RRETURN(MATCH_NOMATCH);
   4508             }
   4509           GETCHARINC(c, eptr);
   4510           switch(c)
   4511             {
   4512             VSPACE_CASES: break;
   4513             default: RRETURN(MATCH_NOMATCH);
   4514             }
   4515           }
   4516         break;
   4517 
   4518         case OP_NOT_DIGIT:
   4519         for (i = 1; i <= min; i++)
   4520           {
   4521           if (eptr >= md->end_subject)
   4522             {
   4523             SCHECK_PARTIAL();
   4524             RRETURN(MATCH_NOMATCH);
   4525             }
   4526           GETCHARINC(c, eptr);
   4527           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
   4528             RRETURN(MATCH_NOMATCH);
   4529           }
   4530         break;
   4531 
   4532         case OP_DIGIT:
   4533         for (i = 1; i <= min; i++)
   4534           {
   4535           pcre_uint32 cc;
   4536           if (eptr >= md->end_subject)
   4537             {
   4538             SCHECK_PARTIAL();
   4539             RRETURN(MATCH_NOMATCH);
   4540             }
   4541           cc = UCHAR21(eptr);
   4542           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
   4543             RRETURN(MATCH_NOMATCH);
   4544           eptr++;
   4545           /* No need to skip more bytes - we know it's a 1-byte character */
   4546           }
   4547         break;
   4548 
   4549         case OP_NOT_WHITESPACE:
   4550         for (i = 1; i <= min; i++)
   4551           {
   4552           pcre_uint32 cc;
   4553           if (eptr >= md->end_subject)
   4554             {
   4555             SCHECK_PARTIAL();
   4556             RRETURN(MATCH_NOMATCH);
   4557             }
   4558           cc = UCHAR21(eptr);
   4559           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
   4560             RRETURN(MATCH_NOMATCH);
   4561           eptr++;
   4562           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4563           }
   4564         break;
   4565 
   4566         case OP_WHITESPACE:
   4567         for (i = 1; i <= min; i++)
   4568           {
   4569           pcre_uint32 cc;
   4570           if (eptr >= md->end_subject)
   4571             {
   4572             SCHECK_PARTIAL();
   4573             RRETURN(MATCH_NOMATCH);
   4574             }
   4575           cc = UCHAR21(eptr);
   4576           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
   4577             RRETURN(MATCH_NOMATCH);
   4578           eptr++;
   4579           /* No need to skip more bytes - we know it's a 1-byte character */
   4580           }
   4581         break;
   4582 
   4583         case OP_NOT_WORDCHAR:
   4584         for (i = 1; i <= min; i++)
   4585           {
   4586           pcre_uint32 cc;
   4587           if (eptr >= md->end_subject)
   4588             {
   4589             SCHECK_PARTIAL();
   4590             RRETURN(MATCH_NOMATCH);
   4591             }
   4592           cc = UCHAR21(eptr);
   4593           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
   4594             RRETURN(MATCH_NOMATCH);
   4595           eptr++;
   4596           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   4597           }
   4598         break;
   4599 
   4600         case OP_WORDCHAR:
   4601         for (i = 1; i <= min; i++)
   4602           {
   4603           pcre_uint32 cc;
   4604           if (eptr >= md->end_subject)
   4605             {
   4606             SCHECK_PARTIAL();
   4607             RRETURN(MATCH_NOMATCH);
   4608             }
   4609           cc = UCHAR21(eptr);
   4610           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
   4611             RRETURN(MATCH_NOMATCH);
   4612           eptr++;
   4613           /* No need to skip more bytes - we know it's a 1-byte character */
   4614           }
   4615         break;
   4616 
   4617         default:
   4618         RRETURN(PCRE_ERROR_INTERNAL);
   4619         }  /* End switch(ctype) */
   4620 
   4621       else
   4622 #endif     /* SUPPORT_UTF */
   4623 
   4624       /* Code for the non-UTF-8 case for minimum matching of operators other
   4625       than OP_PROP and OP_NOTPROP. */
   4626 
   4627       switch(ctype)
   4628         {
   4629         case OP_ANY:
   4630         for (i = 1; i <= min; i++)
   4631           {
   4632           if (eptr >= md->end_subject)
   4633             {
   4634             SCHECK_PARTIAL();
   4635             RRETURN(MATCH_NOMATCH);
   4636             }
   4637           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
   4638           if (md->partial != 0 &&
   4639               eptr + 1 >= md->end_subject &&
   4640               NLBLOCK->nltype == NLTYPE_FIXED &&
   4641               NLBLOCK->nllen == 2 &&
   4642               *eptr == NLBLOCK->nl[0])
   4643             {
   4644             md->hitend = TRUE;
   4645             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   4646             }
   4647           eptr++;
   4648           }
   4649         break;
   4650 
   4651         case OP_ALLANY:
   4652         if (eptr > md->end_subject - min)
   4653           {
   4654           SCHECK_PARTIAL();
   4655           RRETURN(MATCH_NOMATCH);
   4656           }
   4657         eptr += min;
   4658         break;
   4659 
   4660         case OP_ANYBYTE:
   4661         if (eptr > md->end_subject - min)
   4662           {
   4663           SCHECK_PARTIAL();
   4664           RRETURN(MATCH_NOMATCH);
   4665           }
   4666         eptr += min;
   4667         break;
   4668 
   4669         case OP_ANYNL:
   4670         for (i = 1; i <= min; i++)
   4671           {
   4672           if (eptr >= md->end_subject)
   4673             {
   4674             SCHECK_PARTIAL();
   4675             RRETURN(MATCH_NOMATCH);
   4676             }
   4677           switch(*eptr++)
   4678             {
   4679             default: RRETURN(MATCH_NOMATCH);
   4680 
   4681             case CHAR_CR:
   4682             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
   4683             break;
   4684 
   4685             case CHAR_LF:
   4686             break;
   4687 
   4688             case CHAR_VT:
   4689             case CHAR_FF:
   4690             case CHAR_NEL:
   4691 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4692             case 0x2028:
   4693             case 0x2029:
   4694 #endif
   4695             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   4696             break;
   4697             }
   4698           }
   4699         break;
   4700 
   4701         case OP_NOT_HSPACE:
   4702         for (i = 1; i <= min; i++)
   4703           {
   4704           if (eptr >= md->end_subject)
   4705             {
   4706             SCHECK_PARTIAL();
   4707             RRETURN(MATCH_NOMATCH);
   4708             }
   4709           switch(*eptr++)
   4710             {
   4711             default: break;
   4712             HSPACE_BYTE_CASES:
   4713 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4714             HSPACE_MULTIBYTE_CASES:
   4715 #endif
   4716             RRETURN(MATCH_NOMATCH);
   4717             }
   4718           }
   4719         break;
   4720 
   4721         case OP_HSPACE:
   4722         for (i = 1; i <= min; i++)
   4723           {
   4724           if (eptr >= md->end_subject)
   4725             {
   4726             SCHECK_PARTIAL();
   4727             RRETURN(MATCH_NOMATCH);
   4728             }
   4729           switch(*eptr++)
   4730             {
   4731             default: RRETURN(MATCH_NOMATCH);
   4732             HSPACE_BYTE_CASES:
   4733 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4734             HSPACE_MULTIBYTE_CASES:
   4735 #endif
   4736             break;
   4737             }
   4738           }
   4739         break;
   4740 
   4741         case OP_NOT_VSPACE:
   4742         for (i = 1; i <= min; i++)
   4743           {
   4744           if (eptr >= md->end_subject)
   4745             {
   4746             SCHECK_PARTIAL();
   4747             RRETURN(MATCH_NOMATCH);
   4748             }
   4749           switch(*eptr++)
   4750             {
   4751             VSPACE_BYTE_CASES:
   4752 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4753             VSPACE_MULTIBYTE_CASES:
   4754 #endif
   4755             RRETURN(MATCH_NOMATCH);
   4756             default: break;
   4757             }
   4758           }
   4759         break;
   4760 
   4761         case OP_VSPACE:
   4762         for (i = 1; i <= min; i++)
   4763           {
   4764           if (eptr >= md->end_subject)
   4765             {
   4766             SCHECK_PARTIAL();
   4767             RRETURN(MATCH_NOMATCH);
   4768             }
   4769           switch(*eptr++)
   4770             {
   4771             default: RRETURN(MATCH_NOMATCH);
   4772             VSPACE_BYTE_CASES:
   4773 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   4774             VSPACE_MULTIBYTE_CASES:
   4775 #endif
   4776             break;
   4777             }
   4778           }
   4779         break;
   4780 
   4781         case OP_NOT_DIGIT:
   4782         for (i = 1; i <= min; i++)
   4783           {
   4784           if (eptr >= md->end_subject)
   4785             {
   4786             SCHECK_PARTIAL();
   4787             RRETURN(MATCH_NOMATCH);
   4788             }
   4789           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
   4790             RRETURN(MATCH_NOMATCH);
   4791           eptr++;
   4792           }
   4793         break;
   4794 
   4795         case OP_DIGIT:
   4796         for (i = 1; i <= min; i++)
   4797           {
   4798           if (eptr >= md->end_subject)
   4799             {
   4800             SCHECK_PARTIAL();
   4801             RRETURN(MATCH_NOMATCH);
   4802             }
   4803           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
   4804             RRETURN(MATCH_NOMATCH);
   4805           eptr++;
   4806           }
   4807         break;
   4808 
   4809         case OP_NOT_WHITESPACE:
   4810         for (i = 1; i <= min; i++)
   4811           {
   4812           if (eptr >= md->end_subject)
   4813             {
   4814             SCHECK_PARTIAL();
   4815             RRETURN(MATCH_NOMATCH);
   4816             }
   4817           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
   4818             RRETURN(MATCH_NOMATCH);
   4819           eptr++;
   4820           }
   4821         break;
   4822 
   4823         case OP_WHITESPACE:
   4824         for (i = 1; i <= min; i++)
   4825           {
   4826           if (eptr >= md->end_subject)
   4827             {
   4828             SCHECK_PARTIAL();
   4829             RRETURN(MATCH_NOMATCH);
   4830             }
   4831           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
   4832             RRETURN(MATCH_NOMATCH);
   4833           eptr++;
   4834           }
   4835         break;
   4836 
   4837         case OP_NOT_WORDCHAR:
   4838         for (i = 1; i <= min; i++)
   4839           {
   4840           if (eptr >= md->end_subject)
   4841             {
   4842             SCHECK_PARTIAL();
   4843             RRETURN(MATCH_NOMATCH);
   4844             }
   4845           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
   4846             RRETURN(MATCH_NOMATCH);
   4847           eptr++;
   4848           }
   4849         break;
   4850 
   4851         case OP_WORDCHAR:
   4852         for (i = 1; i <= min; i++)
   4853           {
   4854           if (eptr >= md->end_subject)
   4855             {
   4856             SCHECK_PARTIAL();
   4857             RRETURN(MATCH_NOMATCH);
   4858             }
   4859           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
   4860             RRETURN(MATCH_NOMATCH);
   4861           eptr++;
   4862           }
   4863         break;
   4864 
   4865         default:
   4866         RRETURN(PCRE_ERROR_INTERNAL);
   4867         }
   4868       }
   4869 
   4870     /* If min = max, continue at the same level without recursing */
   4871 
   4872     if (min == max) continue;
   4873 
   4874     /* If minimizing, we have to test the rest of the pattern before each
   4875     subsequent match. Again, separate the UTF-8 case for speed, and also
   4876     separate the UCP cases. */
   4877 
   4878     if (minimize)
   4879       {
   4880 #ifdef SUPPORT_UCP
   4881       if (prop_type >= 0)
   4882         {
   4883         switch(prop_type)
   4884           {
   4885           case PT_ANY:
   4886           for (fi = min;; fi++)
   4887             {
   4888             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
   4889             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4890             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4891             if (eptr >= md->end_subject)
   4892               {
   4893               SCHECK_PARTIAL();
   4894               RRETURN(MATCH_NOMATCH);
   4895               }
   4896             GETCHARINCTEST(c, eptr);
   4897             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   4898             }
   4899           /* Control never gets here */
   4900 
   4901           case PT_LAMP:
   4902           for (fi = min;; fi++)
   4903             {
   4904             int chartype;
   4905             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
   4906             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4907             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4908             if (eptr >= md->end_subject)
   4909               {
   4910               SCHECK_PARTIAL();
   4911               RRETURN(MATCH_NOMATCH);
   4912               }
   4913             GETCHARINCTEST(c, eptr);
   4914             chartype = UCD_CHARTYPE(c);
   4915             if ((chartype == ucp_Lu ||
   4916                  chartype == ucp_Ll ||
   4917                  chartype == ucp_Lt) == prop_fail_result)
   4918               RRETURN(MATCH_NOMATCH);
   4919             }
   4920           /* Control never gets here */
   4921 
   4922           case PT_GC:
   4923           for (fi = min;; fi++)
   4924             {
   4925             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
   4926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4927             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4928             if (eptr >= md->end_subject)
   4929               {
   4930               SCHECK_PARTIAL();
   4931               RRETURN(MATCH_NOMATCH);
   4932               }
   4933             GETCHARINCTEST(c, eptr);
   4934             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
   4935               RRETURN(MATCH_NOMATCH);
   4936             }
   4937           /* Control never gets here */
   4938 
   4939           case PT_PC:
   4940           for (fi = min;; fi++)
   4941             {
   4942             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
   4943             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4944             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4945             if (eptr >= md->end_subject)
   4946               {
   4947               SCHECK_PARTIAL();
   4948               RRETURN(MATCH_NOMATCH);
   4949               }
   4950             GETCHARINCTEST(c, eptr);
   4951             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
   4952               RRETURN(MATCH_NOMATCH);
   4953             }
   4954           /* Control never gets here */
   4955 
   4956           case PT_SC:
   4957           for (fi = min;; fi++)
   4958             {
   4959             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
   4960             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4961             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4962             if (eptr >= md->end_subject)
   4963               {
   4964               SCHECK_PARTIAL();
   4965               RRETURN(MATCH_NOMATCH);
   4966               }
   4967             GETCHARINCTEST(c, eptr);
   4968             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
   4969               RRETURN(MATCH_NOMATCH);
   4970             }
   4971           /* Control never gets here */
   4972 
   4973           case PT_ALNUM:
   4974           for (fi = min;; fi++)
   4975             {
   4976             int category;
   4977             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
   4978             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4979             if (fi >= max) RRETURN(MATCH_NOMATCH);
   4980             if (eptr >= md->end_subject)
   4981               {
   4982               SCHECK_PARTIAL();
   4983               RRETURN(MATCH_NOMATCH);
   4984               }
   4985             GETCHARINCTEST(c, eptr);
   4986             category = UCD_CATEGORY(c);
   4987             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   4988               RRETURN(MATCH_NOMATCH);
   4989             }
   4990           /* Control never gets here */
   4991 
   4992           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   4993           which means that Perl space and POSIX space are now identical. PCRE
   4994           was changed at release 8.34. */
   4995 
   4996           case PT_SPACE:    /* Perl space */
   4997           case PT_PXSPACE:  /* POSIX space */
   4998           for (fi = min;; fi++)
   4999             {
   5000             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
   5001             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5002             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5003             if (eptr >= md->end_subject)
   5004               {
   5005               SCHECK_PARTIAL();
   5006               RRETURN(MATCH_NOMATCH);
   5007               }
   5008             GETCHARINCTEST(c, eptr);
   5009             switch(c)
   5010               {
   5011               HSPACE_CASES:
   5012               VSPACE_CASES:
   5013               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
   5014               break;
   5015 
   5016               default:
   5017               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5018                 RRETURN(MATCH_NOMATCH);
   5019               break;
   5020               }
   5021             }
   5022           /* Control never gets here */
   5023 
   5024           case PT_WORD:
   5025           for (fi = min;; fi++)
   5026             {
   5027             int category;
   5028             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
   5029             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5030             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5031             if (eptr >= md->end_subject)
   5032               {
   5033               SCHECK_PARTIAL();
   5034               RRETURN(MATCH_NOMATCH);
   5035               }
   5036             GETCHARINCTEST(c, eptr);
   5037             category = UCD_CATEGORY(c);
   5038             if ((category == ucp_L ||
   5039                  category == ucp_N ||
   5040                  c == CHAR_UNDERSCORE)
   5041                    == prop_fail_result)
   5042               RRETURN(MATCH_NOMATCH);
   5043             }
   5044           /* Control never gets here */
   5045 
   5046           case PT_CLIST:
   5047           for (fi = min;; fi++)
   5048             {
   5049             const pcre_uint32 *cp;
   5050             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
   5051             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5052             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5053             if (eptr >= md->end_subject)
   5054               {
   5055               SCHECK_PARTIAL();
   5056               RRETURN(MATCH_NOMATCH);
   5057               }
   5058             GETCHARINCTEST(c, eptr);
   5059             cp = PRIV(ucd_caseless_sets) + prop_value;
   5060             for (;;)
   5061               {
   5062               if (c < *cp)
   5063                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
   5064               if (c == *cp++)
   5065                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
   5066               }
   5067             }
   5068           /* Control never gets here */
   5069 
   5070           case PT_UCNC:
   5071           for (fi = min;; fi++)
   5072             {
   5073             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
   5074             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5075             if (fi >= max) RRETURN(MATCH_NOMATCH);
   5076             if (eptr >= md->end_subject)
   5077               {
   5078               SCHECK_PARTIAL();
   5079               RRETURN(MATCH_NOMATCH);
   5080               }
   5081             GETCHARINCTEST(c, eptr);
   5082             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5083                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5084                  c >= 0xe000) == prop_fail_result)
   5085               RRETURN(MATCH_NOMATCH);
   5086             }
   5087           /* Control never gets here */
   5088 
   5089           /* This should never occur */
   5090           default:
   5091           RRETURN(PCRE_ERROR_INTERNAL);
   5092           }
   5093         }
   5094 
   5095       /* Match extended Unicode sequences. We will get here only if the
   5096       support is in the binary; otherwise a compile-time error occurs. */
   5097 
   5098       else if (ctype == OP_EXTUNI)
   5099         {
   5100         for (fi = min;; fi++)
   5101           {
   5102           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
   5103           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5104           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5105           if (eptr >= md->end_subject)
   5106             {
   5107             SCHECK_PARTIAL();
   5108             RRETURN(MATCH_NOMATCH);
   5109             }
   5110           else
   5111             {
   5112             int lgb, rgb;
   5113             GETCHARINCTEST(c, eptr);
   5114             lgb = UCD_GRAPHBREAK(c);
   5115             while (eptr < md->end_subject)
   5116               {
   5117               int len = 1;
   5118               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5119               rgb = UCD_GRAPHBREAK(c);
   5120               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5121               lgb = rgb;
   5122               eptr += len;
   5123               }
   5124             }
   5125           CHECK_PARTIAL();
   5126           }
   5127         }
   5128       else
   5129 #endif     /* SUPPORT_UCP */
   5130 
   5131 #ifdef SUPPORT_UTF
   5132       if (utf)
   5133         {
   5134         for (fi = min;; fi++)
   5135           {
   5136           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
   5137           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5138           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5139           if (eptr >= md->end_subject)
   5140             {
   5141             SCHECK_PARTIAL();
   5142             RRETURN(MATCH_NOMATCH);
   5143             }
   5144           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5145             RRETURN(MATCH_NOMATCH);
   5146           GETCHARINC(c, eptr);
   5147           switch(ctype)
   5148             {
   5149             case OP_ANY:               /* This is the non-NL case */
   5150             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5151                 eptr >= md->end_subject &&
   5152                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5153                 NLBLOCK->nllen == 2 &&
   5154                 c == NLBLOCK->nl[0])
   5155               {
   5156               md->hitend = TRUE;
   5157               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5158               }
   5159             break;
   5160 
   5161             case OP_ALLANY:
   5162             case OP_ANYBYTE:
   5163             break;
   5164 
   5165             case OP_ANYNL:
   5166             switch(c)
   5167               {
   5168               default: RRETURN(MATCH_NOMATCH);
   5169               case CHAR_CR:
   5170               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
   5171               break;
   5172 
   5173               case CHAR_LF:
   5174               break;
   5175 
   5176               case CHAR_VT:
   5177               case CHAR_FF:
   5178               case CHAR_NEL:
   5179 #ifndef EBCDIC
   5180               case 0x2028:
   5181               case 0x2029:
   5182 #endif  /* Not EBCDIC */
   5183               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   5184               break;
   5185               }
   5186             break;
   5187 
   5188             case OP_NOT_HSPACE:
   5189             switch(c)
   5190               {
   5191               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5192               default: break;
   5193               }
   5194             break;
   5195 
   5196             case OP_HSPACE:
   5197             switch(c)
   5198               {
   5199               HSPACE_CASES: break;
   5200               default: RRETURN(MATCH_NOMATCH);
   5201               }
   5202             break;
   5203 
   5204             case OP_NOT_VSPACE:
   5205             switch(c)
   5206               {
   5207               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
   5208               default: break;
   5209               }
   5210             break;
   5211 
   5212             case OP_VSPACE:
   5213             switch(c)
   5214               {
   5215               VSPACE_CASES: break;
   5216               default: RRETURN(MATCH_NOMATCH);
   5217               }
   5218             break;
   5219 
   5220             case OP_NOT_DIGIT:
   5221             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
   5222               RRETURN(MATCH_NOMATCH);
   5223             break;
   5224 
   5225             case OP_DIGIT:
   5226             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
   5227               RRETURN(MATCH_NOMATCH);
   5228             break;
   5229 
   5230             case OP_NOT_WHITESPACE:
   5231             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
   5232               RRETURN(MATCH_NOMATCH);
   5233             break;
   5234 
   5235             case OP_WHITESPACE:
   5236             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
   5237               RRETURN(MATCH_NOMATCH);
   5238             break;
   5239 
   5240             case OP_NOT_WORDCHAR:
   5241             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
   5242               RRETURN(MATCH_NOMATCH);
   5243             break;
   5244 
   5245             case OP_WORDCHAR:
   5246             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
   5247               RRETURN(MATCH_NOMATCH);
   5248             break;
   5249 
   5250             default:
   5251             RRETURN(PCRE_ERROR_INTERNAL);
   5252             }
   5253           }
   5254         }
   5255       else
   5256 #endif
   5257       /* Not UTF mode */
   5258         {
   5259         for (fi = min;; fi++)
   5260           {
   5261           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
   5262           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5263           if (fi >= max) RRETURN(MATCH_NOMATCH);
   5264           if (eptr >= md->end_subject)
   5265             {
   5266             SCHECK_PARTIAL();
   5267             RRETURN(MATCH_NOMATCH);
   5268             }
   5269           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   5270             RRETURN(MATCH_NOMATCH);
   5271           c = *eptr++;
   5272           switch(ctype)
   5273             {
   5274             case OP_ANY:               /* This is the non-NL case */
   5275             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5276                 eptr >= md->end_subject &&
   5277                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5278                 NLBLOCK->nllen == 2 &&
   5279                 c == NLBLOCK->nl[0])
   5280               {
   5281               md->hitend = TRUE;
   5282               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5283               }
   5284             break;
   5285 
   5286             case OP_ALLANY:
   5287             case OP_ANYBYTE:
   5288             break;
   5289 
   5290             case OP_ANYNL:
   5291             switch(c)
   5292               {
   5293               default: RRETURN(MATCH_NOMATCH);
   5294               case CHAR_CR:
   5295               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
   5296               break;
   5297 
   5298               case CHAR_LF:
   5299               break;
   5300 
   5301               case CHAR_VT:
   5302               case CHAR_FF:
   5303               case CHAR_NEL:
   5304 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5305               case 0x2028:
   5306               case 0x2029:
   5307 #endif
   5308               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
   5309               break;
   5310               }
   5311             break;
   5312 
   5313             case OP_NOT_HSPACE:
   5314             switch(c)
   5315               {
   5316               default: break;
   5317               HSPACE_BYTE_CASES:
   5318 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5319               HSPACE_MULTIBYTE_CASES:
   5320 #endif
   5321               RRETURN(MATCH_NOMATCH);
   5322               }
   5323             break;
   5324 
   5325             case OP_HSPACE:
   5326             switch(c)
   5327               {
   5328               default: RRETURN(MATCH_NOMATCH);
   5329               HSPACE_BYTE_CASES:
   5330 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5331               HSPACE_MULTIBYTE_CASES:
   5332 #endif
   5333               break;
   5334               }
   5335             break;
   5336 
   5337             case OP_NOT_VSPACE:
   5338             switch(c)
   5339               {
   5340               default: break;
   5341               VSPACE_BYTE_CASES:
   5342 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5343               VSPACE_MULTIBYTE_CASES:
   5344 #endif
   5345               RRETURN(MATCH_NOMATCH);
   5346               }
   5347             break;
   5348 
   5349             case OP_VSPACE:
   5350             switch(c)
   5351               {
   5352               default: RRETURN(MATCH_NOMATCH);
   5353               VSPACE_BYTE_CASES:
   5354 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5355               VSPACE_MULTIBYTE_CASES:
   5356 #endif
   5357               break;
   5358               }
   5359             break;
   5360 
   5361             case OP_NOT_DIGIT:
   5362             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
   5363             break;
   5364 
   5365             case OP_DIGIT:
   5366             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
   5367             break;
   5368 
   5369             case OP_NOT_WHITESPACE:
   5370             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
   5371             break;
   5372 
   5373             case OP_WHITESPACE:
   5374             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
   5375             break;
   5376 
   5377             case OP_NOT_WORDCHAR:
   5378             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
   5379             break;
   5380 
   5381             case OP_WORDCHAR:
   5382             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
   5383             break;
   5384 
   5385             default:
   5386             RRETURN(PCRE_ERROR_INTERNAL);
   5387             }
   5388           }
   5389         }
   5390       /* Control never gets here */
   5391       }
   5392 
   5393     /* If maximizing, it is worth using inline code for speed, doing the type
   5394     test once at the start (i.e. keep it out of the loop). Again, keep the
   5395     UTF-8 and UCP stuff separate. */
   5396 
   5397     else
   5398       {
   5399       pp = eptr;  /* Remember where we started */
   5400 
   5401 #ifdef SUPPORT_UCP
   5402       if (prop_type >= 0)
   5403         {
   5404         switch(prop_type)
   5405           {
   5406           case PT_ANY:
   5407           for (i = min; i < max; i++)
   5408             {
   5409             int len = 1;
   5410             if (eptr >= md->end_subject)
   5411               {
   5412               SCHECK_PARTIAL();
   5413               break;
   5414               }
   5415             GETCHARLENTEST(c, eptr, len);
   5416             if (prop_fail_result) break;
   5417             eptr+= len;
   5418             }
   5419           break;
   5420 
   5421           case PT_LAMP:
   5422           for (i = min; i < max; i++)
   5423             {
   5424             int chartype;
   5425             int len = 1;
   5426             if (eptr >= md->end_subject)
   5427               {
   5428               SCHECK_PARTIAL();
   5429               break;
   5430               }
   5431             GETCHARLENTEST(c, eptr, len);
   5432             chartype = UCD_CHARTYPE(c);
   5433             if ((chartype == ucp_Lu ||
   5434                  chartype == ucp_Ll ||
   5435                  chartype == ucp_Lt) == prop_fail_result)
   5436               break;
   5437             eptr+= len;
   5438             }
   5439           break;
   5440 
   5441           case PT_GC:
   5442           for (i = min; i < max; i++)
   5443             {
   5444             int len = 1;
   5445             if (eptr >= md->end_subject)
   5446               {
   5447               SCHECK_PARTIAL();
   5448               break;
   5449               }
   5450             GETCHARLENTEST(c, eptr, len);
   5451             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
   5452             eptr+= len;
   5453             }
   5454           break;
   5455 
   5456           case PT_PC:
   5457           for (i = min; i < max; i++)
   5458             {
   5459             int len = 1;
   5460             if (eptr >= md->end_subject)
   5461               {
   5462               SCHECK_PARTIAL();
   5463               break;
   5464               }
   5465             GETCHARLENTEST(c, eptr, len);
   5466             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
   5467             eptr+= len;
   5468             }
   5469           break;
   5470 
   5471           case PT_SC:
   5472           for (i = min; i < max; i++)
   5473             {
   5474             int len = 1;
   5475             if (eptr >= md->end_subject)
   5476               {
   5477               SCHECK_PARTIAL();
   5478               break;
   5479               }
   5480             GETCHARLENTEST(c, eptr, len);
   5481             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
   5482             eptr+= len;
   5483             }
   5484           break;
   5485 
   5486           case PT_ALNUM:
   5487           for (i = min; i < max; i++)
   5488             {
   5489             int category;
   5490             int len = 1;
   5491             if (eptr >= md->end_subject)
   5492               {
   5493               SCHECK_PARTIAL();
   5494               break;
   5495               }
   5496             GETCHARLENTEST(c, eptr, len);
   5497             category = UCD_CATEGORY(c);
   5498             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
   5499               break;
   5500             eptr+= len;
   5501             }
   5502           break;
   5503 
   5504           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   5505           which means that Perl space and POSIX space are now identical. PCRE
   5506           was changed at release 8.34. */
   5507 
   5508           case PT_SPACE:    /* Perl space */
   5509           case PT_PXSPACE:  /* POSIX space */
   5510           for (i = min; i < max; i++)
   5511             {
   5512             int len = 1;
   5513             if (eptr >= md->end_subject)
   5514               {
   5515               SCHECK_PARTIAL();
   5516               break;
   5517               }
   5518             GETCHARLENTEST(c, eptr, len);
   5519             switch(c)
   5520               {
   5521               HSPACE_CASES:
   5522               VSPACE_CASES:
   5523               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
   5524               break;
   5525 
   5526               default:
   5527               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
   5528                 goto ENDLOOP99;   /* Break the loop */
   5529               break;
   5530               }
   5531             eptr+= len;
   5532             }
   5533           ENDLOOP99:
   5534           break;
   5535 
   5536           case PT_WORD:
   5537           for (i = min; i < max; i++)
   5538             {
   5539             int category;
   5540             int len = 1;
   5541             if (eptr >= md->end_subject)
   5542               {
   5543               SCHECK_PARTIAL();
   5544               break;
   5545               }
   5546             GETCHARLENTEST(c, eptr, len);
   5547             category = UCD_CATEGORY(c);
   5548             if ((category == ucp_L || category == ucp_N ||
   5549                  c == CHAR_UNDERSCORE) == prop_fail_result)
   5550               break;
   5551             eptr+= len;
   5552             }
   5553           break;
   5554 
   5555           case PT_CLIST:
   5556           for (i = min; i < max; i++)
   5557             {
   5558             const pcre_uint32 *cp;
   5559             int len = 1;
   5560             if (eptr >= md->end_subject)
   5561               {
   5562               SCHECK_PARTIAL();
   5563               break;
   5564               }
   5565             GETCHARLENTEST(c, eptr, len);
   5566             cp = PRIV(ucd_caseless_sets) + prop_value;
   5567             for (;;)
   5568               {
   5569               if (c < *cp)
   5570                 { if (prop_fail_result) break; else goto GOT_MAX; }
   5571               if (c == *cp++)
   5572                 { if (prop_fail_result) goto GOT_MAX; else break; }
   5573               }
   5574             eptr += len;
   5575             }
   5576           GOT_MAX:
   5577           break;
   5578 
   5579           case PT_UCNC:
   5580           for (i = min; i < max; i++)
   5581             {
   5582             int len = 1;
   5583             if (eptr >= md->end_subject)
   5584               {
   5585               SCHECK_PARTIAL();
   5586               break;
   5587               }
   5588             GETCHARLENTEST(c, eptr, len);
   5589             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   5590                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   5591                  c >= 0xe000) == prop_fail_result)
   5592               break;
   5593             eptr += len;
   5594             }
   5595           break;
   5596 
   5597           default:
   5598           RRETURN(PCRE_ERROR_INTERNAL);
   5599           }
   5600 
   5601         /* eptr is now past the end of the maximum run */
   5602 
   5603         if (possessive) continue;    /* No backtracking */
   5604         for(;;)
   5605           {
   5606           if (eptr == pp) goto TAIL_RECURSE;
   5607           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
   5608           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5609           eptr--;
   5610           if (utf) BACKCHAR(eptr);
   5611           }
   5612         }
   5613 
   5614       /* Match extended Unicode grapheme clusters. We will get here only if the
   5615       support is in the binary; otherwise a compile-time error occurs. */
   5616 
   5617       else if (ctype == OP_EXTUNI)
   5618         {
   5619         for (i = min; i < max; i++)
   5620           {
   5621           if (eptr >= md->end_subject)
   5622             {
   5623             SCHECK_PARTIAL();
   5624             break;
   5625             }
   5626           else
   5627             {
   5628             int lgb, rgb;
   5629             GETCHARINCTEST(c, eptr);
   5630             lgb = UCD_GRAPHBREAK(c);
   5631             while (eptr < md->end_subject)
   5632               {
   5633               int len = 1;
   5634               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   5635               rgb = UCD_GRAPHBREAK(c);
   5636               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5637               lgb = rgb;
   5638               eptr += len;
   5639               }
   5640             }
   5641           CHECK_PARTIAL();
   5642           }
   5643 
   5644         /* eptr is now past the end of the maximum run */
   5645 
   5646         if (possessive) continue;    /* No backtracking */
   5647 
   5648         for(;;)
   5649           {
   5650           int lgb, rgb;
   5651           PCRE_PUCHAR fptr;
   5652 
   5653           if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
   5654           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
   5655           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5656 
   5657           /* Backtracking over an extended grapheme cluster involves inspecting
   5658           the previous two characters (if present) to see if a break is
   5659           permitted between them. */
   5660 
   5661           eptr--;
   5662           if (!utf) c = *eptr; else
   5663             {
   5664             BACKCHAR(eptr);
   5665             GETCHAR(c, eptr);
   5666             }
   5667           rgb = UCD_GRAPHBREAK(c);
   5668 
   5669           for (;;)
   5670             {
   5671             if (eptr == pp) goto TAIL_RECURSE;   /* At start of char run */
   5672             fptr = eptr - 1;
   5673             if (!utf) c = *fptr; else
   5674               {
   5675               BACKCHAR(fptr);
   5676               GETCHAR(c, fptr);
   5677               }
   5678             lgb = UCD_GRAPHBREAK(c);
   5679             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
   5680             eptr = fptr;
   5681             rgb = lgb;
   5682             }
   5683           }
   5684         }
   5685 
   5686       else
   5687 #endif   /* SUPPORT_UCP */
   5688 
   5689 #ifdef SUPPORT_UTF
   5690       if (utf)
   5691         {
   5692         switch(ctype)
   5693           {
   5694           case OP_ANY:
   5695           for (i = min; i < max; i++)
   5696             {
   5697             if (eptr >= md->end_subject)
   5698               {
   5699               SCHECK_PARTIAL();
   5700               break;
   5701               }
   5702             if (IS_NEWLINE(eptr)) break;
   5703             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5704                 eptr + 1 >= md->end_subject &&
   5705                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5706                 NLBLOCK->nllen == 2 &&
   5707                 UCHAR21(eptr) == NLBLOCK->nl[0])
   5708               {
   5709               md->hitend = TRUE;
   5710               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5711               }
   5712             eptr++;
   5713             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   5714             }
   5715           break;
   5716 
   5717           case OP_ALLANY:
   5718           if (max < INT_MAX)
   5719             {
   5720             for (i = min; i < max; i++)
   5721               {
   5722               if (eptr >= md->end_subject)
   5723                 {
   5724                 SCHECK_PARTIAL();
   5725                 break;
   5726                 }
   5727               eptr++;
   5728               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
   5729               }
   5730             }
   5731           else
   5732             {
   5733             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
   5734             SCHECK_PARTIAL();
   5735             }
   5736           break;
   5737 
   5738           /* The byte case is the same as non-UTF8 */
   5739 
   5740           case OP_ANYBYTE:
   5741           c = max - min;
   5742           if (c > (unsigned int)(md->end_subject - eptr))
   5743             {
   5744             eptr = md->end_subject;
   5745             SCHECK_PARTIAL();
   5746             }
   5747           else eptr += c;
   5748           break;
   5749 
   5750           case OP_ANYNL:
   5751           for (i = min; i < max; i++)
   5752             {
   5753             int len = 1;
   5754             if (eptr >= md->end_subject)
   5755               {
   5756               SCHECK_PARTIAL();
   5757               break;
   5758               }
   5759             GETCHARLEN(c, eptr, len);
   5760             if (c == CHAR_CR)
   5761               {
   5762               if (++eptr >= md->end_subject) break;
   5763               if (UCHAR21(eptr) == CHAR_LF) eptr++;
   5764               }
   5765             else
   5766               {
   5767               if (c != CHAR_LF &&
   5768                   (md->bsr_anycrlf ||
   5769                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   5770 #ifndef EBCDIC
   5771                     && c != 0x2028 && c != 0x2029
   5772 #endif  /* Not EBCDIC */
   5773                     )))
   5774                 break;
   5775               eptr += len;
   5776               }
   5777             }
   5778           break;
   5779 
   5780           case OP_NOT_HSPACE:
   5781           case OP_HSPACE:
   5782           for (i = min; i < max; i++)
   5783             {
   5784             BOOL gotspace;
   5785             int len = 1;
   5786             if (eptr >= md->end_subject)
   5787               {
   5788               SCHECK_PARTIAL();
   5789               break;
   5790               }
   5791             GETCHARLEN(c, eptr, len);
   5792             switch(c)
   5793               {
   5794               HSPACE_CASES: gotspace = TRUE; break;
   5795               default: gotspace = FALSE; break;
   5796               }
   5797             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
   5798             eptr += len;
   5799             }
   5800           break;
   5801 
   5802           case OP_NOT_VSPACE:
   5803           case OP_VSPACE:
   5804           for (i = min; i < max; i++)
   5805             {
   5806             BOOL gotspace;
   5807             int len = 1;
   5808             if (eptr >= md->end_subject)
   5809               {
   5810               SCHECK_PARTIAL();
   5811               break;
   5812               }
   5813             GETCHARLEN(c, eptr, len);
   5814             switch(c)
   5815               {
   5816               VSPACE_CASES: gotspace = TRUE; break;
   5817               default: gotspace = FALSE; break;
   5818               }
   5819             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
   5820             eptr += len;
   5821             }
   5822           break;
   5823 
   5824           case OP_NOT_DIGIT:
   5825           for (i = min; i < max; i++)
   5826             {
   5827             int len = 1;
   5828             if (eptr >= md->end_subject)
   5829               {
   5830               SCHECK_PARTIAL();
   5831               break;
   5832               }
   5833             GETCHARLEN(c, eptr, len);
   5834             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
   5835             eptr+= len;
   5836             }
   5837           break;
   5838 
   5839           case OP_DIGIT:
   5840           for (i = min; i < max; i++)
   5841             {
   5842             int len = 1;
   5843             if (eptr >= md->end_subject)
   5844               {
   5845               SCHECK_PARTIAL();
   5846               break;
   5847               }
   5848             GETCHARLEN(c, eptr, len);
   5849             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
   5850             eptr+= len;
   5851             }
   5852           break;
   5853 
   5854           case OP_NOT_WHITESPACE:
   5855           for (i = min; i < max; i++)
   5856             {
   5857             int len = 1;
   5858             if (eptr >= md->end_subject)
   5859               {
   5860               SCHECK_PARTIAL();
   5861               break;
   5862               }
   5863             GETCHARLEN(c, eptr, len);
   5864             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
   5865             eptr+= len;
   5866             }
   5867           break;
   5868 
   5869           case OP_WHITESPACE:
   5870           for (i = min; i < max; i++)
   5871             {
   5872             int len = 1;
   5873             if (eptr >= md->end_subject)
   5874               {
   5875               SCHECK_PARTIAL();
   5876               break;
   5877               }
   5878             GETCHARLEN(c, eptr, len);
   5879             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
   5880             eptr+= len;
   5881             }
   5882           break;
   5883 
   5884           case OP_NOT_WORDCHAR:
   5885           for (i = min; i < max; i++)
   5886             {
   5887             int len = 1;
   5888             if (eptr >= md->end_subject)
   5889               {
   5890               SCHECK_PARTIAL();
   5891               break;
   5892               }
   5893             GETCHARLEN(c, eptr, len);
   5894             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
   5895             eptr+= len;
   5896             }
   5897           break;
   5898 
   5899           case OP_WORDCHAR:
   5900           for (i = min; i < max; i++)
   5901             {
   5902             int len = 1;
   5903             if (eptr >= md->end_subject)
   5904               {
   5905               SCHECK_PARTIAL();
   5906               break;
   5907               }
   5908             GETCHARLEN(c, eptr, len);
   5909             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
   5910             eptr+= len;
   5911             }
   5912           break;
   5913 
   5914           default:
   5915           RRETURN(PCRE_ERROR_INTERNAL);
   5916           }
   5917 
   5918         if (possessive) continue;    /* No backtracking */
   5919         for(;;)
   5920           {
   5921           if (eptr == pp) goto TAIL_RECURSE;
   5922           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
   5923           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5924           eptr--;
   5925           BACKCHAR(eptr);
   5926           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
   5927               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
   5928           }
   5929         }
   5930       else
   5931 #endif  /* SUPPORT_UTF */
   5932       /* Not UTF mode */
   5933         {
   5934         switch(ctype)
   5935           {
   5936           case OP_ANY:
   5937           for (i = min; i < max; i++)
   5938             {
   5939             if (eptr >= md->end_subject)
   5940               {
   5941               SCHECK_PARTIAL();
   5942               break;
   5943               }
   5944             if (IS_NEWLINE(eptr)) break;
   5945             if (md->partial != 0 &&    /* Take care with CRLF partial */
   5946                 eptr + 1 >= md->end_subject &&
   5947                 NLBLOCK->nltype == NLTYPE_FIXED &&
   5948                 NLBLOCK->nllen == 2 &&
   5949                 *eptr == NLBLOCK->nl[0])
   5950               {
   5951               md->hitend = TRUE;
   5952               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
   5953               }
   5954             eptr++;
   5955             }
   5956           break;
   5957 
   5958           case OP_ALLANY:
   5959           case OP_ANYBYTE:
   5960           c = max - min;
   5961           if (c > (unsigned int)(md->end_subject - eptr))
   5962             {
   5963             eptr = md->end_subject;
   5964             SCHECK_PARTIAL();
   5965             }
   5966           else eptr += c;
   5967           break;
   5968 
   5969           case OP_ANYNL:
   5970           for (i = min; i < max; i++)
   5971             {
   5972             if (eptr >= md->end_subject)
   5973               {
   5974               SCHECK_PARTIAL();
   5975               break;
   5976               }
   5977             c = *eptr;
   5978             if (c == CHAR_CR)
   5979               {
   5980               if (++eptr >= md->end_subject) break;
   5981               if (*eptr == CHAR_LF) eptr++;
   5982               }
   5983             else
   5984               {
   5985               if (c != CHAR_LF && (md->bsr_anycrlf ||
   5986                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
   5987 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   5988                  && c != 0x2028 && c != 0x2029
   5989 #endif
   5990                  ))) break;
   5991               eptr++;
   5992               }
   5993             }
   5994           break;
   5995 
   5996           case OP_NOT_HSPACE:
   5997           for (i = min; i < max; i++)
   5998             {
   5999             if (eptr >= md->end_subject)
   6000               {
   6001               SCHECK_PARTIAL();
   6002               break;
   6003               }
   6004             switch(*eptr)
   6005               {
   6006               default: eptr++; break;
   6007               HSPACE_BYTE_CASES:
   6008 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6009               HSPACE_MULTIBYTE_CASES:
   6010 #endif
   6011               goto ENDLOOP00;
   6012               }
   6013             }
   6014           ENDLOOP00:
   6015           break;
   6016 
   6017           case OP_HSPACE:
   6018           for (i = min; i < max; i++)
   6019             {
   6020             if (eptr >= md->end_subject)
   6021               {
   6022               SCHECK_PARTIAL();
   6023               break;
   6024               }
   6025             switch(*eptr)
   6026               {
   6027               default: goto ENDLOOP01;
   6028               HSPACE_BYTE_CASES:
   6029 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6030               HSPACE_MULTIBYTE_CASES:
   6031 #endif
   6032               eptr++; break;
   6033               }
   6034             }
   6035           ENDLOOP01:
   6036           break;
   6037 
   6038           case OP_NOT_VSPACE:
   6039           for (i = min; i < max; i++)
   6040             {
   6041             if (eptr >= md->end_subject)
   6042               {
   6043               SCHECK_PARTIAL();
   6044               break;
   6045               }
   6046             switch(*eptr)
   6047               {
   6048               default: eptr++; break;
   6049               VSPACE_BYTE_CASES:
   6050 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6051               VSPACE_MULTIBYTE_CASES:
   6052 #endif
   6053               goto ENDLOOP02;
   6054               }
   6055             }
   6056           ENDLOOP02:
   6057           break;
   6058 
   6059           case OP_VSPACE:
   6060           for (i = min; i < max; i++)
   6061             {
   6062             if (eptr >= md->end_subject)
   6063               {
   6064               SCHECK_PARTIAL();
   6065               break;
   6066               }
   6067             switch(*eptr)
   6068               {
   6069               default: goto ENDLOOP03;
   6070               VSPACE_BYTE_CASES:
   6071 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
   6072               VSPACE_MULTIBYTE_CASES:
   6073 #endif
   6074               eptr++; break;
   6075               }
   6076             }
   6077           ENDLOOP03:
   6078           break;
   6079 
   6080           case OP_NOT_DIGIT:
   6081           for (i = min; i < max; i++)
   6082             {
   6083             if (eptr >= md->end_subject)
   6084               {
   6085               SCHECK_PARTIAL();
   6086               break;
   6087               }
   6088             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
   6089             eptr++;
   6090             }
   6091           break;
   6092 
   6093           case OP_DIGIT:
   6094           for (i = min; i < max; i++)
   6095             {
   6096             if (eptr >= md->end_subject)
   6097               {
   6098               SCHECK_PARTIAL();
   6099               break;
   6100               }
   6101             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
   6102             eptr++;
   6103             }
   6104           break;
   6105 
   6106           case OP_NOT_WHITESPACE:
   6107           for (i = min; i < max; i++)
   6108             {
   6109             if (eptr >= md->end_subject)
   6110               {
   6111               SCHECK_PARTIAL();
   6112               break;
   6113               }
   6114             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
   6115             eptr++;
   6116             }
   6117           break;
   6118 
   6119           case OP_WHITESPACE:
   6120           for (i = min; i < max; i++)
   6121             {
   6122             if (eptr >= md->end_subject)
   6123               {
   6124               SCHECK_PARTIAL();
   6125               break;
   6126               }
   6127             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
   6128             eptr++;
   6129             }
   6130           break;
   6131 
   6132           case OP_NOT_WORDCHAR:
   6133           for (i = min; i < max; i++)
   6134             {
   6135             if (eptr >= md->end_subject)
   6136               {
   6137               SCHECK_PARTIAL();
   6138               break;
   6139               }
   6140             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
   6141             eptr++;
   6142             }
   6143           break;
   6144 
   6145           case OP_WORDCHAR:
   6146           for (i = min; i < max; i++)
   6147             {
   6148             if (eptr >= md->end_subject)
   6149               {
   6150               SCHECK_PARTIAL();
   6151               break;
   6152               }
   6153             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
   6154             eptr++;
   6155             }
   6156           break;
   6157 
   6158           default:
   6159           RRETURN(PCRE_ERROR_INTERNAL);
   6160           }
   6161 
   6162         if (possessive) continue;    /* No backtracking */
   6163         for (;;)
   6164           {
   6165           if (eptr == pp) goto TAIL_RECURSE;
   6166           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
   6167           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   6168           eptr--;
   6169           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
   6170               eptr[-1] == CHAR_CR) eptr--;
   6171           }
   6172         }
   6173 
   6174       /* Control never gets here */
   6175       }
   6176 
   6177     /* There's been some horrible disaster. Arrival here can only mean there is
   6178     something seriously wrong in the code above or the OP_xxx definitions. */
   6179 
   6180     default:
   6181     DPRINTF(("Unknown opcode %d\n", *ecode));
   6182     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
   6183     }
   6184 
   6185   /* Do not stick any code in here without much thought; it is assumed
   6186   that "continue" in the code above comes out to here to repeat the main
   6187   loop. */
   6188 
   6189   }             /* End of main loop */
   6190 /* Control never reaches here */
   6191 
   6192 
   6193 /* When compiling to use the heap rather than the stack for recursive calls to
   6194 match(), the RRETURN() macro jumps here. The number that is saved in
   6195 frame->Xwhere indicates which label we actually want to return to. */
   6196 
   6197 #ifdef NO_RECURSE
   6198 #define LBL(val) case val: goto L_RM##val;
   6199 HEAP_RETURN:
   6200 switch (frame->Xwhere)
   6201   {
   6202   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
   6203   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   6204   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   6205   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
   6206   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
   6207   LBL(65) LBL(66)
   6208 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   6209   LBL(20) LBL(21)
   6210 #endif
   6211 #ifdef SUPPORT_UTF
   6212   LBL(16) LBL(18)
   6213   LBL(22) LBL(23) LBL(28) LBL(30)
   6214   LBL(32) LBL(34) LBL(42) LBL(46)
   6215 #ifdef SUPPORT_UCP
   6216   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
   6217   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
   6218 #endif  /* SUPPORT_UCP */
   6219 #endif  /* SUPPORT_UTF */
   6220   default:
   6221   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   6222   return PCRE_ERROR_INTERNAL;
   6223   }
   6224 #undef LBL
   6225 #endif  /* NO_RECURSE */
   6226 }
   6227 
   6228 
   6229 /***************************************************************************
   6230 ****************************************************************************
   6231                    RECURSION IN THE match() FUNCTION
   6232 
   6233 Undefine all the macros that were defined above to handle this. */
   6234 
   6235 #ifdef NO_RECURSE
   6236 #undef eptr
   6237 #undef ecode
   6238 #undef mstart
   6239 #undef offset_top
   6240 #undef eptrb
   6241 #undef flags
   6242 
   6243 #undef callpat
   6244 #undef charptr
   6245 #undef data
   6246 #undef next
   6247 #undef pp
   6248 #undef prev
   6249 #undef saved_eptr
   6250 
   6251 #undef new_recursive
   6252 
   6253 #undef cur_is_word
   6254 #undef condition
   6255 #undef prev_is_word
   6256 
   6257 #undef ctype
   6258 #undef length
   6259 #undef max
   6260 #undef min
   6261 #undef number
   6262 #undef offset
   6263 #undef op
   6264 #undef save_capture_last
   6265 #undef save_offset1
   6266 #undef save_offset2
   6267 #undef save_offset3
   6268 #undef stacksave
   6269 
   6270 #undef newptrb
   6271 
   6272 #endif
   6273 
   6274 /* These two are defined as macros in both cases */
   6275 
   6276 #undef fc
   6277 #undef fi
   6278 
   6279 /***************************************************************************
   6280 ***************************************************************************/
   6281 
   6282 
   6283 #ifdef NO_RECURSE
   6284 /*************************************************
   6285 *          Release allocated heap frames         *
   6286 *************************************************/
   6287 
   6288 /* This function releases all the allocated frames. The base frame is on the
   6289 machine stack, and so must not be freed.
   6290 
   6291 Argument: the address of the base frame
   6292 Returns:  nothing
   6293 */
   6294 
   6295 static void
   6296 release_match_heapframes (heapframe *frame_base)
   6297 {
   6298 heapframe *nextframe = frame_base->Xnextframe;
   6299 while (nextframe != NULL)
   6300   {
   6301   heapframe *oldframe = nextframe;
   6302   nextframe = nextframe->Xnextframe;
   6303   (PUBL(stack_free))(oldframe);
   6304   }
   6305 }
   6306 #endif
   6307 
   6308 
   6309 /*************************************************
   6310 *         Execute a Regular Expression           *
   6311 *************************************************/
   6312 
   6313 /* This function applies a compiled re to a subject string and picks out
   6314 portions of the string if it matches. Two elements in the vector are set for
   6315 each substring: the offsets to the start and end of the substring.
   6316 
   6317 Arguments:
   6318   argument_re     points to the compiled expression
   6319   extra_data      points to extra data or is NULL
   6320   subject         points to the subject string
   6321   length          length of subject string (may contain binary zeros)
   6322   start_offset    where to start in the subject string
   6323   options         option bits
   6324   offsets         points to a vector of ints to be filled in with offsets
   6325   offsetcount     the number of elements in the vector
   6326 
   6327 Returns:          > 0 => success; value is the number of elements filled in
   6328                   = 0 => success, but offsets is not big enough
   6329                    -1 => failed to match
   6330                  < -1 => some kind of unexpected problem
   6331 */
   6332 
   6333 #if defined COMPILE_PCRE8
   6334 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6335 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   6336   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   6337   int offsetcount)
   6338 #elif defined COMPILE_PCRE16
   6339 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6340 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
   6341   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
   6342   int offsetcount)
   6343 #elif defined COMPILE_PCRE32
   6344 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   6345 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
   6346   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
   6347   int offsetcount)
   6348 #endif
   6349 {
   6350 int rc, ocount, arg_offset_max;
   6351 int newline;
   6352 BOOL using_temporary_offsets = FALSE;
   6353 BOOL anchored;
   6354 BOOL startline;
   6355 BOOL firstline;
   6356 BOOL utf;
   6357 BOOL has_first_char = FALSE;
   6358 BOOL has_req_char = FALSE;
   6359 pcre_uchar first_char = 0;
   6360 pcre_uchar first_char2 = 0;
   6361 pcre_uchar req_char = 0;
   6362 pcre_uchar req_char2 = 0;
   6363 match_data match_block;
   6364 match_data *md = &match_block;
   6365 const pcre_uint8 *tables;
   6366 const pcre_uint8 *start_bits = NULL;
   6367 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
   6368 PCRE_PUCHAR end_subject;
   6369 PCRE_PUCHAR start_partial = NULL;
   6370 PCRE_PUCHAR match_partial = NULL;
   6371 PCRE_PUCHAR req_char_ptr = start_match - 1;
   6372 
   6373 const pcre_study_data *study;
   6374 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
   6375 
   6376 #ifdef NO_RECURSE
   6377 heapframe frame_zero;
   6378 frame_zero.Xprevframe = NULL;            /* Marks the top level */
   6379 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
   6380 md->match_frames_base = &frame_zero;
   6381 #endif
   6382 
   6383 /* Check for the special magic call that measures the size of the stack used
   6384 per recursive call of match(). Without the funny casting for sizeof, a Windows
   6385 compiler gave this error: "unary minus operator applied to unsigned type,
   6386 result still unsigned". Hopefully the cast fixes that. */
   6387 
   6388 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
   6389     start_offset == -999)
   6390 #ifdef NO_RECURSE
   6391   return -((int)sizeof(heapframe));
   6392 #else
   6393   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
   6394 #endif
   6395 
   6396 /* Plausibility checks */
   6397 
   6398 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
   6399 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
   6400   return PCRE_ERROR_NULL;
   6401 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
   6402 if (length < 0) return PCRE_ERROR_BADLENGTH;
   6403 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
   6404 
   6405 /* Check that the first field in the block is the magic number. If it is not,
   6406 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
   6407 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
   6408 means that the pattern is likely compiled with different endianness. */
   6409 
   6410 if (re->magic_number != MAGIC_NUMBER)
   6411   return re->magic_number == REVERSED_MAGIC_NUMBER?
   6412     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
   6413 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   6414 
   6415 /* These two settings are used in the code for checking a UTF-8 string that
   6416 follows immediately afterwards. Other values in the md block are used only
   6417 during "normal" pcre_exec() processing, not when the JIT support is in use,
   6418 so they are set up later. */
   6419 
   6420 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
   6421 utf = md->utf = (re->options & PCRE_UTF8) != 0;
   6422 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
   6423               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
   6424 
   6425 /* Check a UTF-8 string if required. Pass back the character offset and error
   6426 code for an invalid string if a results vector is available. */
   6427 
   6428 #ifdef SUPPORT_UTF
   6429 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   6430   {
   6431   int erroroffset;
   6432   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
   6433   if (errorcode != 0)
   6434     {
   6435     if (offsetcount >= 2)
   6436       {
   6437       offsets[0] = erroroffset;
   6438       offsets[1] = errorcode;
   6439       }
   6440 #if defined COMPILE_PCRE8
   6441     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
   6442       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
   6443 #elif defined COMPILE_PCRE16
   6444     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
   6445       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
   6446 #elif defined COMPILE_PCRE32
   6447     return PCRE_ERROR_BADUTF32;
   6448 #endif
   6449     }
   6450 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
   6451   /* Check that a start_offset points to the start of a UTF character. */
   6452   if (start_offset > 0 && start_offset < length &&
   6453       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
   6454     return PCRE_ERROR_BADUTF8_OFFSET;
   6455 #endif
   6456   }
   6457 #endif
   6458 
   6459 /* If the pattern was successfully studied with JIT support, run the JIT
   6460 executable instead of the rest of this function. Most options must be set at
   6461 compile time for the JIT code to be usable. Fallback to the normal code path if
   6462 an unsupported flag is set. */
   6463 
   6464 #ifdef SUPPORT_JIT
   6465 if (extra_data != NULL
   6466     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
   6467                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
   6468     && extra_data->executable_jit != NULL
   6469     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
   6470   {
   6471   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
   6472        start_offset, options, offsets, offsetcount);
   6473 
  /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
  matching mode is not compiled. In this case we simply fall back to the
  interpreter. */
   6476 
   6477   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
   6478   }
   6479 #endif
   6480 
   6481 /* Carry on with non-JIT matching. This information is for finding all the
   6482 numbers associated with a given name, for condition testing. */
   6483 
   6484 md->name_table = (pcre_uchar *)re + re->name_table_offset;
   6485 md->name_count = re->name_count;
   6486 md->name_entry_size = re->name_entry_size;
   6487 
   6488 /* Fish out the optional data from the extra_data structure, first setting
   6489 the default values. */
   6490 
   6491 study = NULL;
   6492 md->match_limit = MATCH_LIMIT;
   6493 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
   6494 md->callout_data = NULL;
   6495 
   6496 /* The table pointer is always in native byte order. */
   6497 
   6498 tables = re->tables;
   6499 
   6500 /* The two limit values override the defaults, whatever their value. */
   6501 
   6502 if (extra_data != NULL)
   6503   {
   6504   unsigned long int flags = extra_data->flags;
   6505   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
   6506     study = (const pcre_study_data *)extra_data->study_data;
   6507   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
   6508     md->match_limit = extra_data->match_limit;
   6509   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
   6510     md->match_limit_recursion = extra_data->match_limit_recursion;
   6511   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
   6512     md->callout_data = extra_data->callout_data;
   6513   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
   6514   }
   6515 
   6516 /* Limits in the regex override only if they are smaller. */
   6517 
   6518 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
   6519   md->match_limit = re->limit_match;
   6520 
   6521 if ((re->flags & PCRE_RLSET) != 0 &&
   6522     re->limit_recursion < md->match_limit_recursion)
   6523   md->match_limit_recursion = re->limit_recursion;
   6524 
   6525 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
   6526 is a feature that makes it possible to save compiled regex and re-use them
   6527 in other programs later. */
   6528 
   6529 if (tables == NULL) tables = PRIV(default_tables);
   6530 
   6531 /* Set up other data */
   6532 
   6533 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
   6534 startline = (re->flags & PCRE_STARTLINE) != 0;
   6535 firstline = (re->options & PCRE_FIRSTLINE) != 0;
   6536 
   6537 /* The code starts after the real_pcre block and the capture name table. */
   6538 
   6539 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
   6540   re->name_count * re->name_entry_size;
   6541 
   6542 md->start_subject = (PCRE_PUCHAR)subject;
   6543 md->start_offset = start_offset;
   6544 md->end_subject = md->start_subject + length;
   6545 end_subject = md->end_subject;
   6546 
   6547 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
   6548 md->use_ucp = (re->options & PCRE_UCP) != 0;
   6549 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
   6550 md->ignore_skip_arg = 0;
   6551 
   6552 /* Some options are unpacked into BOOL variables in the hope that testing
   6553 them will be faster than individual option bits. */
   6554 
   6555 md->notbol = (options & PCRE_NOTBOL) != 0;
   6556 md->noteol = (options & PCRE_NOTEOL) != 0;
   6557 md->notempty = (options & PCRE_NOTEMPTY) != 0;
   6558 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
   6559 
   6560 md->hitend = FALSE;
   6561 md->mark = md->nomatch_mark = NULL;     /* In case never set */
   6562 
   6563 md->recursive = NULL;                   /* No recursion at top level */
   6564 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
   6565 
   6566 md->lcc = tables + lcc_offset;
   6567 md->fcc = tables + fcc_offset;
   6568 md->ctypes = tables + ctypes_offset;
   6569 
   6570 /* Handle different \R options. */
   6571 
   6572 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
   6573   {
   6574   case 0:
   6575   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
   6576     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
   6577   else
   6578 #ifdef BSR_ANYCRLF
   6579   md->bsr_anycrlf = TRUE;
   6580 #else
   6581   md->bsr_anycrlf = FALSE;
   6582 #endif
   6583   break;
   6584 
   6585   case PCRE_BSR_ANYCRLF:
   6586   md->bsr_anycrlf = TRUE;
   6587   break;
   6588 
   6589   case PCRE_BSR_UNICODE:
   6590   md->bsr_anycrlf = FALSE;
   6591   break;
   6592 
   6593   default: return PCRE_ERROR_BADNEWLINE;
   6594   }
   6595 
   6596 /* Handle different types of newline. The three bits give eight cases. If
   6597 nothing is set at run time, whatever was used at compile time applies. */
   6598 
   6599 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
   6600         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
   6601   {
   6602   case 0: newline = NEWLINE; break;   /* Compile-time default */
   6603   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
   6604   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   6605   case PCRE_NEWLINE_CR+
   6606        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   6607   case PCRE_NEWLINE_ANY: newline = -1; break;
   6608   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   6609   default: return PCRE_ERROR_BADNEWLINE;
   6610   }
   6611 
   6612 if (newline == -2)
   6613   {
   6614   md->nltype = NLTYPE_ANYCRLF;
   6615   }
   6616 else if (newline < 0)
   6617   {
   6618   md->nltype = NLTYPE_ANY;
   6619   }
   6620 else
   6621   {
   6622   md->nltype = NLTYPE_FIXED;
   6623   if (newline > 255)
   6624     {
   6625     md->nllen = 2;
   6626     md->nl[0] = (newline >> 8) & 255;
   6627     md->nl[1] = newline & 255;
   6628     }
   6629   else
   6630     {
   6631     md->nllen = 1;
   6632     md->nl[0] = newline;
   6633     }
   6634   }
   6635 
   6636 /* Partial matching was originally supported only for a restricted set of
   6637 regexes; from release 8.00 there are no restrictions, but the bits are still
   6638 defined (though never set). So there's no harm in leaving this code. */
   6639 
   6640 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
   6641   return PCRE_ERROR_BADPARTIAL;
   6642 
   6643 /* If the expression has got more back references than the offsets supplied can
   6644 hold, we get a temporary chunk of working store to use during the matching.
   6645 Otherwise, we can use the vector supplied, rounding down its size to a multiple
   6646 of 3. */
   6647 
   6648 ocount = offsetcount - (offsetcount % 3);
   6649 arg_offset_max = (2*ocount)/3;
   6650 
   6651 if (re->top_backref > 0 && re->top_backref >= ocount/3)
   6652   {
   6653   ocount = re->top_backref * 3 + 3;
   6654   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
   6655   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
   6656   using_temporary_offsets = TRUE;
   6657   DPRINTF(("Got memory to hold back references\n"));
   6658   }
   6659 else md->offset_vector = offsets;
   6660 md->offset_end = ocount;
   6661 md->offset_max = (2*ocount)/3;
   6662 md->capture_last = 0;
   6663 
   6664 /* Reset the working variable associated with each extraction. These should
   6665 never be used unless previously set, but they get saved and restored, and so we
   6666 initialize them to avoid reading uninitialized locations. Also, unset the
   6667 offsets for the matched string. This is really just for tidiness with callouts,
   6668 in case they inspect these fields. */
   6669 
   6670 if (md->offset_vector != NULL)
   6671   {
   6672   register int *iptr = md->offset_vector + ocount;
   6673   register int *iend = iptr - re->top_bracket;
   6674   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
   6675   while (--iptr >= iend) *iptr = -1;
   6676   md->offset_vector[0] = md->offset_vector[1] = -1;
   6677   }
   6678 
   6679 /* Set up the first character to match, if available. The first_char value is
   6680 never set for an anchored regular expression, but the anchoring may be forced
   6681 at run time, so we have to test for anchoring. The first char may be unset for
   6682 an unanchored pattern, of course. If there's no first char and the pattern was
   6683 studied, there may be a bitmap of possible first characters. */
   6684 
   6685 if (!anchored)
   6686   {
   6687   if ((re->flags & PCRE_FIRSTSET) != 0)
   6688     {
   6689     has_first_char = TRUE;
   6690     first_char = first_char2 = (pcre_uchar)(re->first_char);
   6691     if ((re->flags & PCRE_FCH_CASELESS) != 0)
   6692       {
   6693       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
   6694 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
   6695       if (utf && first_char > 127)
   6696         first_char2 = UCD_OTHERCASE(first_char);
   6697 #endif
   6698       }
   6699     }
   6700   else
   6701     if (!startline && study != NULL &&
   6702       (study->flags & PCRE_STUDY_MAPPED) != 0)
   6703         start_bits = study->start_bits;
   6704   }
   6705 
   6706 /* For anchored or unanchored matches, there may be a "last known required
   6707 character" set. */
   6708 
   6709 if ((re->flags & PCRE_REQCHSET) != 0)
   6710   {
   6711   has_req_char = TRUE;
   6712   req_char = req_char2 = (pcre_uchar)(re->req_char);
   6713   if ((re->flags & PCRE_RCH_CASELESS) != 0)
   6714     {
   6715     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
   6716 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
   6717     if (utf && req_char > 127)
   6718       req_char2 = UCD_OTHERCASE(req_char);
   6719 #endif
   6720     }
   6721   }
   6722 
   6723 
   6724 /* ==========================================================================*/
   6725 
   6726 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
   6727 the loop runs just once. */
   6728 
   6729 for(;;)
   6730   {
   6731   PCRE_PUCHAR save_end_subject = end_subject;
   6732   PCRE_PUCHAR new_start_match;
   6733 
   6734   /* If firstline is TRUE, the start of the match is constrained to the first
   6735   line of a multiline string. That is, the match must be before or at the first
   6736   newline. Implement this by temporarily adjusting end_subject so that we stop
   6737   scanning at a newline. If the match fails at the newline, later code breaks
   6738   this loop. */
   6739 
   6740   if (firstline)
   6741     {
   6742     PCRE_PUCHAR t = start_match;
   6743 #ifdef SUPPORT_UTF
   6744     if (utf)
   6745       {
   6746       while (t < md->end_subject && !IS_NEWLINE(t))
   6747         {
   6748         t++;
   6749         ACROSSCHAR(t < end_subject, *t, t++);
   6750         }
   6751       }
   6752     else
   6753 #endif
   6754     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
   6755     end_subject = t;
   6756     }
   6757 
   6758   /* There are some optimizations that avoid running the match if a known
   6759   starting point is not found, or if a known later character is not present.
   6760   However, there is an option that disables these, for testing and for ensuring
   6761   that all callouts do actually occur. The option can be set in the regex by
   6762   (*NO_START_OPT) or passed in match-time options. */
   6763 
   6764   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
   6765     {
   6766     /* Advance to a unique first char if there is one. */
   6767 
   6768     if (has_first_char)
   6769       {
   6770       pcre_uchar smc;
   6771 
   6772       if (first_char != first_char2)
   6773         while (start_match < end_subject &&
   6774           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
   6775           start_match++;
   6776       else
   6777         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
   6778           start_match++;
   6779       }
   6780 
   6781     /* Or to just after a linebreak for a multiline match */
   6782 
   6783     else if (startline)
   6784       {
   6785       if (start_match > md->start_subject + start_offset)
   6786         {
   6787 #ifdef SUPPORT_UTF
   6788         if (utf)
   6789           {
   6790           while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6791             {
   6792             start_match++;
   6793             ACROSSCHAR(start_match < end_subject, *start_match,
   6794               start_match++);
   6795             }
   6796           }
   6797         else
   6798 #endif
   6799         while (start_match < end_subject && !WAS_NEWLINE(start_match))
   6800           start_match++;
   6801 
   6802         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
   6803         and we are now at a LF, advance the match position by one more character.
   6804         */
   6805 
   6806         if (start_match[-1] == CHAR_CR &&
   6807              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
   6808              start_match < end_subject &&
   6809              UCHAR21TEST(start_match) == CHAR_NL)
   6810           start_match++;
   6811         }
   6812       }
   6813 
   6814     /* Or to a non-unique first byte after study */
   6815 
   6816     else if (start_bits != NULL)
   6817       {
   6818       while (start_match < end_subject)
   6819         {
   6820         register pcre_uint32 c = UCHAR21TEST(start_match);
   6821 #ifndef COMPILE_PCRE8
   6822         if (c > 255) c = 255;
   6823 #endif
   6824         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
   6825         start_match++;
   6826         }
   6827       }
   6828     }   /* Starting optimizations */
   6829 
   6830   /* Restore fudged end_subject */
   6831 
   6832   end_subject = save_end_subject;
   6833 
   6834   /* The following two optimizations are disabled for partial matching or if
   6835   disabling is explicitly requested. */
   6836 
   6837   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
   6838     {
   6839     /* If the pattern was studied, a minimum subject length may be set. This is
   6840     a lower bound; no actual string of that length may actually match the
   6841     pattern. Although the value is, strictly, in characters, we treat it as
   6842     bytes to avoid spending too much time in this optimization. */
   6843 
   6844     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
   6845         (pcre_uint32)(end_subject - start_match) < study->minlength)
   6846       {
   6847       rc = MATCH_NOMATCH;
   6848       break;
   6849       }
   6850 
   6851     /* If req_char is set, we know that that character must appear in the
   6852     subject for the match to succeed. If the first character is set, req_char
   6853     must be later in the subject; otherwise the test starts at the match point.
   6854     This optimization can save a huge amount of backtracking in patterns with
   6855     nested unlimited repeats that aren't going to match. Writing separate code
   6856     for cased/caseless versions makes it go faster, as does using an
   6857     autoincrement and backing off on a match.
   6858 
   6859     HOWEVER: when the subject string is very, very long, searching to its end
   6860     can take a long time, and give bad performance on quite ordinary patterns.
   6861     This showed up when somebody was matching something like /^\d+C/ on a
   6862     32-megabyte string... so we don't do this when the string is sufficiently
   6863     long. */
   6864 
   6865     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
   6866       {
   6867       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
   6868 
   6869       /* We don't need to repeat the search if we haven't yet reached the
   6870       place we found it at last time. */
   6871 
   6872       if (p > req_char_ptr)
   6873         {
   6874         if (req_char != req_char2)
   6875           {
   6876           while (p < end_subject)
   6877             {
   6878             register pcre_uint32 pp = UCHAR21INCTEST(p);
   6879             if (pp == req_char || pp == req_char2) { p--; break; }
   6880             }
   6881           }
   6882         else
   6883           {
   6884           while (p < end_subject)
   6885             {
   6886             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
   6887             }
   6888           }
   6889 
   6890         /* If we can't find the required character, break the matching loop,
   6891         forcing a match failure. */
   6892 
   6893         if (p >= end_subject)
   6894           {
   6895           rc = MATCH_NOMATCH;
   6896           break;
   6897           }
   6898 
   6899         /* If we have found the required character, save the point where we
   6900         found it, so that we don't search again next time round the loop if
   6901         the start hasn't passed this character yet. */
   6902 
   6903         req_char_ptr = p;
   6904         }
   6905       }
   6906     }
   6907 
   6908 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
   6909   printf(">>>> Match against: ");
   6910   pchars(start_match, end_subject - start_match, TRUE, md);
   6911   printf("\n");
   6912 #endif
   6913 
   6914   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
   6915   first starting point for which a partial match was found. */
   6916 
   6917   md->start_match_ptr = start_match;
   6918   md->start_used_ptr = start_match;
   6919   md->match_call_count = 0;
   6920   md->match_function_type = 0;
   6921   md->end_offset_top = 0;
   6922   md->skip_arg_count = 0;
   6923   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
   6924   if (md->hitend && start_partial == NULL)
   6925     {
   6926     start_partial = md->start_used_ptr;
   6927     match_partial = start_match;
   6928     }
   6929 
   6930   switch(rc)
   6931     {
   6932     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
   6933     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
   6934     entirely. The only way we can do that is to re-do the match at the same
   6935     point, with a flag to force SKIP with an argument to be ignored. Just
   6936     treating this case as NOMATCH does not work because it does not check other
   6937     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
   6938 
   6939     case MATCH_SKIP_ARG:
   6940     new_start_match = start_match;
   6941     md->ignore_skip_arg = md->skip_arg_count;
   6942     break;
   6943 
   6944     /* SKIP passes back the next starting point explicitly, but if it is no
   6945     greater than the match we have just done, treat it as NOMATCH. */
   6946 
   6947     case MATCH_SKIP:
   6948     if (md->start_match_ptr > start_match)
   6949       {
   6950       new_start_match = md->start_match_ptr;
   6951       break;
   6952       }
   6953     /* Fall through */
   6954 
   6955     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
   6956     exactly like PRUNE. Unset ignore SKIP-with-argument. */
   6957 
   6958     case MATCH_NOMATCH:
   6959     case MATCH_PRUNE:
   6960     case MATCH_THEN:
   6961     md->ignore_skip_arg = 0;
   6962     new_start_match = start_match + 1;
   6963 #ifdef SUPPORT_UTF
   6964     if (utf)
   6965       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
   6966         new_start_match++);
   6967 #endif
   6968     break;
   6969 
   6970     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
   6971 
   6972     case MATCH_COMMIT:
   6973     rc = MATCH_NOMATCH;
   6974     goto ENDLOOP;
   6975 
   6976     /* Any other return is either a match, or some kind of error. */
   6977 
   6978     default:
   6979     goto ENDLOOP;
   6980     }
   6981 
   6982   /* Control reaches here for the various types of "no match at this point"
   6983   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
   6984 
   6985   rc = MATCH_NOMATCH;
   6986 
   6987   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
   6988   newline in the subject (though it may continue over the newline). Therefore,
   6989   if we have just failed to match, starting at a newline, do not continue. */
   6990 
   6991   if (firstline && IS_NEWLINE(start_match)) break;
   6992 
   6993   /* Advance to new matching position */
   6994 
   6995   start_match = new_start_match;
   6996 
   6997   /* Break the loop if the pattern is anchored or if we have passed the end of
   6998   the subject. */
   6999 
   7000   if (anchored || start_match > end_subject) break;
   7001 
   7002   /* If we have just passed a CR and we are now at a LF, and the pattern does
   7003   not contain any explicit matches for \r or \n, and the newline option is CRLF
   7004   or ANY or ANYCRLF, advance the match position by one more character. In
   7005   normal matching start_match will always be greater than the first position at
   7006   this stage, but a failed *SKIP can cause a return at the same point, which is
   7007   why the first test exists. */
   7008 
   7009   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
   7010       start_match[-1] == CHAR_CR &&
   7011       start_match < end_subject &&
   7012       *start_match == CHAR_NL &&
   7013       (re->flags & PCRE_HASCRORLF) == 0 &&
   7014         (md->nltype == NLTYPE_ANY ||
   7015          md->nltype == NLTYPE_ANYCRLF ||
   7016          md->nllen == 2))
   7017     start_match++;
   7018 
   7019   md->mark = NULL;   /* Reset for start of next match attempt */
   7020   }                  /* End of for(;;) "bumpalong" loop */
   7021 
   7022 /* ==========================================================================*/
   7023 
   7024 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
   7025 conditions is true:
   7026 
   7027 (1) The pattern is anchored or the match was failed by (*COMMIT);
   7028 
   7029 (2) We are past the end of the subject;
   7030 
   7031 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
   7032     this option requests that a match occur at or before the first newline in
   7033     the subject.
   7034 
   7035 When we have a match and the offset vector is big enough to deal with any
   7036 backreferences, captured substring offsets will already be set up. In the case
   7037 where we had to get some local store to hold offsets for backreference
   7038 processing, copy those that we can. In this case there need not be overflow if
   7039 certain parts of the pattern were not used, even though there are more
   7040 capturing parentheses than vector slots. */
   7041 
   7042 ENDLOOP:
   7043 
   7044 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   7045   {
   7046   if (using_temporary_offsets)
   7047     {
   7048     if (arg_offset_max >= 4)
   7049       {
   7050       memcpy(offsets + 2, md->offset_vector + 2,
   7051         (arg_offset_max - 2) * sizeof(int));
   7052       DPRINTF(("Copied offsets from temporary memory\n"));
   7053       }
   7054     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
   7055     DPRINTF(("Freeing temporary memory\n"));
   7056     (PUBL(free))(md->offset_vector);
   7057     }
   7058 
   7059   /* Set the return code to the number of captured strings, or 0 if there were
   7060   too many to fit into the vector. */
   7061 
   7062   rc = ((md->capture_last & OVFLBIT) != 0 &&
   7063          md->end_offset_top >= arg_offset_max)?
   7064     0 : md->end_offset_top/2;
   7065 
   7066   /* If there is space in the offset vector, set any unused pairs at the end of
   7067   the pattern to -1 for backwards compatibility. It is documented that this
   7068   happens. In earlier versions, the whole set of potential capturing offsets
   7069   was set to -1 each time round the loop, but this is handled differently now.
   7070   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
   7071   those at the end that need unsetting here. We can't just unset them all at
   7072   the start of the whole thing because they may get set in one branch that is
   7073   not the final matching branch. */
   7074 
   7075   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
   7076     {
   7077     register int *iptr, *iend;
   7078     int resetcount = 2 + re->top_bracket * 2;
   7079     if (resetcount > offsetcount) resetcount = offsetcount;
   7080     iptr = offsets + md->end_offset_top;
   7081     iend = offsets + resetcount;
   7082     while (iptr < iend) *iptr++ = -1;
   7083     }
   7084 
   7085   /* If there is space, set up the whole thing as substring 0. The value of
   7086   md->start_match_ptr might be modified if \K was encountered on the successful
   7087   matching path. */
   7088 
   7089   if (offsetcount < 2) rc = 0; else
   7090     {
   7091     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
   7092     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
   7093     }
   7094 
   7095   /* Return MARK data if requested */
   7096 
   7097   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
   7098     *(extra_data->mark) = (pcre_uchar *)md->mark;
   7099   DPRINTF((">>>> returning %d\n", rc));
   7100 #ifdef NO_RECURSE
   7101   release_match_heapframes(&frame_zero);
   7102 #endif
   7103   return rc;
   7104   }
   7105 
   7106 /* Control gets here if there has been an error, or if the overall match
   7107 attempt has failed at all permitted starting positions. */
   7108 
   7109 if (using_temporary_offsets)
   7110   {
   7111   DPRINTF(("Freeing temporary memory\n"));
   7112   (PUBL(free))(md->offset_vector);
   7113   }
   7114 
   7115 /* For anything other than nomatch or partial match, just return the code. */
   7116 
   7117 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   7118   {
   7119   DPRINTF((">>>> error: returning %d\n", rc));
   7120 #ifdef NO_RECURSE
   7121   release_match_heapframes(&frame_zero);
   7122 #endif
   7123   return rc;
   7124   }
   7125 
   7126 /* Handle partial matches - disable any mark data */
   7127 
   7128 if (match_partial != NULL)
   7129   {
   7130   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
   7131   md->mark = NULL;
   7132   if (offsetcount > 1)
   7133     {
   7134     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
   7135     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
   7136     if (offsetcount > 2)
   7137       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
   7138     }
   7139   rc = PCRE_ERROR_PARTIAL;
   7140   }
   7141 
   7142 /* This is the classic nomatch case */
   7143 
   7144 else
   7145   {
   7146   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
   7147   rc = PCRE_ERROR_NOMATCH;
   7148   }
   7149 
   7150 /* Return the MARK data if it has been requested. */
   7151 
   7152 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
   7153   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
   7154 #ifdef NO_RECURSE
   7155   release_match_heapframes(&frame_zero);
   7156 #endif
   7157 return rc;
   7158 }
   7159 
   7160 /* End of pcre_exec.c */
   7161