Home | History | Annotate | Download | only in pcre
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9            Copyright (c) 1997-2010 University of Cambridge
     10 
     11 -----------------------------------------------------------------------------
     12 Redistribution and use in source and binary forms, with or without
     13 modification, are permitted provided that the following conditions are met:
     14 
     15     * Redistributions of source code must retain the above copyright notice,
     16       this list of conditions and the following disclaimer.
     17 
     18     * Redistributions in binary form must reproduce the above copyright
     19       notice, this list of conditions and the following disclaimer in the
     20       documentation and/or other materials provided with the distribution.
     21 
     22     * Neither the name of the University of Cambridge nor the names of its
     23       contributors may be used to endorse or promote products derived from
     24       this software without specific prior written permission.
     25 
     26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36 POSSIBILITY OF SUCH DAMAGE.
     37 -----------------------------------------------------------------------------
     38 */
     39 
     40 
     41 /* This module contains pcre_exec(), the externally visible function that does
     42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
     43 possible. There are also some static supporting functions. */
     44 
     45 #ifdef HAVE_CONFIG_H
     46 #include "config.h"
     47 #endif
     48 
     49 #define NLBLOCK md             /* Block containing newline information */
     50 #define PSSTART start_subject  /* Field containing processed string start */
     51 #define PSEND   end_subject    /* Field containing processed string end */
     52 
     53 #include "pcre_internal.h"
     54 
     55 /* Undefine some potentially clashing cpp symbols */
     56 
     57 #undef min
     58 #undef max
     59 
     60 /* Flag bits for the match() function */
     61 
     62 #define match_condassert     0x01  /* Called to check a condition assertion */
     63 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
     64 
     65 /* Non-error returns from the match() function. Error returns are externally
     66 defined PCRE_ERROR_xxx codes, which are all negative. */
     67 
     68 #define MATCH_MATCH        1
     69 #define MATCH_NOMATCH      0
     70 
     71 /* Special internal returns from the match() function. Make them sufficiently
     72 negative to avoid the external error codes. */
     73 
     74 #define MATCH_ACCEPT       (-999)
     75 #define MATCH_COMMIT       (-998)
     76 #define MATCH_PRUNE        (-997)
     77 #define MATCH_SKIP         (-996)
     78 #define MATCH_SKIP_ARG     (-995)
     79 #define MATCH_THEN         (-994)
     80 
     81 /* This is a convenience macro for code that occurs many times. */
     82 
     83 #define MRRETURN(ra) \
     84   { \
     85   md->mark = markptr; \
     86   RRETURN(ra); \
     87   }
     88 
     89 /* Maximum number of ints of offset to save on the stack for recursive calls.
     90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
     91 because the offset vector is always a multiple of 3 long. */
     92 
     93 #define REC_STACK_SAVE_MAX 30
     94 
     95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
     96 
     97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
     98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
     99 
    100 
    101 
    102 #ifdef PCRE_DEBUG
    103 /*************************************************
    104 *        Debugging function to print chars       *
    105 *************************************************/
    106 
    107 /* Print a sequence of chars in printable format, stopping at the end of the
    108 subject if requested.
    109 
    110 Arguments:
    111   p           points to characters
    112   length      number to print
    113   is_subject  TRUE if printing from within md->start_subject
    114   md          pointer to matching data block, if is_subject is TRUE
    115 
    116 Returns:     nothing
    117 */
    118 
    119 static void
    120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
    121 {
    122 unsigned int c;
    123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
    124 while (length-- > 0)
    125   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
    126 }
    127 #endif
    128 
    129 
    130 
    131 /*************************************************
    132 *          Match a back-reference                *
    133 *************************************************/
    134 
    135 /* If a back reference hasn't been set, the length that is passed is greater
    136 than the number of characters left in the string, so the match fails.
    137 
    138 Arguments:
    139   offset      index into the offset vector
    140   eptr        points into the subject
    141   length      length to be matched
    142   md          points to match data block
    143   ims         the ims flags
    144 
    145 Returns:      TRUE if matched
    146 */
    147 
    148 static BOOL
    149 match_ref(int offset, register USPTR eptr, int length, match_data *md,
    150   unsigned long int ims)
    151 {
    152 USPTR p = md->start_subject + md->offset_vector[offset];
    153 
    154 #ifdef PCRE_DEBUG
    155 if (eptr >= md->end_subject)
    156   printf("matching subject <null>");
    157 else
    158   {
    159   printf("matching subject ");
    160   pchars(eptr, length, TRUE, md);
    161   }
    162 printf(" against backref ");
    163 pchars(p, length, FALSE, md);
    164 printf("\n");
    165 #endif
    166 
    167 /* Always fail if not enough characters left */
    168 
    169 if (length > md->end_subject - eptr) return FALSE;
    170 
    171 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
    172 properly if Unicode properties are supported. Otherwise, we can check only
    173 ASCII characters. */
    174 
    175 if ((ims & PCRE_CASELESS) != 0)
    176   {
    177 #ifdef SUPPORT_UTF8
    178 #ifdef SUPPORT_UCP
    179   if (md->utf8)
    180     {
    181     USPTR endptr = eptr + length;
    182     while (eptr < endptr)
    183       {
    184       int c, d;
    185       GETCHARINC(c, eptr);
    186       GETCHARINC(d, p);
    187       if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
    188       }
    189     }
    190   else
    191 #endif
    192 #endif
    193 
    194   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
    195   is no UCP support. */
    196 
    197   while (length-- > 0)
    198     { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
    199   }
    200 
    201 /* In the caseful case, we can just compare the bytes, whether or not we
    202 are in UTF-8 mode. */
    203 
    204 else
    205   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
    206 
    207 return TRUE;
    208 }
    209 
    210 
    211 
    212 /***************************************************************************
    213 ****************************************************************************
    214                    RECURSION IN THE match() FUNCTION
    215 
    216 The match() function is highly recursive, though not every recursive call
    217 increases the recursive depth. Nevertheless, some regular expressions can cause
    218 it to recurse to a great depth. I was writing for Unix, so I just let it call
    219 itself recursively. This uses the stack for saving everything that has to be
    220 saved for a recursive call. On Unix, the stack can be large, and this works
    221 fine.
    222 
    223 It turns out that on some non-Unix-like systems there are problems with
    224 programs that use a lot of stack. (This despite the fact that every last chip
    225 has oodles of memory these days, and techniques for extending the stack have
    226 been known for decades.) So....
    227 
    228 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
    229 calls by keeping local variables that need to be preserved in blocks of memory
    230 obtained from malloc() instead of on the stack. Macros are used to
    231 achieve this so that the actual code doesn't look very different to what it
    232 always used to.
    233 
    234 The original heap-recursive code used longjmp(). However, it seems that this
    235 can be very slow on some operating systems. Following a suggestion from Stan
    236 Switzer, the use of longjmp() has been abolished, at the cost of having to
    237 provide a unique number for each call to RMATCH. There is no way of generating
    238 a sequence of numbers at compile time in C. I have given them names, to make
    239 them stand out more clearly.
    240 
    241 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
    242 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
    243 tests. Furthermore, not using longjmp() means that local dynamic variables
    244 don't have indeterminate values; this has meant that the frame size can be
    245 reduced because the result can be "passed back" by straight setting of the
    246 variable instead of being passed in the frame.
    247 ****************************************************************************
    248 ***************************************************************************/
    249 
    250 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
    251 below must be updated in sync.  */
    252 
    253 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
    254        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
    255        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
    256        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
    257        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
    258        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
    259        RM61,  RM62 };
    260 
    261 /* These versions of the macros use the stack, as normal. There are debugging
    262 versions and production versions. Note that the "rw" argument of RMATCH isn't
    263 actually used in this definition. */
    264 
    265 #ifndef NO_RECURSE
    266 #define REGISTER register
    267 
    268 #ifdef PCRE_DEBUG
    269 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
    270   { \
    271   printf("match() called in line %d\n", __LINE__); \
    272   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
    273   printf("to line %d\n", __LINE__); \
    274   }
    275 #define RRETURN(ra) \
    276   { \
    277   printf("match() returned %d from line %d ", ra, __LINE__); \
    278   return ra; \
    279   }
    280 #else
    281 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
    282   rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
    283 #define RRETURN(ra) return ra
    284 #endif
    285 
    286 #else
    287 
    288 
    289 /* These versions of the macros manage a private stack on the heap. Note that
    290 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
    291 argument of match(), which never changes. */
    292 
    293 #define REGISTER
    294 
    295 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
    296   {\
    297   heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
    298   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    299   frame->Xwhere = rw; /* record, in the calling frame, which RMn site made the call */ \
    300   newframe->Xeptr = ra; /* the new frame receives the "arguments" of the call */\
    301   newframe->Xecode = rb;\
    302   newframe->Xmstart = mstart; /* mstart and markptr are passed on implicitly */\
    303   newframe->Xmarkptr = markptr;\
    304   newframe->Xoffset_top = rc;\
    305   newframe->Xims = re; /* rd (the md argument) is deliberately not stored */\
    306   newframe->Xeptrb = rf;\
    307   newframe->Xflags = rg;\
    308   newframe->Xrdepth = frame->Xrdepth + 1;\
    309   newframe->Xprevframe = frame; /* link back to the caller for RRETURN */\
    310   frame = newframe; /* the new frame becomes the current one */\
    311   DPRINTF(("restarting from line %d\n", __LINE__));\
    312   goto HEAP_RECURSE; /* "call": re-enter match() at its start label */\
    313   L_##rw: /* resume point; presumably reached from HEAP_RETURN via Xwhere - confirm there */\
    314   DPRINTF(("jumped back to line %d\n", __LINE__));\
    315   }
    316 
    317 #define RRETURN(ra)\
    318   {\
    319   heapframe *oldframe = frame;\
    320   frame = oldframe->Xprevframe; /* pop: make the caller's frame current again */\
    321   (pcre_stack_free)(oldframe);\
    322   if (frame != NULL) /* a caller exists: hand the result back in rrc */\
    323     {\
    324     rrc = ra;\
    325     goto HEAP_RETURN;\
    326     }\
    327   return ra; /* top-level frame: really return from match() */\
    328   }
    329 
    330 
    331 /* Structure for remembering the local variables in a private frame */
    332 
    333 typedef struct heapframe {   /* one simulated activation of match() when NO_RECURSE is defined */
    334   struct heapframe *Xprevframe;   /* frame of the caller; NULL marks the top level */
    335 
    336   /* Function arguments that may change (reached via the same names without the X prefix) */
    337 
    338   USPTR Xeptr;                    /* eptr: current character in subject */
    339   const uschar *Xecode;           /* ecode: current position in compiled code */
    340   USPTR Xmstart;                  /* mstart: current match start position */
    341   USPTR Xmarkptr;                 /* markptr: most recent MARK name, or NULL */
    342   int Xoffset_top;                /* offset_top: current top pointer */
    343   long int Xims;                  /* ims: current /i, /m, /s options. NOTE(review): signed here, but match() declares ims as unsigned long int */
    344   eptrblock *Xeptrb;              /* eptrb: chain of saved eptr blocks for empty-match checks */
    345   int Xflags;                     /* flags: match_condassert / match_cbegroup bits */
    346   unsigned int Xrdepth;           /* rdepth: recursion depth; RMATCH stores caller's value + 1 */
    347 
    348   /* Function local variables that must survive an RMATCH() call (same naming scheme) */
    349 
    350   USPTR Xcallpat;
    351 #ifdef SUPPORT_UTF8
    352   USPTR Xcharptr;
    353 #endif
    354   USPTR Xdata;
    355   USPTR Xnext;
    356   USPTR Xpp;
    357   USPTR Xprev;
    358   USPTR Xsaved_eptr;
    359 
    360   recursion_info Xnew_recursive;
    361 
    362   BOOL Xcur_is_word;
    363   BOOL Xcondition;
    364   BOOL Xprev_is_word;
    365 
    366   unsigned long int Xoriginal_ims;   /* ims as it was on entry, saved for resetting on ')' */
    367 
    368 #ifdef SUPPORT_UCP
    369   int Xprop_type;
    370   int Xprop_value;
    371   int Xprop_fail_result;
    372   int Xprop_category;
    373   int Xprop_chartype;
    374   int Xprop_script;
    375   int Xoclength;
    376   uschar Xocchars[8];
    377 #endif
    378 
    379   int Xcodelink;
    380   int Xctype;
    381   unsigned int Xfc;               /* fc: separate from c here; in the recursive build fc is just c */
    382   int Xfi;                        /* fi: separate from i here; in the recursive build fi is just i */
    383   int Xlength;
    384   int Xmax;
    385   int Xmin;
    386   int Xnumber;                    /* e.g. the bracket number read in OP_CBRA/OP_SCBRA */
    387   int Xoffset;                    /* e.g. number << 1, the offset-vector index for that bracket */
    388   int Xop;                        /* opcode currently being processed (*ecode) */
    389   int Xsave_capture_last;         /* copy of md->capture_last taken before trying a capturing bracket */
    390   int Xsave_offset1, Xsave_offset2, Xsave_offset3;   /* copies of offset_vector entries taken at the same point */
    391   int Xstacksave[REC_STACK_SAVE_MAX];   /* in-frame space for saved offsets; see REC_STACK_SAVE_MAX */
    392 
    393   eptrblock Xnewptrb;             /* block chained onto eptrb when match_cbegroup is set */
    394 
    395   /* Where to jump back to: the RMn value stored by RMATCH in the calling frame */
    396 
    397   int Xwhere;
    398 
    399 } heapframe;
    400 
    401 #endif
    402 
    403 
    404 /***************************************************************************
    405 ***************************************************************************/
    406 
    407 
    408 
    409 /*************************************************
    410 *         Match from current position            *
    411 *************************************************/
    412 
    413 /* This function is called recursively in many circumstances. Whenever it
    414 returns a negative (error) response, the outer incarnation must also return the
    415 same response. */
    416 
    417 /* These macros pack up tests that are used for partial matching, and which
    418 appear several times in the code. We set the "hit end" flag if the pointer is
    419 at the end of the subject and also past the start of the subject (i.e.
    420 something has been matched). For hard partial matching, we then return
    421 immediately. The second one is used when we already know we are past the end of
    422 the subject. */
    423 
    424 #define CHECK_PARTIAL()\
    425   if (md->partial != 0 && eptr >= md->end_subject && \
    426       eptr > md->start_used_ptr) \
    427     { \
    428     md->hitend = TRUE; \
    429     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
    430     }
    431 
    432 #define SCHECK_PARTIAL()\
    433   if (md->partial != 0 && eptr > md->start_used_ptr) \
    434     { \
    435     md->hitend = TRUE; \
    436     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
    437     }
    438 
    439 
    440 /* Performance note: It might be tempting to extract commonly used fields from
    441 the md structure (e.g. utf8, end_subject) into individual variables to improve
    442 performance. Tests using gcc on a SPARC disproved this; in the first case, it
    443 made performance worse.
    444 
    445 Arguments:
    446    eptr        pointer to current character in subject
    447    ecode       pointer to current position in compiled code
    448    mstart      pointer to the current match start position (can be modified
    449                  by encountering \K)
    450    markptr     pointer to the most recent MARK name, or NULL
    451    offset_top  current top pointer
    452    md          pointer to "static" info for the match
    453    ims         current /i, /m, and /s options
    454    eptrb       pointer to chain of blocks containing eptr at start of
    455                  brackets - for testing for empty matches
    456    flags       can contain
    457                  match_condassert - this is an assertion condition
    458                  match_cbegroup - this is the start of an unlimited repeat
    459                    group that can match an empty string
    460    rdepth      the recursion depth
    461 
    462 Returns:       MATCH_MATCH if matched            )  these values are >= 0
    463                MATCH_NOMATCH if failed to match  )
    464                a negative MATCH_xxx value for PRUNE, SKIP, etc
    465                a negative PCRE_ERROR_xxx value if aborted by an error condition
    466                  (e.g. stopped by repeated call or recursion limit)
    467 */
    468 
    469 static int
    470 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
    471   const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
    472   eptrblock *eptrb, int flags, unsigned int rdepth)
    473 {
    474 /* These variables do not need to be preserved over recursion in this function,
    475 so they can be ordinary variables in all cases. Mark some of them with
    476 "register" because they are used a lot in loops. */
    477 
    478 register int  rrc;         /* Returns from recursive calls */
    479 register int  i;           /* Used for loops not involving calls to RMATCH() */
    480 register unsigned int c;   /* Character values not kept over RMATCH() calls */
    481 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
    482 
    483 BOOL minimize, possessive; /* Quantifier options */
    484 int condcode;
    485 
    486 /* When recursion is not being used, all "local" variables that have to be
    487 preserved over calls to RMATCH() are part of a "frame" which is obtained from
    488 heap storage. Set up the top-level frame here; others are obtained from the
    489 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
    490 
    491 #ifdef NO_RECURSE
    492 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
    493 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
    494 frame->Xprevframe = NULL;            /* Marks the top level */
    495 
    496 /* Copy in the original argument variables */
    497 
    498 frame->Xeptr = eptr;
    499 frame->Xecode = ecode;
    500 frame->Xmstart = mstart;
    501 frame->Xmarkptr = markptr;
    502 frame->Xoffset_top = offset_top;
    503 frame->Xims = ims;
    504 frame->Xeptrb = eptrb;
    505 frame->Xflags = flags;
    506 frame->Xrdepth = rdepth;
    507 
    508 /* This is where control jumps back to to effect "recursion" */
    509 
    510 HEAP_RECURSE:
    511 
    512 /* Macros make the argument variables come from the current frame */
    513 
    514 #define eptr               frame->Xeptr
    515 #define ecode              frame->Xecode
    516 #define mstart             frame->Xmstart
    517 #define markptr            frame->Xmarkptr
    518 #define offset_top         frame->Xoffset_top
    519 #define ims                frame->Xims
    520 #define eptrb              frame->Xeptrb
    521 #define flags              frame->Xflags
    522 #define rdepth             frame->Xrdepth
    523 
    524 /* Ditto for the local variables */
    525 
    526 #ifdef SUPPORT_UTF8
    527 #define charptr            frame->Xcharptr
    528 #endif
    529 #define callpat            frame->Xcallpat
    530 #define codelink           frame->Xcodelink
    531 #define data               frame->Xdata
    532 #define next               frame->Xnext
    533 #define pp                 frame->Xpp
    534 #define prev               frame->Xprev
    535 #define saved_eptr         frame->Xsaved_eptr
    536 
    537 #define new_recursive      frame->Xnew_recursive
    538 
    539 #define cur_is_word        frame->Xcur_is_word
    540 #define condition          frame->Xcondition
    541 #define prev_is_word       frame->Xprev_is_word
    542 
    543 #define original_ims       frame->Xoriginal_ims
    544 
    545 #ifdef SUPPORT_UCP
    546 #define prop_type          frame->Xprop_type
    547 #define prop_value         frame->Xprop_value
    548 #define prop_fail_result   frame->Xprop_fail_result
    549 #define prop_category      frame->Xprop_category
    550 #define prop_chartype      frame->Xprop_chartype
    551 #define prop_script        frame->Xprop_script
    552 #define oclength           frame->Xoclength
    553 #define occhars            frame->Xocchars
    554 #endif
    555 
    556 #define ctype              frame->Xctype
    557 #define fc                 frame->Xfc
    558 #define fi                 frame->Xfi
    559 #define length             frame->Xlength
    560 #define max                frame->Xmax
    561 #define min                frame->Xmin
    562 #define number             frame->Xnumber
    563 #define offset             frame->Xoffset
    564 #define op                 frame->Xop
    565 #define save_capture_last  frame->Xsave_capture_last
    566 #define save_offset1       frame->Xsave_offset1
    567 #define save_offset2       frame->Xsave_offset2
    568 #define save_offset3       frame->Xsave_offset3
    569 #define stacksave          frame->Xstacksave
    570 
    571 #define newptrb            frame->Xnewptrb
    572 
    573 /* When recursion is being used, local variables are allocated on the stack and
    574 get preserved during recursion in the normal way. In this environment, fi and
    575 i, and fc and c, can be the same variables. */
    576 
    577 #else         /* NO_RECURSE not defined */
    578 #define fi i
    579 #define fc c
    580 
    581 
    582 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
    583 const uschar *charptr;             /* in small blocks of the code. My normal */
    584 #endif                             /* style of coding would have declared    */
    585 const uschar *callpat;             /* them within each of those blocks.      */
    586 const uschar *data;                /* However, in order to accommodate the   */
    587 const uschar *next;                /* version of this code that uses an      */
    588 USPTR         pp;                  /* external "stack" implemented on the    */
    589 const uschar *prev;                /* heap, it is easier to declare them all */
    590 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
    591                                    /* out in a block. The only declarations  */
    592 recursion_info new_recursive;      /* within blocks below are for variables  */
    593                                    /* that do not have to be preserved over  */
    594 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
    595 BOOL condition;
    596 BOOL prev_is_word;
    597 
    598 unsigned long int original_ims;
    599 
    600 #ifdef SUPPORT_UCP
    601 int prop_type;
    602 int prop_value;
    603 int prop_fail_result;
    604 int prop_category;
    605 int prop_chartype;
    606 int prop_script;
    607 int oclength;
    608 uschar occhars[8];
    609 #endif
    610 
    611 int codelink;
    612 int ctype;
    613 int length;
    614 int max;
    615 int min;
    616 int number;
    617 int offset;
    618 int op;
    619 int save_capture_last;
    620 int save_offset1, save_offset2, save_offset3;
    621 int stacksave[REC_STACK_SAVE_MAX];
    622 
    623 eptrblock newptrb;
    624 #endif     /* NO_RECURSE */
    625 
    626 /* These statements are here to stop the compiler complaining about uninitialized
    627 variables. */
    628 
    629 #ifdef SUPPORT_UCP
    630 prop_value = 0;
    631 prop_fail_result = 0;
    632 #endif
    633 
    634 
    635 /* This label is used for tail recursion, which is used in a few cases even
    636 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
    637 used. Thanks to Ian Taylor for noticing this possibility and sending the
    638 original patch. */
    639 
    640 TAIL_RECURSE:
    641 
    642 /* OK, now we can get on with the real code of the function. Recursive calls
    643 are specified by the macro RMATCH and RRETURN is used to return. When
    644 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
    645 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
    646 defined). However, RMATCH isn't like a function call because it's quite a
    647 complicated macro. It has to be used in one particular way. This shouldn't,
    648 however, impact performance when true recursion is being used. */
    649 
    650 #ifdef SUPPORT_UTF8
    651 utf8 = md->utf8;       /* Local copy of the flag */
    652 #else
    653 utf8 = FALSE;
    654 #endif
    655 
    656 /* First check that we haven't called match() too many times, or that we
    657 haven't exceeded the recursive call limit. */
    658 
    659 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
    660 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
    661 
    662 original_ims = ims;    /* Save for resetting on ')' */
    663 
    664 /* At the start of a group with an unlimited repeat that may match an empty
    665 string, the match_cbegroup flag is set. When this is the case, add the current
    666 subject pointer to the chain of such remembered pointers, to be checked when we
    667 hit the closing ket, in order to break infinite loops that match no characters.
    668 When match() is called in other circumstances, don't add to the chain. The
    669 match_cbegroup flag must NOT be used with tail recursion, because the memory
    670 block that is used is on the stack, so a new one may be required for each
    671 match(). */
    672 
    673 if ((flags & match_cbegroup) != 0)
    674   {
    675   newptrb.epb_saved_eptr = eptr;
    676   newptrb.epb_prev = eptrb;
    677   eptrb = &newptrb;
    678   }
    679 
    680 /* Now start processing the opcodes. */
    681 
    682 for (;;)
    683   {
    684   minimize = possessive = FALSE;
    685   op = *ecode;
    686 
    687   switch(op)
    688     {
    689     case OP_MARK:
    690     markptr = ecode + 2;
    691     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
    692       ims, eptrb, flags, RM55);
    693 
    694     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
    695     argument, and we must check whether that argument matches this MARK's
    696     argument. It is passed back in md->start_match_ptr (an overloading of that
    697     variable). If it does match, we reset that variable to the current subject
    698     position and return MATCH_SKIP. Otherwise, pass back the return code
    699     unaltered. */
    700 
    701     if (rrc == MATCH_SKIP_ARG &&
    702         strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
    703       {
    704       md->start_match_ptr = eptr;
    705       RRETURN(MATCH_SKIP);
    706       }
    707 
    708     if (md->mark == NULL) md->mark = markptr;
    709     RRETURN(rrc);
    710 
    711     case OP_FAIL:
    712     MRRETURN(MATCH_NOMATCH);
    713 
    714     /* COMMIT overrides PRUNE, SKIP, and THEN */
    715 
    716     case OP_COMMIT:
    717     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
    718       ims, eptrb, flags, RM52);
    719     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
    720         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
    721         rrc != MATCH_THEN)
    722       RRETURN(rrc);
    723     MRRETURN(MATCH_COMMIT);
    724 
    725     /* PRUNE overrides THEN */
    726 
    727     case OP_PRUNE:
    728     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
    729       ims, eptrb, flags, RM51);
    730     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
    731     MRRETURN(MATCH_PRUNE);
    732 
    733     case OP_PRUNE_ARG:
    734     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
    735       ims, eptrb, flags, RM56);
    736     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
    737     md->mark = ecode + 2;
    738     RRETURN(MATCH_PRUNE);
    739 
    740     /* SKIP overrides PRUNE and THEN */
    741 
    742     case OP_SKIP:
    743     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
    744       ims, eptrb, flags, RM53);
    745     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
    746       RRETURN(rrc);
    747     md->start_match_ptr = eptr;   /* Pass back current position */
    748     MRRETURN(MATCH_SKIP);
    749 
    750     case OP_SKIP_ARG:
    751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
    752       ims, eptrb, flags, RM57);
    753     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
    754       RRETURN(rrc);
    755 
    756     /* Pass back the current skip name by overloading md->start_match_ptr and
    757     returning the special MATCH_SKIP_ARG return code. This will either be
    758     caught by a matching MARK, or get to the top, where it is treated the same
    759     as PRUNE. */
    760 
    761     md->start_match_ptr = ecode + 2;
    762     RRETURN(MATCH_SKIP_ARG);
    763 
    764     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
    765     the alt that is at the start of the current branch. This makes it possible
    766     to skip back past alternatives that precede the THEN within the current
    767     branch. */
    768 
    769     case OP_THEN:
    770     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
    771       ims, eptrb, flags, RM54);
    772     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    773     md->start_match_ptr = ecode - GET(ecode, 1);
    774     MRRETURN(MATCH_THEN);
    775 
    776     case OP_THEN_ARG:
    777     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
    778       offset_top, md, ims, eptrb, flags, RM58);
    779     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    780     md->start_match_ptr = ecode - GET(ecode, 1);
    781     md->mark = ecode + LINK_SIZE + 2;
    782     RRETURN(MATCH_THEN);
    783 
    784     /* Handle a capturing bracket. If there is space in the offset vector, save
    785     the current subject position in the working slot at the top of the vector.
    786     We mustn't change the current values of the data slot, because they may be
    787     set from a previous iteration of this group, and be referred to by a
    788     reference inside the group.
    789 
    790     If the bracket fails to match, we need to restore this value and also the
    791     values of the final offsets, in case they were set by a previous iteration
    792     of the same bracket.
    793 
    794     If there isn't enough space in the offset vector, treat this as if it were
    795     a non-capturing bracket. Don't worry about setting the flag for the error
    796     case here; that is handled in the code for KET. */
    797 
    798     case OP_CBRA:
    799     case OP_SCBRA:
    800     number = GET2(ecode, 1+LINK_SIZE);
    801     offset = number << 1;
    802 
    803 #ifdef PCRE_DEBUG
    804     printf("start bracket %d\n", number);
    805     printf("subject=");
    806     pchars(eptr, 16, TRUE, md);
    807     printf("\n");
    808 #endif
    809 
    810     if (offset < md->offset_max)
    811       {
    812       save_offset1 = md->offset_vector[offset];
    813       save_offset2 = md->offset_vector[offset+1];
    814       save_offset3 = md->offset_vector[md->offset_end - number];
    815       save_capture_last = md->capture_last;
    816 
    817       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
    818       md->offset_vector[md->offset_end - number] =
    819         (int)(eptr - md->start_subject);
    820 
    821       flags = (op == OP_SCBRA)? match_cbegroup : 0;
    822       do
    823         {
    824         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
    825           ims, eptrb, flags, RM1);
    826         if (rrc != MATCH_NOMATCH &&
    827             (rrc != MATCH_THEN || md->start_match_ptr != ecode))
    828           RRETURN(rrc);
    829         md->capture_last = save_capture_last;
    830         ecode += GET(ecode, 1);
    831         }
    832       while (*ecode == OP_ALT);
    833 
    834       DPRINTF(("bracket %d failed\n", number));
    835 
    836       md->offset_vector[offset] = save_offset1;
    837       md->offset_vector[offset+1] = save_offset2;
    838       md->offset_vector[md->offset_end - number] = save_offset3;
    839 
    840       if (rrc != MATCH_THEN) md->mark = markptr;
    841       RRETURN(MATCH_NOMATCH);
    842       }
    843 
    844     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
    845     as a non-capturing bracket. */
    846 
    847     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
    848     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
    849 
    850     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
    851 
    852     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
    853     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
    854 
    855     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
    856     final alternative within the brackets, we would return the result of a
    857     recursive call to match() whatever happened. We can reduce stack usage by
    858     turning this into a tail recursion, except in the case when match_cbegroup
    859     is set.*/
    860 
    861     case OP_BRA:
    862     case OP_SBRA:
    863     DPRINTF(("start non-capturing bracket\n"));
    864     flags = (op >= OP_SBRA)? match_cbegroup : 0;
    865     for (;;)
    866       {
    867       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
    868         {
    869         if (flags == 0)    /* Not a possibly empty group */
    870           {
    871           ecode += _pcre_OP_lengths[*ecode];
    872           DPRINTF(("bracket 0 tail recursion\n"));
    873           goto TAIL_RECURSE;
    874           }
    875 
    876         /* Possibly empty group; can't use tail recursion. */
    877 
    878         RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
    879           eptrb, flags, RM48);
    880         if (rrc == MATCH_NOMATCH) md->mark = markptr;
    881         RRETURN(rrc);
    882         }
    883 
    884       /* For non-final alternatives, continue the loop for a NOMATCH result;
    885       otherwise return. */
    886 
    887       RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
    888         eptrb, flags, RM2);
    889       if (rrc != MATCH_NOMATCH &&
    890           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
    891         RRETURN(rrc);
    892       ecode += GET(ecode, 1);
    893       }
    894     /* Control never reaches here. */
    895 
    896     /* Conditional group: compilation checked that there are no more than
    897     two branches. If the condition is false, skipping the first branch takes us
    898     past the end if there is only one branch, but that's OK because that is
    899     exactly what going to the ket would do. As there is only one branch to be
    900     obeyed, we can use tail recursion to avoid using another stack frame. */
    901 
    902     case OP_COND:
    903     case OP_SCOND:
    904     codelink= GET(ecode, 1);
    905 
    906     /* Because of the way auto-callout works during compile, a callout item is
    907     inserted between OP_COND and an assertion condition. */
    908 
    909     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
    910       {
    911       if (pcre_callout != NULL)
    912         {
    913         pcre_callout_block cb;
    914         cb.version          = 1;   /* Version 1 of the callout block */
    915         cb.callout_number   = ecode[LINK_SIZE+2];
    916         cb.offset_vector    = md->offset_vector;
    917         cb.subject          = (PCRE_SPTR)md->start_subject;
    918         cb.subject_length   = (int)(md->end_subject - md->start_subject);
    919         cb.start_match      = (int)(mstart - md->start_subject);
    920         cb.current_position = (int)(eptr - md->start_subject);
    921         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
    922         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
    923         cb.capture_top      = offset_top/2;
    924         cb.capture_last     = md->capture_last;
    925         cb.callout_data     = md->callout_data;
    926         if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
    927         if (rrc < 0) RRETURN(rrc);
    928         }
    929       ecode += _pcre_OP_lengths[OP_CALLOUT];
    930       }
    931 
    932     condcode = ecode[LINK_SIZE+1];
    933 
    934     /* Now see what the actual condition is */
    935 
    936     if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
    937       {
    938       if (md->recursive == NULL)                /* Not recursing => FALSE */
    939         {
    940         condition = FALSE;
    941         ecode += GET(ecode, 1);
    942         }
    943       else
    944         {
    945         int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
    946         condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
    947 
    948         /* If the test is for recursion into a specific subpattern, and it is
    949         false, but the test was set up by name, scan the table to see if the
    950         name refers to any other numbers, and test them. The condition is true
    951         if any one is set. */
    952 
    953         if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
    954           {
    955           uschar *slotA = md->name_table;
    956           for (i = 0; i < md->name_count; i++)
    957             {
    958             if (GET2(slotA, 0) == recno) break;
    959             slotA += md->name_entry_size;
    960             }
    961 
    962           /* Found a name for the number - there can be only one; duplicate
    963           names for different numbers are allowed, but not vice versa. First
    964           scan down for duplicates. */
    965 
    966           if (i < md->name_count)
    967             {
    968             uschar *slotB = slotA;
    969             while (slotB > md->name_table)
    970               {
    971               slotB -= md->name_entry_size;
    972               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
    973                 {
    974                 condition = GET2(slotB, 0) == md->recursive->group_num;
    975                 if (condition) break;
    976                 }
    977               else break;
    978               }
    979 
    980             /* Scan up for duplicates */
    981 
    982             if (!condition)
    983               {
    984               slotB = slotA;
    985               for (i++; i < md->name_count; i++)
    986                 {
    987                 slotB += md->name_entry_size;
    988                 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
    989                   {
    990                   condition = GET2(slotB, 0) == md->recursive->group_num;
    991                   if (condition) break;
    992                   }
    993                 else break;
    994                 }
    995               }
    996             }
    997           }
    998 
    999         /* Choose branch according to the condition */
   1000 
   1001         ecode += condition? 3 : GET(ecode, 1);
   1002         }
   1003       }
   1004 
   1005     else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
   1006       {
   1007       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
   1008       condition = offset < offset_top && md->offset_vector[offset] >= 0;
   1009 
   1010       /* If the numbered capture is unset, but the reference was by name,
   1011       scan the table to see if the name refers to any other numbers, and test
   1012       them. The condition is true if any one is set. This is tediously similar
   1013       to the code above, but not close enough to try to amalgamate. */
   1014 
   1015       if (!condition && condcode == OP_NCREF)
   1016         {
   1017         int refno = offset >> 1;
   1018         uschar *slotA = md->name_table;
   1019 
   1020         for (i = 0; i < md->name_count; i++)
   1021           {
   1022           if (GET2(slotA, 0) == refno) break;
   1023           slotA += md->name_entry_size;
   1024           }
   1025 
   1026         /* Found a name for the number - there can be only one; duplicate names
   1027         for different numbers are allowed, but not vice versa. First scan down
   1028         for duplicates. */
   1029 
   1030         if (i < md->name_count)
   1031           {
   1032           uschar *slotB = slotA;
   1033           while (slotB > md->name_table)
   1034             {
   1035             slotB -= md->name_entry_size;
   1036             if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
   1037               {
   1038               offset = GET2(slotB, 0) << 1;
   1039               condition = offset < offset_top &&
   1040                 md->offset_vector[offset] >= 0;
   1041               if (condition) break;
   1042               }
   1043             else break;
   1044             }
   1045 
   1046           /* Scan up for duplicates */
   1047 
   1048           if (!condition)
   1049             {
   1050             slotB = slotA;
   1051             for (i++; i < md->name_count; i++)
   1052               {
   1053               slotB += md->name_entry_size;
   1054               if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
   1055                 {
   1056                 offset = GET2(slotB, 0) << 1;
   1057                 condition = offset < offset_top &&
   1058                   md->offset_vector[offset] >= 0;
   1059                 if (condition) break;
   1060                 }
   1061               else break;
   1062               }
   1063             }
   1064           }
   1065         }
   1066 
   1067       /* Choose branch according to the condition */
   1068 
   1069       ecode += condition? 3 : GET(ecode, 1);
   1070       }
   1071 
   1072     else if (condcode == OP_DEF)     /* DEFINE - always false */
   1073       {
   1074       condition = FALSE;
   1075       ecode += GET(ecode, 1);
   1076       }
   1077 
   1078     /* The condition is an assertion. Call match() to evaluate it - setting
   1079     the final argument match_condassert causes it to stop at the end of an
   1080     assertion. */
   1081 
   1082     else
   1083       {
   1084       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
   1085           match_condassert, RM3);
   1086       if (rrc == MATCH_MATCH)
   1087         {
   1088         condition = TRUE;
   1089         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
   1090         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
   1091         }
   1092       else if (rrc != MATCH_NOMATCH &&
   1093               (rrc != MATCH_THEN || md->start_match_ptr != ecode))
   1094         {
   1095         RRETURN(rrc);         /* Need braces because of following else */
   1096         }
   1097       else
   1098         {
   1099         condition = FALSE;
   1100         ecode += codelink;
   1101         }
   1102       }
   1103 
   1104     /* We are now at the branch that is to be obeyed. As there is only one,
   1105     we can use tail recursion to avoid using another stack frame, except when
   1106     match_cbegroup is required for an unlimited repeat of a possibly empty
   1107     group. If the second alternative doesn't exist, we can just plough on. */
   1108 
   1109     if (condition || *ecode == OP_ALT)
   1110       {
   1111       ecode += 1 + LINK_SIZE;
   1112       if (op == OP_SCOND)        /* Possibly empty group */
   1113         {
   1114         RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
   1115         RRETURN(rrc);
   1116         }
   1117       else                       /* Group must match something */
   1118         {
   1119         flags = 0;
   1120         goto TAIL_RECURSE;
   1121         }
   1122       }
   1123     else                         /* Condition false & no alternative */
   1124       {
   1125       ecode += 1 + LINK_SIZE;
   1126       }
   1127     break;
   1128 
   1129 
   1130     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
   1131     to close any currently open capturing brackets. */
   1132 
   1133     case OP_CLOSE:
   1134     number = GET2(ecode, 1);
   1135     offset = number << 1;
   1136 
   1137 #ifdef PCRE_DEBUG
   1138       printf("end bracket %d at *ACCEPT", number);
   1139       printf("\n");
   1140 #endif
   1141 
   1142     md->capture_last = number;
   1143     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
   1144       {
   1145       md->offset_vector[offset] =
   1146         md->offset_vector[md->offset_end - number];
   1147       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1148       if (offset_top <= offset) offset_top = offset + 2;
   1149       }
   1150     ecode += 3;
   1151     break;
   1152 
   1153 
   1154     /* End of the pattern, either real or forced. If we are in a top-level
   1155     recursion, we should restore the offsets appropriately and continue from
   1156     after the call. */
   1157 
   1158     case OP_ACCEPT:
   1159     case OP_END:
   1160     if (md->recursive != NULL && md->recursive->group_num == 0)
   1161       {
   1162       recursion_info *rec = md->recursive;
   1163       DPRINTF(("End of pattern in a (?0) recursion\n"));
   1164       md->recursive = rec->prevrec;
   1165       memmove(md->offset_vector, rec->offset_save,
   1166         rec->saved_max * sizeof(int));
   1167       offset_top = rec->save_offset_top;
   1168       ims = original_ims;
   1169       ecode = rec->after_call;
   1170       break;
   1171       }
   1172 
   1173     /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
   1174     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
   1175     the subject. In both cases, backtracking will then try other alternatives,
   1176     if any. */
   1177 
   1178     if (eptr == mstart &&
   1179         (md->notempty ||
   1180           (md->notempty_atstart &&
   1181             mstart == md->start_subject + md->start_offset)))
   1182       MRRETURN(MATCH_NOMATCH);
   1183 
   1184     /* Otherwise, we have a match. */
   1185 
   1186     md->end_match_ptr = eptr;           /* Record where we ended */
   1187     md->end_offset_top = offset_top;    /* and how many extracts were taken */
   1188     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
   1189 
   1190     /* For some reason, the macros don't work properly if an expression is
   1191     given as the argument to MRRETURN when the heap is in use. */
   1192 
   1193     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
   1194     MRRETURN(rrc);
   1195 
   1196     /* Change option settings */
   1197 
   1198     case OP_OPT:
   1199     ims = ecode[1];
   1200     ecode += 2;
   1201     DPRINTF(("ims set to %02lx\n", ims));
   1202     break;
   1203 
   1204     /* Assertion brackets. Check the alternative branches in turn - the
   1205     matching won't pass the KET for an assertion. If any one branch matches,
   1206     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
   1207     start of each branch to move the current point backwards, so the code at
   1208     this level is identical to the lookahead case. */
   1209 
   1210     case OP_ASSERT:
   1211     case OP_ASSERTBACK:
   1212     do
   1213       {
   1214       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
   1215         RM4);
   1216       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1217         {
   1218         mstart = md->start_match_ptr;   /* In case \K reset it */
   1219         break;
   1220         }
   1221       if (rrc != MATCH_NOMATCH &&
   1222           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
   1223         RRETURN(rrc);
   1224       ecode += GET(ecode, 1);
   1225       }
   1226     while (*ecode == OP_ALT);
   1227     if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
   1228 
   1229     /* If checking an assertion for a condition, return MATCH_MATCH. */
   1230 
   1231     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
   1232 
   1233     /* Continue from after the assertion, updating the offsets high water
   1234     mark, since extracts may have been taken during the assertion. */
   1235 
   1236     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1237     ecode += 1 + LINK_SIZE;
   1238     offset_top = md->end_offset_top;
   1239     continue;
   1240 
   1241     /* Negative assertion: all branches must fail to match. Encountering SKIP,
   1242     PRUNE, or COMMIT means we must assume failure without checking subsequent
   1243     branches. */
   1244 
   1245     case OP_ASSERT_NOT:
   1246     case OP_ASSERTBACK_NOT:
   1247     do
   1248       {
   1249       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
   1250         RM5);
   1251       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
   1252       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
   1253         {
   1254         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1255         break;
   1256         }
   1257       if (rrc != MATCH_NOMATCH &&
   1258           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
   1259         RRETURN(rrc);
   1260       ecode += GET(ecode,1);
   1261       }
   1262     while (*ecode == OP_ALT);
   1263 
   1264     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
   1265 
   1266     ecode += 1 + LINK_SIZE;
   1267     continue;
   1268 
   1269     /* Move the subject pointer back. This occurs only at the start of
   1270     each branch of a lookbehind assertion. If we are too close to the start to
   1271     move back, this match function fails. When working with UTF-8 we move
   1272     back a number of characters, not bytes. */
   1273 
   1274     case OP_REVERSE:
   1275 #ifdef SUPPORT_UTF8
   1276     if (utf8)
   1277       {
   1278       i = GET(ecode, 1);
   1279       while (i-- > 0)
   1280         {
   1281         eptr--;
   1282         if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
   1283         BACKCHAR(eptr);
   1284         }
   1285       }
   1286     else
   1287 #endif
   1288 
   1289     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
   1290 
   1291       {
   1292       eptr -= GET(ecode, 1);
   1293       if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
   1294       }
   1295 
   1296     /* Save the earliest consulted character, then skip to next op code */
   1297 
   1298     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
   1299     ecode += 1 + LINK_SIZE;
   1300     break;
   1301 
   1302     /* The callout item calls an external function, if one is provided, passing
   1303     details of the match so far. This is mainly for debugging, though the
   1304     function is able to force a failure. */
   1305 
   1306     case OP_CALLOUT:
   1307     if (pcre_callout != NULL)
   1308       {
   1309       pcre_callout_block cb;
   1310       cb.version          = 1;   /* Version 1 of the callout block */
   1311       cb.callout_number   = ecode[1];
   1312       cb.offset_vector    = md->offset_vector;
   1313       cb.subject          = (PCRE_SPTR)md->start_subject;
   1314       cb.subject_length   = (int)(md->end_subject - md->start_subject);
   1315       cb.start_match      = (int)(mstart - md->start_subject);
   1316       cb.current_position = (int)(eptr - md->start_subject);
   1317       cb.pattern_position = GET(ecode, 2);
   1318       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
   1319       cb.capture_top      = offset_top/2;
   1320       cb.capture_last     = md->capture_last;
   1321       cb.callout_data     = md->callout_data;
   1322       if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
   1323       if (rrc < 0) RRETURN(rrc);
   1324       }
   1325     ecode += 2 + 2*LINK_SIZE;
   1326     break;
   1327 
   1328     /* Recursion either matches the current regex, or some subexpression. The
   1329     offset data is the offset to the starting bracket from the start of the
   1330     whole pattern. (This is so that it works from duplicated subpatterns.)
   1331 
   1332     If there are any capturing brackets started but not finished, we have to
   1333     save their starting points and reinstate them after the recursion. However,
   1334     we don't know how many such there are (offset_top records the completed
   1335     total) so we just have to save all the potential data. There may be up to
   1336     65535 such values, which is too large to put on the stack, but using malloc
   1337     for small numbers seems expensive. As a compromise, the stack is used when
   1338     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
   1339     is used. A problem is what to do if the malloc fails ... there is no way of
   1340     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
   1341     values on the stack, and accept that the rest may be wrong.
   1342 
   1343     There are also other values that have to be saved. We use a chained
   1344     sequence of blocks that actually live on the stack. Thanks to Robin Houston
   1345     for the original version of this logic. */
   1346 
   1347     case OP_RECURSE:
   1348       {
   1349       callpat = md->start_code + GET(ecode, 1);
   1350       new_recursive.group_num = (callpat == md->start_code)? 0 :
   1351         GET2(callpat, 1 + LINK_SIZE);
   1352 
   1353       /* Add to "recursing stack" */
   1354 
   1355       new_recursive.prevrec = md->recursive;
   1356       md->recursive = &new_recursive;
   1357 
   1358       /* Find where to continue from afterwards */
   1359 
   1360       ecode += 1 + LINK_SIZE;
   1361       new_recursive.after_call = ecode;
   1362 
   1363       /* Now save the offset data. */
   1364 
   1365       new_recursive.saved_max = md->offset_end;
   1366       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
   1367         new_recursive.offset_save = stacksave;
   1368       else
   1369         {
   1370         new_recursive.offset_save =
   1371           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
   1372         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
   1373         }
   1374 
   1375       memcpy(new_recursive.offset_save, md->offset_vector,
   1376             new_recursive.saved_max * sizeof(int));
   1377       new_recursive.save_offset_top = offset_top;
   1378 
   1379       /* OK, now we can do the recursion. For each top-level alternative we
   1380       restore the offset and recursion data. */
   1381 
   1382       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
   1383       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
   1384       do
   1385         {
   1386         RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
   1387           md, ims, eptrb, flags, RM6);
   1388         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
   1389           {
   1390           DPRINTF(("Recursion matched\n"));
   1391           md->recursive = new_recursive.prevrec;
   1392           if (new_recursive.offset_save != stacksave)
   1393             (pcre_free)(new_recursive.offset_save);
   1394           MRRETURN(MATCH_MATCH);
   1395           }
   1396         else if (rrc != MATCH_NOMATCH &&
   1397                 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
   1398           {
   1399           DPRINTF(("Recursion gave error %d\n", rrc));
   1400           if (new_recursive.offset_save != stacksave)
   1401             (pcre_free)(new_recursive.offset_save);
   1402           RRETURN(rrc);
   1403           }
   1404 
   1405         md->recursive = &new_recursive;
   1406         memcpy(md->offset_vector, new_recursive.offset_save,
   1407             new_recursive.saved_max * sizeof(int));
   1408         callpat += GET(callpat, 1);
   1409         }
   1410       while (*callpat == OP_ALT);
   1411 
   1412       DPRINTF(("Recursion didn't match\n"));
   1413       md->recursive = new_recursive.prevrec;
   1414       if (new_recursive.offset_save != stacksave)
   1415         (pcre_free)(new_recursive.offset_save);
   1416       MRRETURN(MATCH_NOMATCH);
   1417       }
   1418     /* Control never reaches here */
   1419 
   1420     /* "Once" brackets are like assertion brackets except that after a match,
   1421     the point in the subject string is not moved back. Thus there can never be
   1422     a move back into the brackets. Friedl calls these "atomic" subpatterns.
   1423     Check the alternative branches in turn - the matching won't pass the KET
   1424     for this kind of subpattern. If any one branch matches, we carry on as at
   1425     the end of a normal bracket, leaving the subject pointer, but resetting
   1426     the start-of-match value in case it was changed by \K. */
   1427 
   1428     case OP_ONCE:
   1429     prev = ecode;
   1430     saved_eptr = eptr;
   1431 
   1432     do
   1433       {
   1434       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
   1435       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
   1436         {
   1437         mstart = md->start_match_ptr;
   1438         break;
   1439         }
   1440       if (rrc != MATCH_NOMATCH &&
   1441           (rrc != MATCH_THEN || md->start_match_ptr != ecode))
   1442         RRETURN(rrc);
   1443       ecode += GET(ecode,1);
   1444       }
   1445     while (*ecode == OP_ALT);
   1446 
   1447     /* If we hit the end of the group (which could be repeated), fail */
   1448 
   1449     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
   1450 
   1451     /* Continue as from after the assertion, updating the offsets high water
   1452     mark, since extracts may have been taken. */
   1453 
   1454     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
   1455 
   1456     offset_top = md->end_offset_top;
   1457     eptr = md->end_match_ptr;
   1458 
   1459     /* For a non-repeating ket, just continue at this level. This also
   1460     happens for a repeating ket if no characters were matched in the group.
   1461     This is the forcible breaking of infinite loops as implemented in Perl
   1462     5.005. If there is an options reset, it will get obeyed in the normal
   1463     course of events. */
   1464 
   1465     if (*ecode == OP_KET || eptr == saved_eptr)
   1466       {
   1467       ecode += 1+LINK_SIZE;
   1468       break;
   1469       }
   1470 
   1471     /* The repeating kets try the rest of the pattern or restart from the
   1472     preceding bracket, in the appropriate order. The second "call" of match()
   1473     uses tail recursion, to avoid using another stack frame. We need to reset
   1474     any options that changed within the bracket before re-running it, so
   1475     check the next opcode. */
   1476 
   1477     if (ecode[1+LINK_SIZE] == OP_OPT)
   1478       {
   1479       ims = (ims & ~PCRE_IMS) | ecode[4];
   1480       DPRINTF(("ims set to %02lx at group repeat\n", ims));
   1481       }
   1482 
   1483     if (*ecode == OP_KETRMIN)
   1484       {
   1485       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
   1486       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1487       ecode = prev;
   1488       flags = 0;
   1489       goto TAIL_RECURSE;
   1490       }
   1491     else  /* OP_KETRMAX */
   1492       {
   1493       RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
   1494       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1495       ecode += 1 + LINK_SIZE;
   1496       flags = 0;
   1497       goto TAIL_RECURSE;
   1498       }
   1499     /* Control never gets here */
   1500 
   1501     /* An alternation is the end of a branch; scan along to find the end of the
   1502     bracketed group and go to there. */
   1503 
   1504     case OP_ALT:
   1505     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
   1506     break;
   1507 
   1508     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
   1509     indicating that it may occur zero times. It may repeat infinitely, or not
   1510     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
   1511     with fixed upper repeat limits are compiled as a number of copies, with the
   1512     optional ones preceded by BRAZERO or BRAMINZERO. */
   1513 
   1514     case OP_BRAZERO:
   1515       {
   1516       next = ecode+1;
   1517       RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
   1518       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1519       do next += GET(next,1); while (*next == OP_ALT);
   1520       ecode = next + 1 + LINK_SIZE;
   1521       }
   1522     break;
   1523 
   1524     case OP_BRAMINZERO:
   1525       {
   1526       next = ecode+1;
   1527       do next += GET(next, 1); while (*next == OP_ALT);
   1528       RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
   1529       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1530       ecode++;
   1531       }
   1532     break;
   1533 
   1534     case OP_SKIPZERO:
   1535       {
   1536       next = ecode+1;
   1537       do next += GET(next,1); while (*next == OP_ALT);
   1538       ecode = next + 1 + LINK_SIZE;
   1539       }
   1540     break;
   1541 
   1542     /* End of a group, repeated or non-repeating. */
   1543 
   1544     case OP_KET:
   1545     case OP_KETRMIN:
   1546     case OP_KETRMAX:
   1547     prev = ecode - GET(ecode, 1);
   1548 
   1549     /* If this was a group that remembered the subject start, in order to break
   1550     infinite repeats of empty string matches, retrieve the subject start from
   1551     the chain. Otherwise, set it NULL. */
   1552 
   1553     if (*prev >= OP_SBRA)
   1554       {
   1555       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
   1556       eptrb = eptrb->epb_prev;              /* Backup to previous group */
   1557       }
   1558     else saved_eptr = NULL;
   1559 
   1560     /* If we are at the end of an assertion group or an atomic group, stop
   1561     matching and return MATCH_MATCH, but record the current high water mark for
   1562     use by positive assertions. We also need to record the match start in case
   1563     it was changed by \K. */
   1564 
   1565     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
   1566         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
   1567         *prev == OP_ONCE)
   1568       {
   1569       md->end_match_ptr = eptr;      /* For ONCE */
   1570       md->end_offset_top = offset_top;
   1571       md->start_match_ptr = mstart;
   1572       MRRETURN(MATCH_MATCH);
   1573       }
   1574 
   1575     /* For capturing groups we have to check the group number back at the start
   1576     and if necessary complete handling an extraction by setting the offsets and
   1577     bumping the high water mark. Note that whole-pattern recursion is coded as
   1578     a recurse into group 0, so it won't be picked up here. Instead, we catch it
   1579     when the OP_END is reached. Other recursion is handled here. */
   1580 
   1581     if (*prev == OP_CBRA || *prev == OP_SCBRA)
   1582       {
   1583       number = GET2(prev, 1+LINK_SIZE);
   1584       offset = number << 1;
   1585 
   1586 #ifdef PCRE_DEBUG
   1587       printf("end bracket %d", number);
   1588       printf("\n");
   1589 #endif
   1590 
   1591       md->capture_last = number;
   1592       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
   1593         {
   1594         md->offset_vector[offset] =
   1595           md->offset_vector[md->offset_end - number];
   1596         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
   1597         if (offset_top <= offset) offset_top = offset + 2;
   1598         }
   1599 
   1600       /* Handle a recursively called group. Restore the offsets
   1601       appropriately and continue from after the call. */
   1602 
   1603       if (md->recursive != NULL && md->recursive->group_num == number)
   1604         {
   1605         recursion_info *rec = md->recursive;
   1606         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
   1607         md->recursive = rec->prevrec;
   1608         memcpy(md->offset_vector, rec->offset_save,
   1609           rec->saved_max * sizeof(int));
   1610         offset_top = rec->save_offset_top;
   1611         ecode = rec->after_call;
   1612         ims = original_ims;
   1613         break;
   1614         }
   1615       }
   1616 
   1617     /* For both capturing and non-capturing groups, reset the value of the ims
   1618     flags, in case they got changed during the group. */
   1619 
   1620     ims = original_ims;
   1621     DPRINTF(("ims reset to %02lx\n", ims));
   1622 
   1623     /* For a non-repeating ket, just continue at this level. This also
   1624     happens for a repeating ket if no characters were matched in the group.
   1625     This is the forcible breaking of infinite loops as implemented in Perl
   1626     5.005. If there is an options reset, it will get obeyed in the normal
   1627     course of events. */
   1628 
   1629     if (*ecode == OP_KET || eptr == saved_eptr)
   1630       {
   1631       ecode += 1 + LINK_SIZE;
   1632       break;
   1633       }
   1634 
   1635     /* The repeating kets try the rest of the pattern or restart from the
   1636     preceding bracket, in the appropriate order. In the second case, we can use
   1637     tail recursion to avoid using another stack frame, unless we have an
   1638     unlimited repeat of a group that can match an empty string. */
   1639 
   1640     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
   1641 
   1642     if (*ecode == OP_KETRMIN)
   1643       {
   1644       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
   1645       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1646       if (flags != 0)    /* Could match an empty string */
   1647         {
   1648         RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
   1649         RRETURN(rrc);
   1650         }
   1651       ecode = prev;
   1652       goto TAIL_RECURSE;
   1653       }
   1654     else  /* OP_KETRMAX */
   1655       {
   1656       RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
   1657       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   1658       ecode += 1 + LINK_SIZE;
   1659       flags = 0;
   1660       goto TAIL_RECURSE;
   1661       }
   1662     /* Control never gets here */
   1663 
   1664     /* Start of subject unless notbol, or after internal newline if multiline */
   1665 
   1666     case OP_CIRC:
   1667     if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
   1668     if ((ims & PCRE_MULTILINE) != 0)
   1669       {
   1670       if (eptr != md->start_subject &&
   1671           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
   1672         MRRETURN(MATCH_NOMATCH);
   1673       ecode++;
   1674       break;
   1675       }
   1676     /* ... else fall through */
   1677 
   1678     /* Start of subject assertion */
   1679 
   1680     case OP_SOD:
   1681     if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
   1682     ecode++;
   1683     break;
   1684 
   1685     /* Start of match assertion */
   1686 
   1687     case OP_SOM:
   1688     if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
   1689     ecode++;
   1690     break;
   1691 
   1692     /* Reset the start of match point */
   1693 
   1694     case OP_SET_SOM:
   1695     mstart = eptr;
   1696     ecode++;
   1697     break;
   1698 
   1699     /* Assert before internal newline if multiline, or before a terminating
   1700     newline unless endonly is set, else end of subject unless noteol is set. */
   1701 
   1702     case OP_DOLL:
   1703     if ((ims & PCRE_MULTILINE) != 0)
   1704       {
   1705       if (eptr < md->end_subject)
   1706         { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
   1707       else
   1708         {
   1709         if (md->noteol) MRRETURN(MATCH_NOMATCH);
   1710         SCHECK_PARTIAL();
   1711         }
   1712       ecode++;
   1713       break;
   1714       }
   1715     else  /* Not multiline */
   1716       {
   1717       if (md->noteol) MRRETURN(MATCH_NOMATCH);
   1718       if (!md->endonly) goto ASSERT_NL_OR_EOS;
   1719       }
   1720 
   1721     /* ... else fall through for endonly */
   1722 
   1723     /* End of subject assertion (\z) */
   1724 
   1725     case OP_EOD:
   1726     if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
   1727     SCHECK_PARTIAL();
   1728     ecode++;
   1729     break;
   1730 
   1731     /* End of subject or ending \n assertion (\Z) */
   1732 
   1733     case OP_EODN:
   1734     ASSERT_NL_OR_EOS:
   1735     if (eptr < md->end_subject &&
   1736         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
   1737       MRRETURN(MATCH_NOMATCH);
   1738 
   1739     /* Either at end of string or \n before end. */
   1740 
   1741     SCHECK_PARTIAL();
   1742     ecode++;
   1743     break;
   1744 
   1745     /* Word boundary assertions */
   1746 
   1747     case OP_NOT_WORD_BOUNDARY:
   1748     case OP_WORD_BOUNDARY:
   1749       {
   1750 
   1751       /* Find out if the previous and current characters are "word" characters.
   1752       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
   1753       be "non-word" characters. Remember the earliest consulted character for
   1754       partial matching. */
   1755 
   1756 #ifdef SUPPORT_UTF8
   1757       if (utf8)
   1758         {
   1759         /* Get status of previous character */
   1760 
   1761         if (eptr == md->start_subject) prev_is_word = FALSE; else
   1762           {
   1763           USPTR lastptr = eptr - 1;
   1764           while((*lastptr & 0xc0) == 0x80) lastptr--;
   1765           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
   1766           GETCHAR(c, lastptr);
   1767 #ifdef SUPPORT_UCP
   1768           if (md->use_ucp)
   1769             {
   1770             if (c == '_') prev_is_word = TRUE; else
   1771               {
   1772               int cat = UCD_CATEGORY(c);
   1773               prev_is_word = (cat == ucp_L || cat == ucp_N);
   1774               }
   1775             }
   1776           else
   1777 #endif
   1778           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   1779           }
   1780 
   1781         /* Get status of next character */
   1782 
   1783         if (eptr >= md->end_subject)
   1784           {
   1785           SCHECK_PARTIAL();
   1786           cur_is_word = FALSE;
   1787           }
   1788         else
   1789           {
   1790           GETCHAR(c, eptr);
   1791 #ifdef SUPPORT_UCP
   1792           if (md->use_ucp)
   1793             {
   1794             if (c == '_') cur_is_word = TRUE; else
   1795               {
   1796               int cat = UCD_CATEGORY(c);
   1797               cur_is_word = (cat == ucp_L || cat == ucp_N);
   1798               }
   1799             }
   1800           else
   1801 #endif
   1802           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
   1803           }
   1804         }
   1805       else
   1806 #endif
   1807 
   1808       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
   1809       consistency with the behaviour of \w we do use it in this case. */
   1810 
   1811         {
   1812         /* Get status of previous character */
   1813 
   1814         if (eptr == md->start_subject) prev_is_word = FALSE; else
   1815           {
   1816           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
   1817 #ifdef SUPPORT_UCP
   1818           if (md->use_ucp)
   1819             {
   1820             c = eptr[-1];
   1821             if (c == '_') prev_is_word = TRUE; else
   1822               {
   1823               int cat = UCD_CATEGORY(c);
   1824               prev_is_word = (cat == ucp_L || cat == ucp_N);
   1825               }
   1826             }
   1827           else
   1828 #endif
   1829           prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
   1830           }
   1831 
   1832         /* Get status of next character */
   1833 
   1834         if (eptr >= md->end_subject)
   1835           {
   1836           SCHECK_PARTIAL();
   1837           cur_is_word = FALSE;
   1838           }
   1839         else
   1840 #ifdef SUPPORT_UCP
   1841         if (md->use_ucp)
   1842           {
   1843           c = *eptr;
   1844           if (c == '_') cur_is_word = TRUE; else
   1845             {
   1846             int cat = UCD_CATEGORY(c);
   1847             cur_is_word = (cat == ucp_L || cat == ucp_N);
   1848             }
   1849           }
   1850         else
   1851 #endif
   1852         cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
   1853         }
   1854 
   1855       /* Now see if the situation is what we want */
   1856 
   1857       if ((*ecode++ == OP_WORD_BOUNDARY)?
   1858            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
   1859         MRRETURN(MATCH_NOMATCH);
   1860       }
   1861     break;
   1862 
   1863     /* Match a single character type; inline for speed */
   1864 
   1865     case OP_ANY:
   1866     if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
   1867     /* Fall through */
   1868 
   1869     case OP_ALLANY:
   1870     if (eptr++ >= md->end_subject)
   1871       {
   1872       SCHECK_PARTIAL();
   1873       MRRETURN(MATCH_NOMATCH);
   1874       }
   1875     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   1876     ecode++;
   1877     break;
   1878 
   1879     /* Match a single byte, even in UTF-8 mode. This opcode really does match
   1880     any byte, even newline, independent of the setting of PCRE_DOTALL. */
   1881 
   1882     case OP_ANYBYTE:
   1883     if (eptr++ >= md->end_subject)
   1884       {
   1885       SCHECK_PARTIAL();
   1886       MRRETURN(MATCH_NOMATCH);
   1887       }
   1888     ecode++;
   1889     break;
   1890 
   1891     case OP_NOT_DIGIT:
   1892     if (eptr >= md->end_subject)
   1893       {
   1894       SCHECK_PARTIAL();
   1895       MRRETURN(MATCH_NOMATCH);
   1896       }
   1897     GETCHARINCTEST(c, eptr);
   1898     if (
   1899 #ifdef SUPPORT_UTF8
   1900        c < 256 &&
   1901 #endif
   1902        (md->ctypes[c] & ctype_digit) != 0
   1903        )
   1904       MRRETURN(MATCH_NOMATCH);
   1905     ecode++;
   1906     break;
   1907 
   1908     case OP_DIGIT:
   1909     if (eptr >= md->end_subject)
   1910       {
   1911       SCHECK_PARTIAL();
   1912       MRRETURN(MATCH_NOMATCH);
   1913       }
   1914     GETCHARINCTEST(c, eptr);
   1915     if (
   1916 #ifdef SUPPORT_UTF8
   1917        c >= 256 ||
   1918 #endif
   1919        (md->ctypes[c] & ctype_digit) == 0
   1920        )
   1921       MRRETURN(MATCH_NOMATCH);
   1922     ecode++;
   1923     break;
   1924 
   1925     case OP_NOT_WHITESPACE:
   1926     if (eptr >= md->end_subject)
   1927       {
   1928       SCHECK_PARTIAL();
   1929       MRRETURN(MATCH_NOMATCH);
   1930       }
   1931     GETCHARINCTEST(c, eptr);
   1932     if (
   1933 #ifdef SUPPORT_UTF8
   1934        c < 256 &&
   1935 #endif
   1936        (md->ctypes[c] & ctype_space) != 0
   1937        )
   1938       MRRETURN(MATCH_NOMATCH);
   1939     ecode++;
   1940     break;
   1941 
   1942     case OP_WHITESPACE:
   1943     if (eptr >= md->end_subject)
   1944       {
   1945       SCHECK_PARTIAL();
   1946       MRRETURN(MATCH_NOMATCH);
   1947       }
   1948     GETCHARINCTEST(c, eptr);
   1949     if (
   1950 #ifdef SUPPORT_UTF8
   1951        c >= 256 ||
   1952 #endif
   1953        (md->ctypes[c] & ctype_space) == 0
   1954        )
   1955       MRRETURN(MATCH_NOMATCH);
   1956     ecode++;
   1957     break;
   1958 
   1959     case OP_NOT_WORDCHAR:
   1960     if (eptr >= md->end_subject)
   1961       {
   1962       SCHECK_PARTIAL();
   1963       MRRETURN(MATCH_NOMATCH);
   1964       }
   1965     GETCHARINCTEST(c, eptr);
   1966     if (
   1967 #ifdef SUPPORT_UTF8
   1968        c < 256 &&
   1969 #endif
   1970        (md->ctypes[c] & ctype_word) != 0
   1971        )
   1972       MRRETURN(MATCH_NOMATCH);
   1973     ecode++;
   1974     break;
   1975 
   1976     case OP_WORDCHAR:
   1977     if (eptr >= md->end_subject)
   1978       {
   1979       SCHECK_PARTIAL();
   1980       MRRETURN(MATCH_NOMATCH);
   1981       }
   1982     GETCHARINCTEST(c, eptr);
   1983     if (
   1984 #ifdef SUPPORT_UTF8
   1985        c >= 256 ||
   1986 #endif
   1987        (md->ctypes[c] & ctype_word) == 0
   1988        )
   1989       MRRETURN(MATCH_NOMATCH);
   1990     ecode++;
   1991     break;
   1992 
   1993     case OP_ANYNL:
   1994     if (eptr >= md->end_subject)
   1995       {
   1996       SCHECK_PARTIAL();
   1997       MRRETURN(MATCH_NOMATCH);
   1998       }
   1999     GETCHARINCTEST(c, eptr);
   2000     switch(c)
   2001       {
   2002       default: MRRETURN(MATCH_NOMATCH);
   2003       case 0x000d:
   2004       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
   2005       break;
   2006 
   2007       case 0x000a:
   2008       break;
   2009 
   2010       case 0x000b:
   2011       case 0x000c:
   2012       case 0x0085:
   2013       case 0x2028:
   2014       case 0x2029:
   2015       if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
   2016       break;
   2017       }
   2018     ecode++;
   2019     break;
   2020 
   2021     case OP_NOT_HSPACE:
   2022     if (eptr >= md->end_subject)
   2023       {
   2024       SCHECK_PARTIAL();
   2025       MRRETURN(MATCH_NOMATCH);
   2026       }
   2027     GETCHARINCTEST(c, eptr);
   2028     switch(c)
   2029       {
   2030       default: break;
   2031       case 0x09:      /* HT */
   2032       case 0x20:      /* SPACE */
   2033       case 0xa0:      /* NBSP */
   2034       case 0x1680:    /* OGHAM SPACE MARK */
   2035       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   2036       case 0x2000:    /* EN QUAD */
   2037       case 0x2001:    /* EM QUAD */
   2038       case 0x2002:    /* EN SPACE */
   2039       case 0x2003:    /* EM SPACE */
   2040       case 0x2004:    /* THREE-PER-EM SPACE */
   2041       case 0x2005:    /* FOUR-PER-EM SPACE */
   2042       case 0x2006:    /* SIX-PER-EM SPACE */
   2043       case 0x2007:    /* FIGURE SPACE */
   2044       case 0x2008:    /* PUNCTUATION SPACE */
   2045       case 0x2009:    /* THIN SPACE */
   2046       case 0x200A:    /* HAIR SPACE */
   2047       case 0x202f:    /* NARROW NO-BREAK SPACE */
   2048       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   2049       case 0x3000:    /* IDEOGRAPHIC SPACE */
   2050       MRRETURN(MATCH_NOMATCH);
   2051       }
   2052     ecode++;
   2053     break;
   2054 
   2055     case OP_HSPACE:
   2056     if (eptr >= md->end_subject)
   2057       {
   2058       SCHECK_PARTIAL();
   2059       MRRETURN(MATCH_NOMATCH);
   2060       }
   2061     GETCHARINCTEST(c, eptr);
   2062     switch(c)
   2063       {
   2064       default: MRRETURN(MATCH_NOMATCH);
   2065       case 0x09:      /* HT */
   2066       case 0x20:      /* SPACE */
   2067       case 0xa0:      /* NBSP */
   2068       case 0x1680:    /* OGHAM SPACE MARK */
   2069       case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   2070       case 0x2000:    /* EN QUAD */
   2071       case 0x2001:    /* EM QUAD */
   2072       case 0x2002:    /* EN SPACE */
   2073       case 0x2003:    /* EM SPACE */
   2074       case 0x2004:    /* THREE-PER-EM SPACE */
   2075       case 0x2005:    /* FOUR-PER-EM SPACE */
   2076       case 0x2006:    /* SIX-PER-EM SPACE */
   2077       case 0x2007:    /* FIGURE SPACE */
   2078       case 0x2008:    /* PUNCTUATION SPACE */
   2079       case 0x2009:    /* THIN SPACE */
   2080       case 0x200A:    /* HAIR SPACE */
   2081       case 0x202f:    /* NARROW NO-BREAK SPACE */
   2082       case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   2083       case 0x3000:    /* IDEOGRAPHIC SPACE */
   2084       break;
   2085       }
   2086     ecode++;
   2087     break;
   2088 
   2089     case OP_NOT_VSPACE:
   2090     if (eptr >= md->end_subject)
   2091       {
   2092       SCHECK_PARTIAL();
   2093       MRRETURN(MATCH_NOMATCH);
   2094       }
   2095     GETCHARINCTEST(c, eptr);
   2096     switch(c)
   2097       {
   2098       default: break;
   2099       case 0x0a:      /* LF */
   2100       case 0x0b:      /* VT */
   2101       case 0x0c:      /* FF */
   2102       case 0x0d:      /* CR */
   2103       case 0x85:      /* NEL */
   2104       case 0x2028:    /* LINE SEPARATOR */
   2105       case 0x2029:    /* PARAGRAPH SEPARATOR */
   2106       MRRETURN(MATCH_NOMATCH);
   2107       }
   2108     ecode++;
   2109     break;
   2110 
   2111     case OP_VSPACE:
   2112     if (eptr >= md->end_subject)
   2113       {
   2114       SCHECK_PARTIAL();
   2115       MRRETURN(MATCH_NOMATCH);
   2116       }
   2117     GETCHARINCTEST(c, eptr);
   2118     switch(c)
   2119       {
   2120       default: MRRETURN(MATCH_NOMATCH);
   2121       case 0x0a:      /* LF */
   2122       case 0x0b:      /* VT */
   2123       case 0x0c:      /* FF */
   2124       case 0x0d:      /* CR */
   2125       case 0x85:      /* NEL */
   2126       case 0x2028:    /* LINE SEPARATOR */
   2127       case 0x2029:    /* PARAGRAPH SEPARATOR */
   2128       break;
   2129       }
   2130     ecode++;
   2131     break;
   2132 
   2133 #ifdef SUPPORT_UCP
   2134     /* Check the next character by Unicode property. We will get here only
   2135     if the support is in the binary; otherwise a compile-time error occurs. */
   2136 
   2137     case OP_PROP:
   2138     case OP_NOTPROP:
   2139     if (eptr >= md->end_subject)
   2140       {
   2141       SCHECK_PARTIAL();
   2142       MRRETURN(MATCH_NOMATCH);
   2143       }
   2144     GETCHARINCTEST(c, eptr);
   2145       {
   2146       const ucd_record *prop = GET_UCD(c);
   2147 
   2148       switch(ecode[1])
   2149         {
   2150         case PT_ANY:
   2151         if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
   2152         break;
   2153 
   2154         case PT_LAMP:
   2155         if ((prop->chartype == ucp_Lu ||
   2156              prop->chartype == ucp_Ll ||
   2157              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
   2158           MRRETURN(MATCH_NOMATCH);
   2159         break;
   2160 
   2161         case PT_GC:
   2162         if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
   2163           MRRETURN(MATCH_NOMATCH);
   2164         break;
   2165 
   2166         case PT_PC:
   2167         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
   2168           MRRETURN(MATCH_NOMATCH);
   2169         break;
   2170 
   2171         case PT_SC:
   2172         if ((ecode[2] != prop->script) == (op == OP_PROP))
   2173           MRRETURN(MATCH_NOMATCH);
   2174         break;
   2175 
   2176         /* These are specials */
   2177 
   2178         case PT_ALNUM:
   2179         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
   2180              _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
   2181           MRRETURN(MATCH_NOMATCH);
   2182         break;
   2183 
   2184         case PT_SPACE:    /* Perl space */
   2185         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
   2186              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
   2187                == (op == OP_NOTPROP))
   2188           MRRETURN(MATCH_NOMATCH);
   2189         break;
   2190 
   2191         case PT_PXSPACE:  /* POSIX space */
   2192         if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
   2193              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
   2194              c == CHAR_FF || c == CHAR_CR)
   2195                == (op == OP_NOTPROP))
   2196           MRRETURN(MATCH_NOMATCH);
   2197         break;
   2198 
   2199         case PT_WORD:
   2200         if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
   2201              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
   2202              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
   2203           MRRETURN(MATCH_NOMATCH);
   2204         break;
   2205 
   2206         /* This should never occur */
   2207 
   2208         default:
   2209         RRETURN(PCRE_ERROR_INTERNAL);
   2210         }
   2211 
   2212       ecode += 3;
   2213       }
   2214     break;
   2215 
   2216     /* Match an extended Unicode sequence. We will get here only if the support
   2217     is in the binary; otherwise a compile-time error occurs. */
   2218 
   2219     case OP_EXTUNI:
   2220     if (eptr >= md->end_subject)
   2221       {
   2222       SCHECK_PARTIAL();
   2223       MRRETURN(MATCH_NOMATCH);
   2224       }
   2225     GETCHARINCTEST(c, eptr);
   2226       {
   2227       int category = UCD_CATEGORY(c);
   2228       if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
   2229       while (eptr < md->end_subject)
   2230         {
   2231         int len = 1;
   2232         if (!utf8) c = *eptr; else
   2233           {
   2234           GETCHARLEN(c, eptr, len);
   2235           }
   2236         category = UCD_CATEGORY(c);
   2237         if (category != ucp_M) break;
   2238         eptr += len;
   2239         }
   2240       }
   2241     ecode++;
   2242     break;
   2243 #endif
   2244 
   2245 
   2246     /* Match a back reference, possibly repeatedly. Look past the end of the
   2247     item to see if there is repeat information following. The code is similar
   2248     to that for character classes, but repeated for efficiency. Then obey
   2249     similar code to character type repeats - written out again for speed.
   2250     However, if the referenced string is the empty string, always treat
   2251     it as matched, any number of times (otherwise there could be infinite
   2252     loops). */
   2253 
   2254     case OP_REF:
   2255       {
   2256       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
   2257       ecode += 3;
   2258 
   2259       /* If the reference is unset, there are two possibilities:
   2260 
   2261       (a) In the default, Perl-compatible state, set the length to be longer
   2262       than the amount of subject left; this ensures that every attempt at a
   2263       match fails. We can't just fail here, because of the possibility of
   2264       quantifiers with zero minima.
   2265 
   2266       (b) If the JavaScript compatibility flag is set, set the length to zero
   2267       so that the back reference matches an empty string.
   2268 
   2269       Otherwise, set the length to the length of what was matched by the
   2270       referenced subpattern. */
   2271 
   2272       if (offset >= offset_top || md->offset_vector[offset] < 0)
   2273         length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
   2274       else
   2275         length = md->offset_vector[offset+1] - md->offset_vector[offset];
   2276 
   2277       /* Set up for repetition, or handle the non-repeated case */
   2278 
   2279       switch (*ecode)
   2280         {
   2281         case OP_CRSTAR:
   2282         case OP_CRMINSTAR:
   2283         case OP_CRPLUS:
   2284         case OP_CRMINPLUS:
   2285         case OP_CRQUERY:
   2286         case OP_CRMINQUERY:
   2287         c = *ecode++ - OP_CRSTAR;
   2288         minimize = (c & 1) != 0;
   2289         min = rep_min[c];                 /* Pick up values from tables; */
   2290         max = rep_max[c];                 /* zero for max => infinity */
   2291         if (max == 0) max = INT_MAX;
   2292         break;
   2293 
   2294         case OP_CRRANGE:
   2295         case OP_CRMINRANGE:
   2296         minimize = (*ecode == OP_CRMINRANGE);
   2297         min = GET2(ecode, 1);
   2298         max = GET2(ecode, 3);
   2299         if (max == 0) max = INT_MAX;
   2300         ecode += 5;
   2301         break;
   2302 
   2303         default:               /* No repeat follows */
   2304         if (!match_ref(offset, eptr, length, md, ims))
   2305           {
   2306           CHECK_PARTIAL();
   2307           MRRETURN(MATCH_NOMATCH);
   2308           }
   2309         eptr += length;
   2310         continue;              /* With the main loop */
   2311         }
   2312 
   2313       /* If the length of the reference is zero, just continue with the
   2314       main loop. */
   2315 
   2316       if (length == 0) continue;
   2317 
   2318       /* First, ensure the minimum number of matches are present. We get back
   2319       the length of the reference string explicitly rather than passing the
   2320       address of eptr, so that eptr can be a register variable. */
   2321 
   2322       for (i = 1; i <= min; i++)
   2323         {
   2324         if (!match_ref(offset, eptr, length, md, ims))
   2325           {
   2326           CHECK_PARTIAL();
   2327           MRRETURN(MATCH_NOMATCH);
   2328           }
   2329         eptr += length;
   2330         }
   2331 
   2332       /* If min = max, continue at the same level without recursion.
   2333       They are not both allowed to be zero. */
   2334 
   2335       if (min == max) continue;
   2336 
   2337       /* If minimizing, keep trying and advancing the pointer */
   2338 
   2339       if (minimize)
   2340         {
   2341         for (fi = min;; fi++)
   2342           {
   2343           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
   2344           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2345           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2346           if (!match_ref(offset, eptr, length, md, ims))
   2347             {
   2348             CHECK_PARTIAL();
   2349             MRRETURN(MATCH_NOMATCH);
   2350             }
   2351           eptr += length;
   2352           }
   2353         /* Control never gets here */
   2354         }
   2355 
   2356       /* If maximizing, find the longest string and work backwards */
   2357 
   2358       else
   2359         {
   2360         pp = eptr;
   2361         for (i = min; i < max; i++)
   2362           {
   2363           if (!match_ref(offset, eptr, length, md, ims))
   2364             {
   2365             CHECK_PARTIAL();
   2366             break;
   2367             }
   2368           eptr += length;
   2369           }
   2370         while (eptr >= pp)
   2371           {
   2372           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
   2373           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2374           eptr -= length;
   2375           }
   2376         MRRETURN(MATCH_NOMATCH);
   2377         }
   2378       }
   2379     /* Control never gets here */
   2380 
   2381     /* Match a bit-mapped character class, possibly repeatedly. This op code is
   2382     used when all the characters in the class have values in the range 0-255,
   2383     and either the matching is caseful, or the characters are in the range
   2384     0-127 when UTF-8 processing is enabled. The only difference between
   2385     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
   2386     encountered.
   2387 
   2388     First, look past the end of the item to see if there is repeat information
   2389     following. Then obey similar code to character type repeats - written out
   2390     again for speed. */
   2391 
   2392     case OP_NCLASS:
   2393     case OP_CLASS:
   2394       {
   2395       data = ecode + 1;                /* Save for matching */
   2396       ecode += 33;                     /* Advance past the item */
   2397 
   2398       switch (*ecode)
   2399         {
   2400         case OP_CRSTAR:
   2401         case OP_CRMINSTAR:
   2402         case OP_CRPLUS:
   2403         case OP_CRMINPLUS:
   2404         case OP_CRQUERY:
   2405         case OP_CRMINQUERY:
   2406         c = *ecode++ - OP_CRSTAR;
   2407         minimize = (c & 1) != 0;
   2408         min = rep_min[c];                 /* Pick up values from tables; */
   2409         max = rep_max[c];                 /* zero for max => infinity */
   2410         if (max == 0) max = INT_MAX;
   2411         break;
   2412 
   2413         case OP_CRRANGE:
   2414         case OP_CRMINRANGE:
   2415         minimize = (*ecode == OP_CRMINRANGE);
   2416         min = GET2(ecode, 1);
   2417         max = GET2(ecode, 3);
   2418         if (max == 0) max = INT_MAX;
   2419         ecode += 5;
   2420         break;
   2421 
   2422         default:               /* No repeat follows */
   2423         min = max = 1;
   2424         break;
   2425         }
   2426 
   2427       /* First, ensure the minimum number of matches are present. */
   2428 
   2429 #ifdef SUPPORT_UTF8
   2430       /* UTF-8 mode */
   2431       if (utf8)
   2432         {
   2433         for (i = 1; i <= min; i++)
   2434           {
   2435           if (eptr >= md->end_subject)
   2436             {
   2437             SCHECK_PARTIAL();
   2438             MRRETURN(MATCH_NOMATCH);
   2439             }
   2440           GETCHARINC(c, eptr);
   2441           if (c > 255)
   2442             {
   2443             if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
   2444             }
   2445           else
   2446             {
   2447             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
   2448             }
   2449           }
   2450         }
   2451       else
   2452 #endif
   2453       /* Not UTF-8 mode */
   2454         {
   2455         for (i = 1; i <= min; i++)
   2456           {
   2457           if (eptr >= md->end_subject)
   2458             {
   2459             SCHECK_PARTIAL();
   2460             MRRETURN(MATCH_NOMATCH);
   2461             }
   2462           c = *eptr++;
   2463           if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
   2464           }
   2465         }
   2466 
   2467       /* If max == min we can continue with the main loop without the
   2468       need to recurse. */
   2469 
   2470       if (min == max) continue;
   2471 
   2472       /* If minimizing, keep testing the rest of the expression and advancing
   2473       the pointer while it matches the class. */
   2474 
   2475       if (minimize)
   2476         {
   2477 #ifdef SUPPORT_UTF8
   2478         /* UTF-8 mode */
   2479         if (utf8)
   2480           {
   2481           for (fi = min;; fi++)
   2482             {
   2483             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
   2484             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2485             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2486             if (eptr >= md->end_subject)
   2487               {
   2488               SCHECK_PARTIAL();
   2489               MRRETURN(MATCH_NOMATCH);
   2490               }
   2491             GETCHARINC(c, eptr);
   2492             if (c > 255)
   2493               {
   2494               if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
   2495               }
   2496             else
   2497               {
   2498               if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
   2499               }
   2500             }
   2501           }
   2502         else
   2503 #endif
   2504         /* Not UTF-8 mode */
   2505           {
   2506           for (fi = min;; fi++)
   2507             {
   2508             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
   2509             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2510             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2511             if (eptr >= md->end_subject)
   2512               {
   2513               SCHECK_PARTIAL();
   2514               MRRETURN(MATCH_NOMATCH);
   2515               }
   2516             c = *eptr++;
   2517             if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
   2518             }
   2519           }
   2520         /* Control never gets here */
   2521         }
   2522 
   2523       /* If maximizing, find the longest possible run, then work backwards. */
   2524 
   2525       else
   2526         {
   2527         pp = eptr;
   2528 
   2529 #ifdef SUPPORT_UTF8
   2530         /* UTF-8 mode */
   2531         if (utf8)
   2532           {
   2533           for (i = min; i < max; i++)
   2534             {
   2535             int len = 1;
   2536             if (eptr >= md->end_subject)
   2537               {
   2538               SCHECK_PARTIAL();
   2539               break;
   2540               }
   2541             GETCHARLEN(c, eptr, len);
   2542             if (c > 255)
   2543               {
   2544               if (op == OP_CLASS) break;
   2545               }
   2546             else
   2547               {
   2548               if ((data[c/8] & (1 << (c&7))) == 0) break;
   2549               }
   2550             eptr += len;
   2551             }
   2552           for (;;)
   2553             {
   2554             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
   2555             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2556             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   2557             BACKCHAR(eptr);
   2558             }
   2559           }
   2560         else
   2561 #endif
   2562           /* Not UTF-8 mode */
   2563           {
   2564           for (i = min; i < max; i++)
   2565             {
   2566             if (eptr >= md->end_subject)
   2567               {
   2568               SCHECK_PARTIAL();
   2569               break;
   2570               }
   2571             c = *eptr;
   2572             if ((data[c/8] & (1 << (c&7))) == 0) break;
   2573             eptr++;
   2574             }
   2575           while (eptr >= pp)
   2576             {
   2577             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
   2578             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2579             eptr--;
   2580             }
   2581           }
   2582 
   2583         MRRETURN(MATCH_NOMATCH);
   2584         }
   2585       }
   2586     /* Control never gets here */
   2587 
   2588 
   2589     /* Match an extended character class. This opcode is encountered only
   2590     when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
   2591     mode, because Unicode properties are supported in non-UTF-8 mode. */
   2592 
   2593 #ifdef SUPPORT_UTF8
   2594     case OP_XCLASS:
   2595       {
   2596       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
   2597       ecode += GET(ecode, 1);                      /* Advance past the item */
   2598 
   2599       switch (*ecode)
   2600         {
   2601         case OP_CRSTAR:
   2602         case OP_CRMINSTAR:
   2603         case OP_CRPLUS:
   2604         case OP_CRMINPLUS:
   2605         case OP_CRQUERY:
   2606         case OP_CRMINQUERY:
   2607         c = *ecode++ - OP_CRSTAR;
   2608         minimize = (c & 1) != 0;
   2609         min = rep_min[c];                 /* Pick up values from tables; */
   2610         max = rep_max[c];                 /* zero for max => infinity */
   2611         if (max == 0) max = INT_MAX;
   2612         break;
   2613 
   2614         case OP_CRRANGE:
   2615         case OP_CRMINRANGE:
   2616         minimize = (*ecode == OP_CRMINRANGE);
   2617         min = GET2(ecode, 1);
   2618         max = GET2(ecode, 3);
   2619         if (max == 0) max = INT_MAX;
   2620         ecode += 5;
   2621         break;
   2622 
   2623         default:               /* No repeat follows */
   2624         min = max = 1;
   2625         break;
   2626         }
   2627 
   2628       /* First, ensure the minimum number of matches are present. */
   2629 
   2630       for (i = 1; i <= min; i++)
   2631         {
   2632         if (eptr >= md->end_subject)
   2633           {
   2634           SCHECK_PARTIAL();
   2635           MRRETURN(MATCH_NOMATCH);
   2636           }
   2637         GETCHARINCTEST(c, eptr);
   2638         if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
   2639         }
   2640 
   2641       /* If max == min we can continue with the main loop without the
   2642       need to recurse. */
   2643 
   2644       if (min == max) continue;
   2645 
   2646       /* If minimizing, keep testing the rest of the expression and advancing
   2647       the pointer while it matches the class. */
   2648 
   2649       if (minimize)
   2650         {
   2651         for (fi = min;; fi++)
   2652           {
   2653           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
   2654           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2655           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2656           if (eptr >= md->end_subject)
   2657             {
   2658             SCHECK_PARTIAL();
   2659             MRRETURN(MATCH_NOMATCH);
   2660             }
   2661           GETCHARINCTEST(c, eptr);
   2662           if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
   2663           }
   2664         /* Control never gets here */
   2665         }
   2666 
   2667       /* If maximizing, find the longest possible run, then work backwards. */
   2668 
   2669       else
   2670         {
   2671         pp = eptr;
   2672         for (i = min; i < max; i++)
   2673           {
   2674           int len = 1;
   2675           if (eptr >= md->end_subject)
   2676             {
   2677             SCHECK_PARTIAL();
   2678             break;
   2679             }
   2680           GETCHARLENTEST(c, eptr, len);
   2681           if (!_pcre_xclass(c, data)) break;
   2682           eptr += len;
   2683           }
   2684         for(;;)
   2685           {
   2686           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
   2687           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2688           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   2689           if (utf8) BACKCHAR(eptr);
   2690           }
   2691         MRRETURN(MATCH_NOMATCH);
   2692         }
   2693 
   2694       /* Control never gets here */
   2695       }
   2696 #endif    /* End of XCLASS */
   2697 
   2698     /* Match a single character, casefully */
   2699 
   2700     case OP_CHAR:
   2701 #ifdef SUPPORT_UTF8
   2702     if (utf8)
   2703       {
   2704       length = 1;
   2705       ecode++;
   2706       GETCHARLEN(fc, ecode, length);
   2707       if (length > md->end_subject - eptr)
   2708         {
   2709         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
   2710         MRRETURN(MATCH_NOMATCH);
   2711         }
   2712       while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
   2713       }
   2714     else
   2715 #endif
   2716 
   2717     /* Non-UTF-8 mode */
   2718       {
   2719       if (md->end_subject - eptr < 1)
   2720         {
   2721         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
   2722         MRRETURN(MATCH_NOMATCH);
   2723         }
   2724       if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
   2725       ecode += 2;
   2726       }
   2727     break;
   2728 
   2729     /* Match a single character, caselessly */
   2730 
   2731     case OP_CHARNC:
   2732 #ifdef SUPPORT_UTF8
   2733     if (utf8)
   2734       {
   2735       length = 1;
   2736       ecode++;
   2737       GETCHARLEN(fc, ecode, length);
   2738 
   2739       if (length > md->end_subject - eptr)
   2740         {
   2741         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
   2742         MRRETURN(MATCH_NOMATCH);
   2743         }
   2744 
   2745       /* If the pattern character's value is < 128, we have only one byte, and
   2746       can use the fast lookup table. */
   2747 
   2748       if (fc < 128)
   2749         {
   2750         if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   2751         }
   2752 
   2753       /* Otherwise we must pick up the subject character */
   2754 
   2755       else
   2756         {
   2757         unsigned int dc;
   2758         GETCHARINC(dc, eptr);
   2759         ecode += length;
   2760 
   2761         /* If we have Unicode property support, we can use it to test the other
   2762         case of the character, if there is one. */
   2763 
   2764         if (fc != dc)
   2765           {
   2766 #ifdef SUPPORT_UCP
   2767           if (dc != UCD_OTHERCASE(fc))
   2768 #endif
   2769             MRRETURN(MATCH_NOMATCH);
   2770           }
   2771         }
   2772       }
   2773     else
   2774 #endif   /* SUPPORT_UTF8 */
   2775 
   2776     /* Non-UTF-8 mode */
   2777       {
   2778       if (md->end_subject - eptr < 1)
   2779         {
   2780         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
   2781         MRRETURN(MATCH_NOMATCH);
   2782         }
   2783       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   2784       ecode += 2;
   2785       }
   2786     break;
   2787 
   2788     /* Match a single character repeatedly. */
   2789 
   2790     case OP_EXACT:
   2791     min = max = GET2(ecode, 1);
   2792     ecode += 3;
   2793     goto REPEATCHAR;
   2794 
   2795     case OP_POSUPTO:
   2796     possessive = TRUE;
   2797     /* Fall through */
   2798 
   2799     case OP_UPTO:
   2800     case OP_MINUPTO:
   2801     min = 0;
   2802     max = GET2(ecode, 1);
   2803     minimize = *ecode == OP_MINUPTO;
   2804     ecode += 3;
   2805     goto REPEATCHAR;
   2806 
   2807     case OP_POSSTAR:
   2808     possessive = TRUE;
   2809     min = 0;
   2810     max = INT_MAX;
   2811     ecode++;
   2812     goto REPEATCHAR;
   2813 
   2814     case OP_POSPLUS:
   2815     possessive = TRUE;
   2816     min = 1;
   2817     max = INT_MAX;
   2818     ecode++;
   2819     goto REPEATCHAR;
   2820 
   2821     case OP_POSQUERY:
   2822     possessive = TRUE;
   2823     min = 0;
   2824     max = 1;
   2825     ecode++;
   2826     goto REPEATCHAR;
   2827 
   2828     case OP_STAR:
   2829     case OP_MINSTAR:
   2830     case OP_PLUS:
   2831     case OP_MINPLUS:
   2832     case OP_QUERY:
   2833     case OP_MINQUERY:
   2834     c = *ecode++ - OP_STAR;
   2835     minimize = (c & 1) != 0;
   2836 
   2837     min = rep_min[c];                 /* Pick up values from tables; */
   2838     max = rep_max[c];                 /* zero for max => infinity */
   2839     if (max == 0) max = INT_MAX;
   2840 
   2841     /* Common code for all repeated single-character matches. */
   2842 
   2843     REPEATCHAR:
   2844 #ifdef SUPPORT_UTF8
   2845     if (utf8)
   2846       {
   2847       length = 1;
   2848       charptr = ecode;
   2849       GETCHARLEN(fc, ecode, length);
   2850       ecode += length;
   2851 
   2852       /* Handle multibyte character matching specially here. There is
   2853       support for caseless matching if UCP support is present. */
   2854 
   2855       if (length > 1)
   2856         {
   2857 #ifdef SUPPORT_UCP
   2858         unsigned int othercase;
   2859         if ((ims & PCRE_CASELESS) != 0 &&
   2860             (othercase = UCD_OTHERCASE(fc)) != fc)
   2861           oclength = _pcre_ord2utf8(othercase, occhars);
   2862         else oclength = 0;
   2863 #endif  /* SUPPORT_UCP */
   2864 
   2865         for (i = 1; i <= min; i++)
   2866           {
   2867           if (eptr <= md->end_subject - length &&
   2868             memcmp(eptr, charptr, length) == 0) eptr += length;
   2869 #ifdef SUPPORT_UCP
   2870           else if (oclength > 0 &&
   2871                    eptr <= md->end_subject - oclength &&
   2872                    memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
   2873 #endif  /* SUPPORT_UCP */
   2874           else
   2875             {
   2876             CHECK_PARTIAL();
   2877             MRRETURN(MATCH_NOMATCH);
   2878             }
   2879           }
   2880 
   2881         if (min == max) continue;
   2882 
   2883         if (minimize)
   2884           {
   2885           for (fi = min;; fi++)
   2886             {
   2887             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
   2888             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2889             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2890             if (eptr <= md->end_subject - length &&
   2891               memcmp(eptr, charptr, length) == 0) eptr += length;
   2892 #ifdef SUPPORT_UCP
   2893             else if (oclength > 0 &&
   2894                      eptr <= md->end_subject - oclength &&
   2895                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
   2896 #endif  /* SUPPORT_UCP */
   2897             else
   2898               {
   2899               CHECK_PARTIAL();
   2900               MRRETURN(MATCH_NOMATCH);
   2901               }
   2902             }
   2903           /* Control never gets here */
   2904           }
   2905 
   2906         else  /* Maximize */
   2907           {
   2908           pp = eptr;
   2909           for (i = min; i < max; i++)
   2910             {
   2911             if (eptr <= md->end_subject - length &&
   2912                 memcmp(eptr, charptr, length) == 0) eptr += length;
   2913 #ifdef SUPPORT_UCP
   2914             else if (oclength > 0 &&
   2915                      eptr <= md->end_subject - oclength &&
   2916                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
   2917 #endif  /* SUPPORT_UCP */
   2918             else
   2919               {
   2920               CHECK_PARTIAL();
   2921               break;
   2922               }
   2923             }
   2924 
   2925           if (possessive) continue;
   2926 
   2927           for(;;)
   2928             {
   2929             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
   2930             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2931             if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
   2932 #ifdef SUPPORT_UCP
   2933             eptr--;
   2934             BACKCHAR(eptr);
   2935 #else   /* without SUPPORT_UCP */
   2936             eptr -= length;
   2937 #endif  /* SUPPORT_UCP */
   2938             }
   2939           }
   2940         /* Control never gets here */
   2941         }
   2942 
   2943       /* If the length of a UTF-8 character is 1, we fall through here, and
   2944       obey the code as for non-UTF-8 characters below, though in this case the
   2945       value of fc will always be < 128. */
   2946       }
   2947     else
   2948 #endif  /* SUPPORT_UTF8 */
   2949 
   2950     /* When not in UTF-8 mode, load a single-byte character. */
   2951 
   2952     fc = *ecode++;
   2953 
   2954     /* The value of fc at this point is always less than 256, though we may or
   2955     may not be in UTF-8 mode. The code is duplicated for the caseless and
   2956     caseful cases, for speed, since matching characters is likely to be quite
   2957     common. First, ensure the minimum number of matches are present. If min =
   2958     max, continue at the same level without recursing. Otherwise, if
   2959     minimizing, keep trying the rest of the expression and advancing one
   2960     matching character if failing, up to the maximum. Alternatively, if
   2961     maximizing, find the maximum number of characters and work backwards. */
   2962 
   2963     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   2964       max, eptr));
   2965 
   2966     if ((ims & PCRE_CASELESS) != 0)
   2967       {
   2968       fc = md->lcc[fc];
   2969       for (i = 1; i <= min; i++)
   2970         {
   2971         if (eptr >= md->end_subject)
   2972           {
   2973           SCHECK_PARTIAL();
   2974           MRRETURN(MATCH_NOMATCH);
   2975           }
   2976         if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   2977         }
   2978       if (min == max) continue;
   2979       if (minimize)
   2980         {
   2981         for (fi = min;; fi++)
   2982           {
   2983           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
   2984           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   2985           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   2986           if (eptr >= md->end_subject)
   2987             {
   2988             SCHECK_PARTIAL();
   2989             MRRETURN(MATCH_NOMATCH);
   2990             }
   2991           if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   2992           }
   2993         /* Control never gets here */
   2994         }
   2995       else  /* Maximize */
   2996         {
   2997         pp = eptr;
   2998         for (i = min; i < max; i++)
   2999           {
   3000           if (eptr >= md->end_subject)
   3001             {
   3002             SCHECK_PARTIAL();
   3003             break;
   3004             }
   3005           if (fc != md->lcc[*eptr]) break;
   3006           eptr++;
   3007           }
   3008 
   3009         if (possessive) continue;
   3010 
   3011         while (eptr >= pp)
   3012           {
   3013           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
   3014           eptr--;
   3015           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3016           }
   3017         MRRETURN(MATCH_NOMATCH);
   3018         }
   3019       /* Control never gets here */
   3020       }
   3021 
   3022     /* Caseful comparisons (includes all multi-byte characters) */
   3023 
   3024     else
   3025       {
   3026       for (i = 1; i <= min; i++)
   3027         {
   3028         if (eptr >= md->end_subject)
   3029           {
   3030           SCHECK_PARTIAL();
   3031           MRRETURN(MATCH_NOMATCH);
   3032           }
   3033         if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
   3034         }
   3035 
   3036       if (min == max) continue;
   3037 
   3038       if (minimize)
   3039         {
   3040         for (fi = min;; fi++)
   3041           {
   3042           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
   3043           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3044           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   3045           if (eptr >= md->end_subject)
   3046             {
   3047             SCHECK_PARTIAL();
   3048             MRRETURN(MATCH_NOMATCH);
   3049             }
   3050           if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
   3051           }
   3052         /* Control never gets here */
   3053         }
   3054       else  /* Maximize */
   3055         {
   3056         pp = eptr;
   3057         for (i = min; i < max; i++)
   3058           {
   3059           if (eptr >= md->end_subject)
   3060             {
   3061             SCHECK_PARTIAL();
   3062             break;
   3063             }
   3064           if (fc != *eptr) break;
   3065           eptr++;
   3066           }
   3067         if (possessive) continue;
   3068 
   3069         while (eptr >= pp)
   3070           {
   3071           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
   3072           eptr--;
   3073           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3074           }
   3075         MRRETURN(MATCH_NOMATCH);
   3076         }
   3077       }
   3078     /* Control never gets here */
   3079 
   3080     /* Match a negated single one-byte character. The pattern character is one
   3081     byte, but the subject character being checked can be multibyte. */
   3082 
   3083     case OP_NOT:
   3084     if (eptr >= md->end_subject)
   3085       {
   3086       SCHECK_PARTIAL();
   3087       MRRETURN(MATCH_NOMATCH);
   3088       }
   3089     ecode++;
   3090     GETCHARINCTEST(c, eptr);
   3091     if ((ims & PCRE_CASELESS) != 0)
   3092       {
   3093 #ifdef SUPPORT_UTF8
   3094       if (c < 256)
   3095 #endif
   3096       c = md->lcc[c];
   3097       if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
   3098       }
   3099     else
   3100       {
   3101       if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
   3102       }
   3103     break;
   3104 
   3105     /* Match a negated single one-byte character repeatedly. This is almost a
   3106     repeat of the code for a repeated single character, but I haven't found a
   3107     nice way of commoning these up that doesn't require a test of the
   3108     positive/negative option for each character match. Maybe that wouldn't add
   3109     very much to the time taken, but character matching *is* what this is all
   3110     about... */
   3111 
   3112     case OP_NOTEXACT:
   3113     min = max = GET2(ecode, 1);
   3114     ecode += 3;
   3115     goto REPEATNOTCHAR;
   3116 
   3117     case OP_NOTUPTO:
   3118     case OP_NOTMINUPTO:
   3119     min = 0;
   3120     max = GET2(ecode, 1);
   3121     minimize = *ecode == OP_NOTMINUPTO;
   3122     ecode += 3;
   3123     goto REPEATNOTCHAR;
   3124 
   3125     case OP_NOTPOSSTAR:
   3126     possessive = TRUE;
   3127     min = 0;
   3128     max = INT_MAX;
   3129     ecode++;
   3130     goto REPEATNOTCHAR;
   3131 
   3132     case OP_NOTPOSPLUS:
   3133     possessive = TRUE;
   3134     min = 1;
   3135     max = INT_MAX;
   3136     ecode++;
   3137     goto REPEATNOTCHAR;
   3138 
   3139     case OP_NOTPOSQUERY:
   3140     possessive = TRUE;
   3141     min = 0;
   3142     max = 1;
   3143     ecode++;
   3144     goto REPEATNOTCHAR;
   3145 
   3146     case OP_NOTPOSUPTO:
   3147     possessive = TRUE;
   3148     min = 0;
   3149     max = GET2(ecode, 1);
   3150     ecode += 3;
   3151     goto REPEATNOTCHAR;
   3152 
   3153     case OP_NOTSTAR:
   3154     case OP_NOTMINSTAR:
   3155     case OP_NOTPLUS:
   3156     case OP_NOTMINPLUS:
   3157     case OP_NOTQUERY:
   3158     case OP_NOTMINQUERY:
   3159     c = *ecode++ - OP_NOTSTAR;
   3160     minimize = (c & 1) != 0;
   3161     min = rep_min[c];                 /* Pick up values from tables; */
   3162     max = rep_max[c];                 /* zero for max => infinity */
   3163     if (max == 0) max = INT_MAX;
   3164 
   3165     /* Common code for all repeated single-byte matches. */
   3166 
   3167     REPEATNOTCHAR:
   3168     fc = *ecode++;
   3169 
   3170     /* The code is duplicated for the caseless and caseful cases, for speed,
   3171     since matching characters is likely to be quite common. First, ensure the
   3172     minimum number of matches are present. If min = max, continue at the same
   3173     level without recursing. Otherwise, if minimizing, keep trying the rest of
   3174     the expression and advancing one matching character if failing, up to the
   3175     maximum. Alternatively, if maximizing, find the maximum number of
   3176     characters and work backwards. */
   3177 
   3178     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
   3179       max, eptr));
   3180 
   3181     if ((ims & PCRE_CASELESS) != 0)
   3182       {
   3183       fc = md->lcc[fc];
   3184 
   3185 #ifdef SUPPORT_UTF8
   3186       /* UTF-8 mode */
   3187       if (utf8)
   3188         {
   3189         register unsigned int d;
   3190         for (i = 1; i <= min; i++)
   3191           {
   3192           if (eptr >= md->end_subject)
   3193             {
   3194             SCHECK_PARTIAL();
   3195             MRRETURN(MATCH_NOMATCH);
   3196             }
   3197           GETCHARINC(d, eptr);
   3198           if (d < 256) d = md->lcc[d];
   3199           if (fc == d) MRRETURN(MATCH_NOMATCH);
   3200           }
   3201         }
   3202       else
   3203 #endif
   3204 
   3205       /* Not UTF-8 mode */
   3206         {
   3207         for (i = 1; i <= min; i++)
   3208           {
   3209           if (eptr >= md->end_subject)
   3210             {
   3211             SCHECK_PARTIAL();
   3212             MRRETURN(MATCH_NOMATCH);
   3213             }
   3214           if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   3215           }
   3216         }
   3217 
   3218       if (min == max) continue;
   3219 
   3220       if (minimize)
   3221         {
   3222 #ifdef SUPPORT_UTF8
   3223         /* UTF-8 mode */
   3224         if (utf8)
   3225           {
   3226           register unsigned int d;
   3227           for (fi = min;; fi++)
   3228             {
   3229             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
   3230             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3231             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   3232             if (eptr >= md->end_subject)
   3233               {
   3234               SCHECK_PARTIAL();
   3235               MRRETURN(MATCH_NOMATCH);
   3236               }
   3237             GETCHARINC(d, eptr);
   3238             if (d < 256) d = md->lcc[d];
   3239             if (fc == d) MRRETURN(MATCH_NOMATCH);
   3240             }
   3241           }
   3242         else
   3243 #endif
   3244         /* Not UTF-8 mode */
   3245           {
   3246           for (fi = min;; fi++)
   3247             {
   3248             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
   3249             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3250             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   3251             if (eptr >= md->end_subject)
   3252               {
   3253               SCHECK_PARTIAL();
   3254               MRRETURN(MATCH_NOMATCH);
   3255               }
   3256             if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
   3257             }
   3258           }
   3259         /* Control never gets here */
   3260         }
   3261 
   3262       /* Maximize case */
   3263 
   3264       else
   3265         {
   3266         pp = eptr;
   3267 
   3268 #ifdef SUPPORT_UTF8
   3269         /* UTF-8 mode */
   3270         if (utf8)
   3271           {
   3272           register unsigned int d;
   3273           for (i = min; i < max; i++)
   3274             {
   3275             int len = 1;
   3276             if (eptr >= md->end_subject)
   3277               {
   3278               SCHECK_PARTIAL();
   3279               break;
   3280               }
   3281             GETCHARLEN(d, eptr, len);
   3282             if (d < 256) d = md->lcc[d];
   3283             if (fc == d) break;
   3284             eptr += len;
   3285             }
   3286         if (possessive) continue;
   3287         for(;;)
   3288             {
   3289             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
   3290             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3291             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3292             BACKCHAR(eptr);
   3293             }
   3294           }
   3295         else
   3296 #endif
   3297         /* Not UTF-8 mode */
   3298           {
   3299           for (i = min; i < max; i++)
   3300             {
   3301             if (eptr >= md->end_subject)
   3302               {
   3303               SCHECK_PARTIAL();
   3304               break;
   3305               }
   3306             if (fc == md->lcc[*eptr]) break;
   3307             eptr++;
   3308             }
   3309           if (possessive) continue;
   3310           while (eptr >= pp)
   3311             {
   3312             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
   3313             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3314             eptr--;
   3315             }
   3316           }
   3317 
   3318         MRRETURN(MATCH_NOMATCH);
   3319         }
   3320       /* Control never gets here */
   3321       }
   3322 
   3323     /* Caseful comparisons */
   3324 
   3325     else
   3326       {
   3327 #ifdef SUPPORT_UTF8
   3328       /* UTF-8 mode */
   3329       if (utf8)
   3330         {
   3331         register unsigned int d;
   3332         for (i = 1; i <= min; i++)
   3333           {
   3334           if (eptr >= md->end_subject)
   3335             {
   3336             SCHECK_PARTIAL();
   3337             MRRETURN(MATCH_NOMATCH);
   3338             }
   3339           GETCHARINC(d, eptr);
   3340           if (fc == d) MRRETURN(MATCH_NOMATCH);
   3341           }
   3342         }
   3343       else
   3344 #endif
   3345       /* Not UTF-8 mode */
   3346         {
   3347         for (i = 1; i <= min; i++)
   3348           {
   3349           if (eptr >= md->end_subject)
   3350             {
   3351             SCHECK_PARTIAL();
   3352             MRRETURN(MATCH_NOMATCH);
   3353             }
   3354           if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
   3355           }
   3356         }
   3357 
   3358       if (min == max) continue;
   3359 
   3360       if (minimize)
   3361         {
   3362 #ifdef SUPPORT_UTF8
   3363         /* UTF-8 mode */
   3364         if (utf8)
   3365           {
   3366           register unsigned int d;
   3367           for (fi = min;; fi++)
   3368             {
   3369             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
   3370             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3371             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   3372             if (eptr >= md->end_subject)
   3373               {
   3374               SCHECK_PARTIAL();
   3375               MRRETURN(MATCH_NOMATCH);
   3376               }
   3377             GETCHARINC(d, eptr);
   3378             if (fc == d) MRRETURN(MATCH_NOMATCH);
   3379             }
   3380           }
   3381         else
   3382 #endif
   3383         /* Not UTF-8 mode */
   3384           {
   3385           for (fi = min;; fi++)
   3386             {
   3387             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
   3388             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3389             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   3390             if (eptr >= md->end_subject)
   3391               {
   3392               SCHECK_PARTIAL();
   3393               MRRETURN(MATCH_NOMATCH);
   3394               }
   3395             if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
   3396             }
   3397           }
   3398         /* Control never gets here */
   3399         }
   3400 
   3401       /* Maximize case */
   3402 
   3403       else
   3404         {
   3405         pp = eptr;
   3406 
   3407 #ifdef SUPPORT_UTF8
   3408         /* UTF-8 mode */
   3409         if (utf8)
   3410           {
   3411           register unsigned int d;
   3412           for (i = min; i < max; i++)
   3413             {
   3414             int len = 1;
   3415             if (eptr >= md->end_subject)
   3416               {
   3417               SCHECK_PARTIAL();
   3418               break;
   3419               }
   3420             GETCHARLEN(d, eptr, len);
   3421             if (fc == d) break;
   3422             eptr += len;
   3423             }
   3424           if (possessive) continue;
   3425           for(;;)
   3426             {
   3427             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
   3428             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3429             if (eptr-- == pp) break;        /* Stop if tried at original pos */
   3430             BACKCHAR(eptr);
   3431             }
   3432           }
   3433         else
   3434 #endif
   3435         /* Not UTF-8 mode */
   3436           {
   3437           for (i = min; i < max; i++)
   3438             {
   3439             if (eptr >= md->end_subject)
   3440               {
   3441               SCHECK_PARTIAL();
   3442               break;
   3443               }
   3444             if (fc == *eptr) break;
   3445             eptr++;
   3446             }
   3447           if (possessive) continue;
   3448           while (eptr >= pp)
   3449             {
   3450             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
   3451             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   3452             eptr--;
   3453             }
   3454           }
   3455 
   3456         MRRETURN(MATCH_NOMATCH);
   3457         }
   3458       }
   3459     /* Control never gets here */
   3460 
   3461     /* Match a single character type repeatedly; several different opcodes
   3462     share code. This is very similar to the code for single characters, but we
   3463     repeat it in the interests of efficiency. */
   3464 
   3465     case OP_TYPEEXACT:
   3466     min = max = GET2(ecode, 1);
   3467     minimize = TRUE;
   3468     ecode += 3;
   3469     goto REPEATTYPE;
   3470 
   3471     case OP_TYPEUPTO:
   3472     case OP_TYPEMINUPTO:
   3473     min = 0;
   3474     max = GET2(ecode, 1);
   3475     minimize = *ecode == OP_TYPEMINUPTO;
   3476     ecode += 3;
   3477     goto REPEATTYPE;
   3478 
   3479     case OP_TYPEPOSSTAR:
   3480     possessive = TRUE;
   3481     min = 0;
   3482     max = INT_MAX;
   3483     ecode++;
   3484     goto REPEATTYPE;
   3485 
   3486     case OP_TYPEPOSPLUS:
   3487     possessive = TRUE;
   3488     min = 1;
   3489     max = INT_MAX;
   3490     ecode++;
   3491     goto REPEATTYPE;
   3492 
   3493     case OP_TYPEPOSQUERY:
   3494     possessive = TRUE;
   3495     min = 0;
   3496     max = 1;
   3497     ecode++;
   3498     goto REPEATTYPE;
   3499 
   3500     case OP_TYPEPOSUPTO:
   3501     possessive = TRUE;
   3502     min = 0;
   3503     max = GET2(ecode, 1);
   3504     ecode += 3;
   3505     goto REPEATTYPE;
   3506 
   3507     case OP_TYPESTAR:
   3508     case OP_TYPEMINSTAR:
   3509     case OP_TYPEPLUS:
   3510     case OP_TYPEMINPLUS:
   3511     case OP_TYPEQUERY:
   3512     case OP_TYPEMINQUERY:
   3513     c = *ecode++ - OP_TYPESTAR;
   3514     minimize = (c & 1) != 0;
   3515     min = rep_min[c];                 /* Pick up values from tables; */
   3516     max = rep_max[c];                 /* zero for max => infinity */
   3517     if (max == 0) max = INT_MAX;
   3518 
   3519     /* Common code for all repeated single character type matches. Note that
   3520     in UTF-8 mode, '.' matches a character of any length, but for the other
   3521     character types, the valid characters are all one-byte long. */
   3522 
   3523     REPEATTYPE:
   3524     ctype = *ecode++;      /* Code for the character type */
   3525 
   3526 #ifdef SUPPORT_UCP
   3527     if (ctype == OP_PROP || ctype == OP_NOTPROP)
   3528       {
   3529       prop_fail_result = ctype == OP_NOTPROP;
   3530       prop_type = *ecode++;
   3531       prop_value = *ecode++;
   3532       }
   3533     else prop_type = -1;
   3534 #endif
   3535 
   3536     /* First, ensure the minimum number of matches are present. Use inline
   3537     code for maximizing the speed, and do the type test once at the start
   3538     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
   3539     is tidier. Also separate the UCP code, which can be the same for both UTF-8
   3540     and single-bytes. */
   3541 
   3542     if (min > 0)
   3543       {
   3544 #ifdef SUPPORT_UCP
   3545       if (prop_type >= 0)
   3546         {
   3547         switch(prop_type)
   3548           {
   3549           case PT_ANY:
   3550           if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
   3551           for (i = 1; i <= min; i++)
   3552             {
   3553             if (eptr >= md->end_subject)
   3554               {
   3555               SCHECK_PARTIAL();
   3556               MRRETURN(MATCH_NOMATCH);
   3557               }
   3558             GETCHARINCTEST(c, eptr);
   3559             }
   3560           break;
   3561 
   3562           case PT_LAMP:
   3563           for (i = 1; i <= min; i++)
   3564             {
   3565             if (eptr >= md->end_subject)
   3566               {
   3567               SCHECK_PARTIAL();
   3568               MRRETURN(MATCH_NOMATCH);
   3569               }
   3570             GETCHARINCTEST(c, eptr);
   3571             prop_chartype = UCD_CHARTYPE(c);
   3572             if ((prop_chartype == ucp_Lu ||
   3573                  prop_chartype == ucp_Ll ||
   3574                  prop_chartype == ucp_Lt) == prop_fail_result)
   3575               MRRETURN(MATCH_NOMATCH);
   3576             }
   3577           break;
   3578 
   3579           case PT_GC:
   3580           for (i = 1; i <= min; i++)
   3581             {
   3582             if (eptr >= md->end_subject)
   3583               {
   3584               SCHECK_PARTIAL();
   3585               MRRETURN(MATCH_NOMATCH);
   3586               }
   3587             GETCHARINCTEST(c, eptr);
   3588             prop_category = UCD_CATEGORY(c);
   3589             if ((prop_category == prop_value) == prop_fail_result)
   3590               MRRETURN(MATCH_NOMATCH);
   3591             }
   3592           break;
   3593 
   3594           case PT_PC:
   3595           for (i = 1; i <= min; i++)
   3596             {
   3597             if (eptr >= md->end_subject)
   3598               {
   3599               SCHECK_PARTIAL();
   3600               MRRETURN(MATCH_NOMATCH);
   3601               }
   3602             GETCHARINCTEST(c, eptr);
   3603             prop_chartype = UCD_CHARTYPE(c);
   3604             if ((prop_chartype == prop_value) == prop_fail_result)
   3605               MRRETURN(MATCH_NOMATCH);
   3606             }
   3607           break;
   3608 
   3609           case PT_SC:
   3610           for (i = 1; i <= min; i++)
   3611             {
   3612             if (eptr >= md->end_subject)
   3613               {
   3614               SCHECK_PARTIAL();
   3615               MRRETURN(MATCH_NOMATCH);
   3616               }
   3617             GETCHARINCTEST(c, eptr);
   3618             prop_script = UCD_SCRIPT(c);
   3619             if ((prop_script == prop_value) == prop_fail_result)
   3620               MRRETURN(MATCH_NOMATCH);
   3621             }
   3622           break;
   3623 
   3624           case PT_ALNUM:
   3625           for (i = 1; i <= min; i++)
   3626             {
   3627             if (eptr >= md->end_subject)
   3628               {
   3629               SCHECK_PARTIAL();
   3630               MRRETURN(MATCH_NOMATCH);
   3631               }
   3632             GETCHARINCTEST(c, eptr);
   3633             prop_category = UCD_CATEGORY(c);
   3634             if ((prop_category == ucp_L || prop_category == ucp_N)
   3635                    == prop_fail_result)
   3636               MRRETURN(MATCH_NOMATCH);
   3637             }
   3638           break;
   3639 
   3640           case PT_SPACE:    /* Perl space */
   3641           for (i = 1; i <= min; i++)
   3642             {
   3643             if (eptr >= md->end_subject)
   3644               {
   3645               SCHECK_PARTIAL();
   3646               MRRETURN(MATCH_NOMATCH);
   3647               }
   3648             GETCHARINCTEST(c, eptr);
   3649             prop_category = UCD_CATEGORY(c);
   3650             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   3651                  c == CHAR_FF || c == CHAR_CR)
   3652                    == prop_fail_result)
   3653               MRRETURN(MATCH_NOMATCH);
   3654             }
   3655           break;
   3656 
   3657           case PT_PXSPACE:  /* POSIX space */
   3658           for (i = 1; i <= min; i++)
   3659             {
   3660             if (eptr >= md->end_subject)
   3661               {
   3662               SCHECK_PARTIAL();
   3663               MRRETURN(MATCH_NOMATCH);
   3664               }
   3665             GETCHARINCTEST(c, eptr);
   3666             prop_category = UCD_CATEGORY(c);
   3667             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   3668                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
   3669                    == prop_fail_result)
   3670               MRRETURN(MATCH_NOMATCH);
   3671             }
   3672           break;
   3673 
   3674           case PT_WORD:
   3675           for (i = 1; i <= min; i++)
   3676             {
   3677             if (eptr >= md->end_subject)
   3678               {
   3679               SCHECK_PARTIAL();
   3680               MRRETURN(MATCH_NOMATCH);
   3681               }
   3682             GETCHARINCTEST(c, eptr);
   3683             prop_category = UCD_CATEGORY(c);
   3684             if ((prop_category == ucp_L || prop_category == ucp_N ||
   3685                  c == CHAR_UNDERSCORE)
   3686                    == prop_fail_result)
   3687               MRRETURN(MATCH_NOMATCH);
   3688             }
   3689           break;
   3690 
   3691           /* This should not occur */
   3692 
   3693           default:
   3694           RRETURN(PCRE_ERROR_INTERNAL);
   3695           }
   3696         }
   3697 
   3698       /* Match extended Unicode sequences. We will get here only if the
   3699       support is in the binary; otherwise a compile-time error occurs. */
   3700 
   3701       else if (ctype == OP_EXTUNI)
   3702         {
   3703         for (i = 1; i <= min; i++)
   3704           {
   3705           if (eptr >= md->end_subject)
   3706             {
   3707             SCHECK_PARTIAL();
   3708             MRRETURN(MATCH_NOMATCH);
   3709             }
   3710           GETCHARINCTEST(c, eptr);
   3711           prop_category = UCD_CATEGORY(c);
   3712           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
   3713           while (eptr < md->end_subject)
   3714             {
   3715             int len = 1;
   3716             if (!utf8) c = *eptr;
   3717               else { GETCHARLEN(c, eptr, len); }
   3718             prop_category = UCD_CATEGORY(c);
   3719             if (prop_category != ucp_M) break;
   3720             eptr += len;
   3721             }
   3722           }
   3723         }
   3724 
   3725       else
   3726 #endif     /* SUPPORT_UCP */
   3727 
   3728 /* Handle all other cases when the coding is UTF-8 */
   3729 
   3730 #ifdef SUPPORT_UTF8
   3731       if (utf8) switch(ctype)
   3732         {
   3733         case OP_ANY:
   3734         for (i = 1; i <= min; i++)
   3735           {
   3736           if (eptr >= md->end_subject)
   3737             {
   3738             SCHECK_PARTIAL();
   3739             MRRETURN(MATCH_NOMATCH);
   3740             }
   3741           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
   3742           eptr++;
   3743           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   3744           }
   3745         break;
   3746 
   3747         case OP_ALLANY:
   3748         for (i = 1; i <= min; i++)
   3749           {
   3750           if (eptr >= md->end_subject)
   3751             {
   3752             SCHECK_PARTIAL();
   3753             MRRETURN(MATCH_NOMATCH);
   3754             }
   3755           eptr++;
   3756           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   3757           }
   3758         break;
   3759 
   3760         case OP_ANYBYTE:
   3761         if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
   3762         eptr += min;
   3763         break;
   3764 
   3765         case OP_ANYNL:
   3766         for (i = 1; i <= min; i++)
   3767           {
   3768           if (eptr >= md->end_subject)
   3769             {
   3770             SCHECK_PARTIAL();
   3771             MRRETURN(MATCH_NOMATCH);
   3772             }
   3773           GETCHARINC(c, eptr);
   3774           switch(c)
   3775             {
   3776             default: MRRETURN(MATCH_NOMATCH);
   3777             case 0x000d:
   3778             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
   3779             break;
   3780 
   3781             case 0x000a:
   3782             break;
   3783 
   3784             case 0x000b:
   3785             case 0x000c:
   3786             case 0x0085:
   3787             case 0x2028:
   3788             case 0x2029:
   3789             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
   3790             break;
   3791             }
   3792           }
   3793         break;
   3794 
   3795         case OP_NOT_HSPACE:
   3796         for (i = 1; i <= min; i++)
   3797           {
   3798           if (eptr >= md->end_subject)
   3799             {
   3800             SCHECK_PARTIAL();
   3801             MRRETURN(MATCH_NOMATCH);
   3802             }
   3803           GETCHARINC(c, eptr);
   3804           switch(c)
   3805             {
   3806             default: break;
   3807             case 0x09:      /* HT */
   3808             case 0x20:      /* SPACE */
   3809             case 0xa0:      /* NBSP */
   3810             case 0x1680:    /* OGHAM SPACE MARK */
   3811             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   3812             case 0x2000:    /* EN QUAD */
   3813             case 0x2001:    /* EM QUAD */
   3814             case 0x2002:    /* EN SPACE */
   3815             case 0x2003:    /* EM SPACE */
   3816             case 0x2004:    /* THREE-PER-EM SPACE */
   3817             case 0x2005:    /* FOUR-PER-EM SPACE */
   3818             case 0x2006:    /* SIX-PER-EM SPACE */
   3819             case 0x2007:    /* FIGURE SPACE */
   3820             case 0x2008:    /* PUNCTUATION SPACE */
   3821             case 0x2009:    /* THIN SPACE */
   3822             case 0x200A:    /* HAIR SPACE */
   3823             case 0x202f:    /* NARROW NO-BREAK SPACE */
   3824             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   3825             case 0x3000:    /* IDEOGRAPHIC SPACE */
   3826             MRRETURN(MATCH_NOMATCH);
   3827             }
   3828           }
   3829         break;
   3830 
   3831         case OP_HSPACE:
   3832         for (i = 1; i <= min; i++)
   3833           {
   3834           if (eptr >= md->end_subject)
   3835             {
   3836             SCHECK_PARTIAL();
   3837             MRRETURN(MATCH_NOMATCH);
   3838             }
   3839           GETCHARINC(c, eptr);
   3840           switch(c)
   3841             {
   3842             default: MRRETURN(MATCH_NOMATCH);
   3843             case 0x09:      /* HT */
   3844             case 0x20:      /* SPACE */
   3845             case 0xa0:      /* NBSP */
   3846             case 0x1680:    /* OGHAM SPACE MARK */
   3847             case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   3848             case 0x2000:    /* EN QUAD */
   3849             case 0x2001:    /* EM QUAD */
   3850             case 0x2002:    /* EN SPACE */
   3851             case 0x2003:    /* EM SPACE */
   3852             case 0x2004:    /* THREE-PER-EM SPACE */
   3853             case 0x2005:    /* FOUR-PER-EM SPACE */
   3854             case 0x2006:    /* SIX-PER-EM SPACE */
   3855             case 0x2007:    /* FIGURE SPACE */
   3856             case 0x2008:    /* PUNCTUATION SPACE */
   3857             case 0x2009:    /* THIN SPACE */
   3858             case 0x200A:    /* HAIR SPACE */
   3859             case 0x202f:    /* NARROW NO-BREAK SPACE */
   3860             case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   3861             case 0x3000:    /* IDEOGRAPHIC SPACE */
   3862             break;
   3863             }
   3864           }
   3865         break;
   3866 
   3867         case OP_NOT_VSPACE:
   3868         for (i = 1; i <= min; i++)
   3869           {
   3870           if (eptr >= md->end_subject)
   3871             {
   3872             SCHECK_PARTIAL();
   3873             MRRETURN(MATCH_NOMATCH);
   3874             }
   3875           GETCHARINC(c, eptr);
   3876           switch(c)
   3877             {
   3878             default: break;
   3879             case 0x0a:      /* LF */
   3880             case 0x0b:      /* VT */
   3881             case 0x0c:      /* FF */
   3882             case 0x0d:      /* CR */
   3883             case 0x85:      /* NEL */
   3884             case 0x2028:    /* LINE SEPARATOR */
   3885             case 0x2029:    /* PARAGRAPH SEPARATOR */
   3886             MRRETURN(MATCH_NOMATCH);
   3887             }
   3888           }
   3889         break;
   3890 
   3891         case OP_VSPACE:
   3892         for (i = 1; i <= min; i++)
   3893           {
   3894           if (eptr >= md->end_subject)
   3895             {
   3896             SCHECK_PARTIAL();
   3897             MRRETURN(MATCH_NOMATCH);
   3898             }
   3899           GETCHARINC(c, eptr);
   3900           switch(c)
   3901             {
   3902             default: MRRETURN(MATCH_NOMATCH);
   3903             case 0x0a:      /* LF */
   3904             case 0x0b:      /* VT */
   3905             case 0x0c:      /* FF */
   3906             case 0x0d:      /* CR */
   3907             case 0x85:      /* NEL */
   3908             case 0x2028:    /* LINE SEPARATOR */
   3909             case 0x2029:    /* PARAGRAPH SEPARATOR */
   3910             break;
   3911             }
   3912           }
   3913         break;
   3914 
   3915         case OP_NOT_DIGIT:
   3916         for (i = 1; i <= min; i++)
   3917           {
   3918           if (eptr >= md->end_subject)
   3919             {
   3920             SCHECK_PARTIAL();
   3921             MRRETURN(MATCH_NOMATCH);
   3922             }
   3923           GETCHARINC(c, eptr);
   3924           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
   3925             MRRETURN(MATCH_NOMATCH);
   3926           }
   3927         break;
   3928 
   3929         case OP_DIGIT:
   3930         for (i = 1; i <= min; i++)
   3931           {
   3932           if (eptr >= md->end_subject)
   3933             {
   3934             SCHECK_PARTIAL();
   3935             MRRETURN(MATCH_NOMATCH);
   3936             }
   3937           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
   3938             MRRETURN(MATCH_NOMATCH);
   3939           /* No need to skip more bytes - we know it's a 1-byte character */
   3940           }
   3941         break;
   3942 
   3943         case OP_NOT_WHITESPACE:
   3944         for (i = 1; i <= min; i++)
   3945           {
   3946           if (eptr >= md->end_subject)
   3947             {
   3948             SCHECK_PARTIAL();
   3949             MRRETURN(MATCH_NOMATCH);
   3950             }
   3951           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
   3952             MRRETURN(MATCH_NOMATCH);
   3953           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
   3954           }
   3955         break;
   3956 
   3957         case OP_WHITESPACE:
   3958         for (i = 1; i <= min; i++)
   3959           {
   3960           if (eptr >= md->end_subject)
   3961             {
   3962             SCHECK_PARTIAL();
   3963             MRRETURN(MATCH_NOMATCH);
   3964             }
   3965           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
   3966             MRRETURN(MATCH_NOMATCH);
   3967           /* No need to skip more bytes - we know it's a 1-byte character */
   3968           }
   3969         break;
   3970 
   3971         case OP_NOT_WORDCHAR:
   3972         for (i = 1; i <= min; i++)
   3973           {
   3974           if (eptr >= md->end_subject)
   3975             {
   3976             SCHECK_PARTIAL();
   3977             MRRETURN(MATCH_NOMATCH);
   3978             }
   3979           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
   3980             MRRETURN(MATCH_NOMATCH);
   3981           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
   3982           }
   3983         break;
   3984 
   3985         case OP_WORDCHAR:
   3986         for (i = 1; i <= min; i++)
   3987           {
   3988           if (eptr >= md->end_subject)
   3989             {
   3990             SCHECK_PARTIAL();
   3991             MRRETURN(MATCH_NOMATCH);
   3992             }
   3993           if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
   3994             MRRETURN(MATCH_NOMATCH);
   3995           /* No need to skip more bytes - we know it's a 1-byte character */
   3996           }
   3997         break;
   3998 
   3999         default:
   4000         RRETURN(PCRE_ERROR_INTERNAL);
   4001         }  /* End switch(ctype) */
   4002 
   4003       else
   4004 #endif     /* SUPPORT_UTF8 */
   4005 
   4006       /* Code for the non-UTF-8 case for minimum matching of operators other
   4007       than OP_PROP and OP_NOTPROP. */
   4008 
   4009       switch(ctype)
   4010         {
   4011         case OP_ANY:
   4012         for (i = 1; i <= min; i++)
   4013           {
   4014           if (eptr >= md->end_subject)
   4015             {
   4016             SCHECK_PARTIAL();
   4017             MRRETURN(MATCH_NOMATCH);
   4018             }
   4019           if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
   4020           eptr++;
   4021           }
   4022         break;
   4023 
   4024         case OP_ALLANY:
   4025         if (eptr > md->end_subject - min)
   4026           {
   4027           SCHECK_PARTIAL();
   4028           MRRETURN(MATCH_NOMATCH);
   4029           }
   4030         eptr += min;
   4031         break;
   4032 
   4033         case OP_ANYBYTE:
   4034         if (eptr > md->end_subject - min)
   4035           {
   4036           SCHECK_PARTIAL();
   4037           MRRETURN(MATCH_NOMATCH);
   4038           }
   4039         eptr += min;
   4040         break;
   4041 
   4042         case OP_ANYNL:
   4043         for (i = 1; i <= min; i++)
   4044           {
   4045           if (eptr >= md->end_subject)
   4046             {
   4047             SCHECK_PARTIAL();
   4048             MRRETURN(MATCH_NOMATCH);
   4049             }
   4050           switch(*eptr++)
   4051             {
   4052             default: MRRETURN(MATCH_NOMATCH);
   4053             case 0x000d:
   4054             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
   4055             break;
   4056             case 0x000a:
   4057             break;
   4058 
   4059             case 0x000b:
   4060             case 0x000c:
   4061             case 0x0085:
   4062             if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
   4063             break;
   4064             }
   4065           }
   4066         break;
   4067 
   4068         case OP_NOT_HSPACE:
   4069         for (i = 1; i <= min; i++)
   4070           {
   4071           if (eptr >= md->end_subject)
   4072             {
   4073             SCHECK_PARTIAL();
   4074             MRRETURN(MATCH_NOMATCH);
   4075             }
   4076           switch(*eptr++)
   4077             {
   4078             default: break;
   4079             case 0x09:      /* HT */
   4080             case 0x20:      /* SPACE */
   4081             case 0xa0:      /* NBSP */
   4082             MRRETURN(MATCH_NOMATCH);
   4083             }
   4084           }
   4085         break;
   4086 
   4087         case OP_HSPACE:
   4088         for (i = 1; i <= min; i++)
   4089           {
   4090           if (eptr >= md->end_subject)
   4091             {
   4092             SCHECK_PARTIAL();
   4093             MRRETURN(MATCH_NOMATCH);
   4094             }
   4095           switch(*eptr++)
   4096             {
   4097             default: MRRETURN(MATCH_NOMATCH);
   4098             case 0x09:      /* HT */
   4099             case 0x20:      /* SPACE */
   4100             case 0xa0:      /* NBSP */
   4101             break;
   4102             }
   4103           }
   4104         break;
   4105 
   4106         case OP_NOT_VSPACE:
   4107         for (i = 1; i <= min; i++)
   4108           {
   4109           if (eptr >= md->end_subject)
   4110             {
   4111             SCHECK_PARTIAL();
   4112             MRRETURN(MATCH_NOMATCH);
   4113             }
   4114           switch(*eptr++)
   4115             {
   4116             default: break;
   4117             case 0x0a:      /* LF */
   4118             case 0x0b:      /* VT */
   4119             case 0x0c:      /* FF */
   4120             case 0x0d:      /* CR */
   4121             case 0x85:      /* NEL */
   4122             MRRETURN(MATCH_NOMATCH);
   4123             }
   4124           }
   4125         break;
   4126 
   4127         case OP_VSPACE:
   4128         for (i = 1; i <= min; i++)
   4129           {
   4130           if (eptr >= md->end_subject)
   4131             {
   4132             SCHECK_PARTIAL();
   4133             MRRETURN(MATCH_NOMATCH);
   4134             }
   4135           switch(*eptr++)
   4136             {
   4137             default: MRRETURN(MATCH_NOMATCH);
   4138             case 0x0a:      /* LF */
   4139             case 0x0b:      /* VT */
   4140             case 0x0c:      /* FF */
   4141             case 0x0d:      /* CR */
   4142             case 0x85:      /* NEL */
   4143             break;
   4144             }
   4145           }
   4146         break;
   4147 
   4148         case OP_NOT_DIGIT:
   4149         for (i = 1; i <= min; i++)
   4150           {
   4151           if (eptr >= md->end_subject)
   4152             {
   4153             SCHECK_PARTIAL();
   4154             MRRETURN(MATCH_NOMATCH);
   4155             }
   4156           if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
   4157           }
   4158         break;
   4159 
   4160         case OP_DIGIT:
   4161         for (i = 1; i <= min; i++)
   4162           {
   4163           if (eptr >= md->end_subject)
   4164             {
   4165             SCHECK_PARTIAL();
   4166             MRRETURN(MATCH_NOMATCH);
   4167             }
   4168           if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
   4169           }
   4170         break;
   4171 
   4172         case OP_NOT_WHITESPACE:
   4173         for (i = 1; i <= min; i++)
   4174           {
   4175           if (eptr >= md->end_subject)
   4176             {
   4177             SCHECK_PARTIAL();
   4178             MRRETURN(MATCH_NOMATCH);
   4179             }
   4180           if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
   4181           }
   4182         break;
   4183 
   4184         case OP_WHITESPACE:
   4185         for (i = 1; i <= min; i++)
   4186           {
   4187           if (eptr >= md->end_subject)
   4188             {
   4189             SCHECK_PARTIAL();
   4190             MRRETURN(MATCH_NOMATCH);
   4191             }
   4192           if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
   4193           }
   4194         break;
   4195 
   4196         case OP_NOT_WORDCHAR:
   4197         for (i = 1; i <= min; i++)
   4198           {
   4199           if (eptr >= md->end_subject)
   4200             {
   4201             SCHECK_PARTIAL();
   4202             MRRETURN(MATCH_NOMATCH);
   4203             }
   4204           if ((md->ctypes[*eptr++] & ctype_word) != 0)
   4205             MRRETURN(MATCH_NOMATCH);
   4206           }
   4207         break;
   4208 
   4209         case OP_WORDCHAR:
   4210         for (i = 1; i <= min; i++)
   4211           {
   4212           if (eptr >= md->end_subject)
   4213             {
   4214             SCHECK_PARTIAL();
   4215             MRRETURN(MATCH_NOMATCH);
   4216             }
   4217           if ((md->ctypes[*eptr++] & ctype_word) == 0)
   4218             MRRETURN(MATCH_NOMATCH);
   4219           }
   4220         break;
   4221 
   4222         default:
   4223         RRETURN(PCRE_ERROR_INTERNAL);
   4224         }
   4225       }
   4226 
   4227     /* If min = max, continue at the same level without recursing */
   4228 
   4229     if (min == max) continue;
   4230 
   4231     /* If minimizing, we have to test the rest of the pattern before each
   4232     subsequent match. Again, separate the UTF-8 case for speed, and also
   4233     separate the UCP cases. */
   4234 
   4235     if (minimize)
   4236       {
   4237 #ifdef SUPPORT_UCP
   4238       if (prop_type >= 0)
   4239         {
   4240         switch(prop_type)
   4241           {
   4242           case PT_ANY:
   4243           for (fi = min;; fi++)
   4244             {
   4245             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
   4246             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4247             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4248             if (eptr >= md->end_subject)
   4249               {
   4250               SCHECK_PARTIAL();
   4251               MRRETURN(MATCH_NOMATCH);
   4252               }
   4253             GETCHARINCTEST(c, eptr);
   4254             if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
   4255             }
   4256           /* Control never gets here */
   4257 
   4258           case PT_LAMP:
   4259           for (fi = min;; fi++)
   4260             {
   4261             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
   4262             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4263             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4264             if (eptr >= md->end_subject)
   4265               {
   4266               SCHECK_PARTIAL();
   4267               MRRETURN(MATCH_NOMATCH);
   4268               }
   4269             GETCHARINCTEST(c, eptr);
   4270             prop_chartype = UCD_CHARTYPE(c);
   4271             if ((prop_chartype == ucp_Lu ||
   4272                  prop_chartype == ucp_Ll ||
   4273                  prop_chartype == ucp_Lt) == prop_fail_result)
   4274               MRRETURN(MATCH_NOMATCH);
   4275             }
   4276           /* Control never gets here */
   4277 
   4278           case PT_GC:
   4279           for (fi = min;; fi++)
   4280             {
   4281             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
   4282             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4283             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4284             if (eptr >= md->end_subject)
   4285               {
   4286               SCHECK_PARTIAL();
   4287               MRRETURN(MATCH_NOMATCH);
   4288               }
   4289             GETCHARINCTEST(c, eptr);
   4290             prop_category = UCD_CATEGORY(c);
   4291             if ((prop_category == prop_value) == prop_fail_result)
   4292               MRRETURN(MATCH_NOMATCH);
   4293             }
   4294           /* Control never gets here */
   4295 
   4296           case PT_PC:
   4297           for (fi = min;; fi++)
   4298             {
   4299             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
   4300             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4301             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4302             if (eptr >= md->end_subject)
   4303               {
   4304               SCHECK_PARTIAL();
   4305               MRRETURN(MATCH_NOMATCH);
   4306               }
   4307             GETCHARINCTEST(c, eptr);
   4308             prop_chartype = UCD_CHARTYPE(c);
   4309             if ((prop_chartype == prop_value) == prop_fail_result)
   4310               MRRETURN(MATCH_NOMATCH);
   4311             }
   4312           /* Control never gets here */
   4313 
   4314           case PT_SC:
   4315           for (fi = min;; fi++)
   4316             {
   4317             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
   4318             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4319             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4320             if (eptr >= md->end_subject)
   4321               {
   4322               SCHECK_PARTIAL();
   4323               MRRETURN(MATCH_NOMATCH);
   4324               }
   4325             GETCHARINCTEST(c, eptr);
   4326             prop_script = UCD_SCRIPT(c);
   4327             if ((prop_script == prop_value) == prop_fail_result)
   4328               MRRETURN(MATCH_NOMATCH);
   4329             }
   4330           /* Control never gets here */
   4331 
   4332           case PT_ALNUM:
   4333           for (fi = min;; fi++)
   4334             {
   4335             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
   4336             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4337             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4338             if (eptr >= md->end_subject)
   4339               {
   4340               SCHECK_PARTIAL();
   4341               MRRETURN(MATCH_NOMATCH);
   4342               }
   4343             GETCHARINCTEST(c, eptr);
   4344             prop_category = UCD_CATEGORY(c);
   4345             if ((prop_category == ucp_L || prop_category == ucp_N)
   4346                    == prop_fail_result)
   4347               MRRETURN(MATCH_NOMATCH);
   4348             }
   4349           /* Control never gets here */
   4350 
   4351           case PT_SPACE:    /* Perl space */
   4352           for (fi = min;; fi++)
   4353             {
   4354             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
   4355             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4356             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4357             if (eptr >= md->end_subject)
   4358               {
   4359               SCHECK_PARTIAL();
   4360               MRRETURN(MATCH_NOMATCH);
   4361               }
   4362             GETCHARINCTEST(c, eptr);
   4363             prop_category = UCD_CATEGORY(c);
   4364             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   4365                  c == CHAR_FF || c == CHAR_CR)
   4366                    == prop_fail_result)
   4367               MRRETURN(MATCH_NOMATCH);
   4368             }
   4369           /* Control never gets here */
   4370 
   4371           case PT_PXSPACE:  /* POSIX space */
   4372           for (fi = min;; fi++)
   4373             {
   4374             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
   4375             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4376             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4377             if (eptr >= md->end_subject)
   4378               {
   4379               SCHECK_PARTIAL();
   4380               MRRETURN(MATCH_NOMATCH);
   4381               }
   4382             GETCHARINCTEST(c, eptr);
   4383             prop_category = UCD_CATEGORY(c);
   4384             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   4385                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
   4386                    == prop_fail_result)
   4387               MRRETURN(MATCH_NOMATCH);
   4388             }
   4389           /* Control never gets here */
   4390 
   4391           case PT_WORD:
   4392           for (fi = min;; fi++)
   4393             {
   4394             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
   4395             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4396             if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4397             if (eptr >= md->end_subject)
   4398               {
   4399               SCHECK_PARTIAL();
   4400               MRRETURN(MATCH_NOMATCH);
   4401               }
   4402             GETCHARINCTEST(c, eptr);
   4403             prop_category = UCD_CATEGORY(c);
   4404             if ((prop_category == ucp_L ||
   4405                  prop_category == ucp_N ||
   4406                  c == CHAR_UNDERSCORE)
   4407                    == prop_fail_result)
   4408               MRRETURN(MATCH_NOMATCH);
   4409             }
   4410           /* Control never gets here */
   4411 
   4412           /* This should never occur */
   4413 
   4414           default:
   4415           RRETURN(PCRE_ERROR_INTERNAL);
   4416           }
   4417         }
   4418 
   4419       /* Match extended Unicode sequences. We will get here only if the
   4420       support is in the binary; otherwise a compile-time error occurs. */
   4421 
   4422       else if (ctype == OP_EXTUNI)
   4423         {
   4424         for (fi = min;; fi++)
   4425           {
   4426           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
   4427           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4428           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4429           if (eptr >= md->end_subject)
   4430             {
   4431             SCHECK_PARTIAL();
   4432             MRRETURN(MATCH_NOMATCH);
   4433             }
   4434           GETCHARINCTEST(c, eptr);
   4435           prop_category = UCD_CATEGORY(c);
   4436           if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
   4437           while (eptr < md->end_subject)
   4438             {
   4439             int len = 1;
   4440             if (!utf8) c = *eptr;
   4441               else { GETCHARLEN(c, eptr, len); }
   4442             prop_category = UCD_CATEGORY(c);
   4443             if (prop_category != ucp_M) break;
   4444             eptr += len;
   4445             }
   4446           }
   4447         }
   4448 
   4449       else
   4450 #endif     /* SUPPORT_UCP */
   4451 
   4452 #ifdef SUPPORT_UTF8
   4453       /* UTF-8 mode */
   4454       if (utf8)
   4455         {
   4456         for (fi = min;; fi++)
   4457           {
   4458           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
   4459           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4460           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4461           if (eptr >= md->end_subject)
   4462             {
   4463             SCHECK_PARTIAL();
   4464             MRRETURN(MATCH_NOMATCH);
   4465             }
   4466           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   4467             MRRETURN(MATCH_NOMATCH);
   4468           GETCHARINC(c, eptr);
   4469           switch(ctype)
   4470             {
   4471             case OP_ANY:        /* This is the non-NL case */
   4472             case OP_ALLANY:
   4473             case OP_ANYBYTE:
   4474             break;
   4475 
   4476             case OP_ANYNL:
   4477             switch(c)
   4478               {
   4479               default: MRRETURN(MATCH_NOMATCH);
   4480               case 0x000d:
   4481               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
   4482               break;
   4483               case 0x000a:
   4484               break;
   4485 
   4486               case 0x000b:
   4487               case 0x000c:
   4488               case 0x0085:
   4489               case 0x2028:
   4490               case 0x2029:
   4491               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
   4492               break;
   4493               }
   4494             break;
   4495 
   4496             case OP_NOT_HSPACE:
   4497             switch(c)
   4498               {
   4499               default: break;
   4500               case 0x09:      /* HT */
   4501               case 0x20:      /* SPACE */
   4502               case 0xa0:      /* NBSP */
   4503               case 0x1680:    /* OGHAM SPACE MARK */
   4504               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   4505               case 0x2000:    /* EN QUAD */
   4506               case 0x2001:    /* EM QUAD */
   4507               case 0x2002:    /* EN SPACE */
   4508               case 0x2003:    /* EM SPACE */
   4509               case 0x2004:    /* THREE-PER-EM SPACE */
   4510               case 0x2005:    /* FOUR-PER-EM SPACE */
   4511               case 0x2006:    /* SIX-PER-EM SPACE */
   4512               case 0x2007:    /* FIGURE SPACE */
   4513               case 0x2008:    /* PUNCTUATION SPACE */
   4514               case 0x2009:    /* THIN SPACE */
   4515               case 0x200A:    /* HAIR SPACE */
   4516               case 0x202f:    /* NARROW NO-BREAK SPACE */
   4517               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   4518               case 0x3000:    /* IDEOGRAPHIC SPACE */
   4519               MRRETURN(MATCH_NOMATCH);
   4520               }
   4521             break;
   4522 
   4523             case OP_HSPACE:
   4524             switch(c)
   4525               {
   4526               default: MRRETURN(MATCH_NOMATCH);
   4527               case 0x09:      /* HT */
   4528               case 0x20:      /* SPACE */
   4529               case 0xa0:      /* NBSP */
   4530               case 0x1680:    /* OGHAM SPACE MARK */
   4531               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   4532               case 0x2000:    /* EN QUAD */
   4533               case 0x2001:    /* EM QUAD */
   4534               case 0x2002:    /* EN SPACE */
   4535               case 0x2003:    /* EM SPACE */
   4536               case 0x2004:    /* THREE-PER-EM SPACE */
   4537               case 0x2005:    /* FOUR-PER-EM SPACE */
   4538               case 0x2006:    /* SIX-PER-EM SPACE */
   4539               case 0x2007:    /* FIGURE SPACE */
   4540               case 0x2008:    /* PUNCTUATION SPACE */
   4541               case 0x2009:    /* THIN SPACE */
   4542               case 0x200A:    /* HAIR SPACE */
   4543               case 0x202f:    /* NARROW NO-BREAK SPACE */
   4544               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   4545               case 0x3000:    /* IDEOGRAPHIC SPACE */
   4546               break;
   4547               }
   4548             break;
   4549 
   4550             case OP_NOT_VSPACE:
   4551             switch(c)
   4552               {
   4553               default: break;
   4554               case 0x0a:      /* LF */
   4555               case 0x0b:      /* VT */
   4556               case 0x0c:      /* FF */
   4557               case 0x0d:      /* CR */
   4558               case 0x85:      /* NEL */
   4559               case 0x2028:    /* LINE SEPARATOR */
   4560               case 0x2029:    /* PARAGRAPH SEPARATOR */
   4561               MRRETURN(MATCH_NOMATCH);
   4562               }
   4563             break;
   4564 
   4565             case OP_VSPACE:
   4566             switch(c)
   4567               {
   4568               default: MRRETURN(MATCH_NOMATCH);
   4569               case 0x0a:      /* LF */
   4570               case 0x0b:      /* VT */
   4571               case 0x0c:      /* FF */
   4572               case 0x0d:      /* CR */
   4573               case 0x85:      /* NEL */
   4574               case 0x2028:    /* LINE SEPARATOR */
   4575               case 0x2029:    /* PARAGRAPH SEPARATOR */
   4576               break;
   4577               }
   4578             break;
   4579 
   4580             case OP_NOT_DIGIT:
   4581             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
   4582               MRRETURN(MATCH_NOMATCH);
   4583             break;
   4584 
   4585             case OP_DIGIT:
   4586             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
   4587               MRRETURN(MATCH_NOMATCH);
   4588             break;
   4589 
   4590             case OP_NOT_WHITESPACE:
   4591             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
   4592               MRRETURN(MATCH_NOMATCH);
   4593             break;
   4594 
   4595             case OP_WHITESPACE:
   4596             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
   4597               MRRETURN(MATCH_NOMATCH);
   4598             break;
   4599 
   4600             case OP_NOT_WORDCHAR:
   4601             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
   4602               MRRETURN(MATCH_NOMATCH);
   4603             break;
   4604 
   4605             case OP_WORDCHAR:
   4606             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
   4607               MRRETURN(MATCH_NOMATCH);
   4608             break;
   4609 
   4610             default:
   4611             RRETURN(PCRE_ERROR_INTERNAL);
   4612             }
   4613           }
   4614         }
   4615       else
   4616 #endif
   4617       /* Not UTF-8 mode */
   4618         {
   4619         for (fi = min;; fi++)
   4620           {
   4621           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
   4622           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4623           if (fi >= max) MRRETURN(MATCH_NOMATCH);
   4624           if (eptr >= md->end_subject)
   4625             {
   4626             SCHECK_PARTIAL();
   4627             MRRETURN(MATCH_NOMATCH);
   4628             }
   4629           if (ctype == OP_ANY && IS_NEWLINE(eptr))
   4630             MRRETURN(MATCH_NOMATCH);
   4631           c = *eptr++;
   4632           switch(ctype)
   4633             {
   4634             case OP_ANY:     /* This is the non-NL case */
   4635             case OP_ALLANY:
   4636             case OP_ANYBYTE:
   4637             break;
   4638 
   4639             case OP_ANYNL:
   4640             switch(c)
   4641               {
   4642               default: MRRETURN(MATCH_NOMATCH);
   4643               case 0x000d:
   4644               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
   4645               break;
   4646 
   4647               case 0x000a:
   4648               break;
   4649 
   4650               case 0x000b:
   4651               case 0x000c:
   4652               case 0x0085:
   4653               if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
   4654               break;
   4655               }
   4656             break;
   4657 
   4658             case OP_NOT_HSPACE:
   4659             switch(c)
   4660               {
   4661               default: break;
   4662               case 0x09:      /* HT */
   4663               case 0x20:      /* SPACE */
   4664               case 0xa0:      /* NBSP */
   4665               MRRETURN(MATCH_NOMATCH);
   4666               }
   4667             break;
   4668 
   4669             case OP_HSPACE:
   4670             switch(c)
   4671               {
   4672               default: MRRETURN(MATCH_NOMATCH);
   4673               case 0x09:      /* HT */
   4674               case 0x20:      /* SPACE */
   4675               case 0xa0:      /* NBSP */
   4676               break;
   4677               }
   4678             break;
   4679 
   4680             case OP_NOT_VSPACE:
   4681             switch(c)
   4682               {
   4683               default: break;
   4684               case 0x0a:      /* LF */
   4685               case 0x0b:      /* VT */
   4686               case 0x0c:      /* FF */
   4687               case 0x0d:      /* CR */
   4688               case 0x85:      /* NEL */
   4689               MRRETURN(MATCH_NOMATCH);
   4690               }
   4691             break;
   4692 
   4693             case OP_VSPACE:
   4694             switch(c)
   4695               {
   4696               default: MRRETURN(MATCH_NOMATCH);
   4697               case 0x0a:      /* LF */
   4698               case 0x0b:      /* VT */
   4699               case 0x0c:      /* FF */
   4700               case 0x0d:      /* CR */
   4701               case 0x85:      /* NEL */
   4702               break;
   4703               }
   4704             break;
   4705 
   4706             case OP_NOT_DIGIT:
   4707             if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
   4708             break;
   4709 
   4710             case OP_DIGIT:
   4711             if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
   4712             break;
   4713 
   4714             case OP_NOT_WHITESPACE:
   4715             if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
   4716             break;
   4717 
   4718             case OP_WHITESPACE:
   4719             if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
   4720             break;
   4721 
   4722             case OP_NOT_WORDCHAR:
   4723             if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
   4724             break;
   4725 
   4726             case OP_WORDCHAR:
   4727             if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
   4728             break;
   4729 
   4730             default:
   4731             RRETURN(PCRE_ERROR_INTERNAL);
   4732             }
   4733           }
   4734         }
   4735       /* Control never gets here */
   4736       }
   4737 
   4738     /* If maximizing, it is worth using inline code for speed, doing the type
   4739     test once at the start (i.e. keep it out of the loop). Again, keep the
   4740     UTF-8 and UCP stuff separate. */
   4741 
   4742     else
   4743       {
   4744       pp = eptr;  /* Remember where we started */
   4745 
   4746 #ifdef SUPPORT_UCP
   4747       if (prop_type >= 0)
   4748         {
   4749         switch(prop_type)
   4750           {
   4751           case PT_ANY:
   4752           for (i = min; i < max; i++)
   4753             {
   4754             int len = 1;
   4755             if (eptr >= md->end_subject)
   4756               {
   4757               SCHECK_PARTIAL();
   4758               break;
   4759               }
   4760             GETCHARLENTEST(c, eptr, len);
   4761             if (prop_fail_result) break;
   4762             eptr+= len;
   4763             }
   4764           break;
   4765 
   4766           case PT_LAMP:
   4767           for (i = min; i < max; i++)
   4768             {
   4769             int len = 1;
   4770             if (eptr >= md->end_subject)
   4771               {
   4772               SCHECK_PARTIAL();
   4773               break;
   4774               }
   4775             GETCHARLENTEST(c, eptr, len);
   4776             prop_chartype = UCD_CHARTYPE(c);
   4777             if ((prop_chartype == ucp_Lu ||
   4778                  prop_chartype == ucp_Ll ||
   4779                  prop_chartype == ucp_Lt) == prop_fail_result)
   4780               break;
   4781             eptr+= len;
   4782             }
   4783           break;
   4784 
   4785           case PT_GC:
   4786           for (i = min; i < max; i++)
   4787             {
   4788             int len = 1;
   4789             if (eptr >= md->end_subject)
   4790               {
   4791               SCHECK_PARTIAL();
   4792               break;
   4793               }
   4794             GETCHARLENTEST(c, eptr, len);
   4795             prop_category = UCD_CATEGORY(c);
   4796             if ((prop_category == prop_value) == prop_fail_result)
   4797               break;
   4798             eptr+= len;
   4799             }
   4800           break;
   4801 
   4802           case PT_PC:
   4803           for (i = min; i < max; i++)
   4804             {
   4805             int len = 1;
   4806             if (eptr >= md->end_subject)
   4807               {
   4808               SCHECK_PARTIAL();
   4809               break;
   4810               }
   4811             GETCHARLENTEST(c, eptr, len);
   4812             prop_chartype = UCD_CHARTYPE(c);
   4813             if ((prop_chartype == prop_value) == prop_fail_result)
   4814               break;
   4815             eptr+= len;
   4816             }
   4817           break;
   4818 
   4819           case PT_SC:
   4820           for (i = min; i < max; i++)
   4821             {
   4822             int len = 1;
   4823             if (eptr >= md->end_subject)
   4824               {
   4825               SCHECK_PARTIAL();
   4826               break;
   4827               }
   4828             GETCHARLENTEST(c, eptr, len);
   4829             prop_script = UCD_SCRIPT(c);
   4830             if ((prop_script == prop_value) == prop_fail_result)
   4831               break;
   4832             eptr+= len;
   4833             }
   4834           break;
   4835 
   4836           case PT_ALNUM:
   4837           for (i = min; i < max; i++)
   4838             {
   4839             int len = 1;
   4840             if (eptr >= md->end_subject)
   4841               {
   4842               SCHECK_PARTIAL();
   4843               break;
   4844               }
   4845             GETCHARLENTEST(c, eptr, len);
   4846             prop_category = UCD_CATEGORY(c);
   4847             if ((prop_category == ucp_L || prop_category == ucp_N)
   4848                  == prop_fail_result)
   4849               break;
   4850             eptr+= len;
   4851             }
   4852           break;
   4853 
   4854           case PT_SPACE:    /* Perl space */
   4855           for (i = min; i < max; i++)
   4856             {
   4857             int len = 1;
   4858             if (eptr >= md->end_subject)
   4859               {
   4860               SCHECK_PARTIAL();
   4861               break;
   4862               }
   4863             GETCHARLENTEST(c, eptr, len);
   4864             prop_category = UCD_CATEGORY(c);
   4865             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   4866                  c == CHAR_FF || c == CHAR_CR)
   4867                  == prop_fail_result)
   4868               break;
   4869             eptr+= len;
   4870             }
   4871           break;
   4872 
   4873           case PT_PXSPACE:  /* POSIX space */
   4874           for (i = min; i < max; i++)
   4875             {
   4876             int len = 1;
   4877             if (eptr >= md->end_subject)
   4878               {
   4879               SCHECK_PARTIAL();
   4880               break;
   4881               }
   4882             GETCHARLENTEST(c, eptr, len);
   4883             prop_category = UCD_CATEGORY(c);
   4884             if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
   4885                  c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
   4886                  == prop_fail_result)
   4887               break;
   4888             eptr+= len;
   4889             }
   4890           break;
   4891 
   4892           case PT_WORD:
   4893           for (i = min; i < max; i++)
   4894             {
   4895             int len = 1;
   4896             if (eptr >= md->end_subject)
   4897               {
   4898               SCHECK_PARTIAL();
   4899               break;
   4900               }
   4901             GETCHARLENTEST(c, eptr, len);
   4902             prop_category = UCD_CATEGORY(c);
   4903             if ((prop_category == ucp_L || prop_category == ucp_N ||
   4904                  c == CHAR_UNDERSCORE) == prop_fail_result)
   4905               break;
   4906             eptr+= len;
   4907             }
   4908           break;
   4909 
   4910           default:
   4911           RRETURN(PCRE_ERROR_INTERNAL);
   4912           }
   4913 
   4914         /* eptr is now past the end of the maximum run */
   4915 
   4916         if (possessive) continue;
   4917         for(;;)
   4918           {
   4919           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
   4920           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4921           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   4922           if (utf8) BACKCHAR(eptr);
   4923           }
   4924         }
   4925 
   4926       /* Match extended Unicode sequences. We will get here only if the
   4927       support is in the binary; otherwise a compile-time error occurs. */
   4928 
   4929       else if (ctype == OP_EXTUNI)
   4930         {
   4931         for (i = min; i < max; i++)
   4932           {
   4933           if (eptr >= md->end_subject)
   4934             {
   4935             SCHECK_PARTIAL();
   4936             break;
   4937             }
   4938           GETCHARINCTEST(c, eptr);
   4939           prop_category = UCD_CATEGORY(c);
   4940           if (prop_category == ucp_M) break;
   4941           while (eptr < md->end_subject)
   4942             {
   4943             int len = 1;
   4944             if (!utf8) c = *eptr; else
   4945               {
   4946               GETCHARLEN(c, eptr, len);
   4947               }
   4948             prop_category = UCD_CATEGORY(c);
   4949             if (prop_category != ucp_M) break;
   4950             eptr += len;
   4951             }
   4952           }
   4953 
   4954         /* eptr is now past the end of the maximum run */
   4955 
   4956         if (possessive) continue;
   4957 
   4958         for(;;)
   4959           {
   4960           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
   4961           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   4962           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   4963           for (;;)                        /* Move back over one extended */
   4964             {
   4965             int len = 1;
   4966             if (!utf8) c = *eptr; else
   4967               {
   4968               BACKCHAR(eptr);
   4969               GETCHARLEN(c, eptr, len);
   4970               }
   4971             prop_category = UCD_CATEGORY(c);
   4972             if (prop_category != ucp_M) break;
   4973             eptr--;
   4974             }
   4975           }
   4976         }
   4977 
   4978       else
   4979 #endif   /* SUPPORT_UCP */
   4980 
   4981 #ifdef SUPPORT_UTF8
   4982       /* UTF-8 mode */
   4983 
   4984       if (utf8)
   4985         {
   4986         switch(ctype)
   4987           {
   4988           case OP_ANY:
   4989           if (max < INT_MAX)
   4990             {
   4991             for (i = min; i < max; i++)
   4992               {
   4993               if (eptr >= md->end_subject)
   4994                 {
   4995                 SCHECK_PARTIAL();
   4996                 break;
   4997                 }
   4998               if (IS_NEWLINE(eptr)) break;
   4999               eptr++;
   5000               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   5001               }
   5002             }
   5003 
   5004           /* Handle unlimited UTF-8 repeat */
   5005 
   5006           else
   5007             {
   5008             for (i = min; i < max; i++)
   5009               {
   5010               if (eptr >= md->end_subject)
   5011                 {
   5012                 SCHECK_PARTIAL();
   5013                 break;
   5014                 }
   5015               if (IS_NEWLINE(eptr)) break;
   5016               eptr++;
   5017               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   5018               }
   5019             }
   5020           break;
   5021 
   5022           case OP_ALLANY:
   5023           if (max < INT_MAX)
   5024             {
   5025             for (i = min; i < max; i++)
   5026               {
   5027               if (eptr >= md->end_subject)
   5028                 {
   5029                 SCHECK_PARTIAL();
   5030                 break;
   5031                 }
   5032               eptr++;
   5033               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
   5034               }
   5035             }
   5036           else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
   5037           break;
   5038 
   5039           /* The byte case is the same as non-UTF8 */
   5040 
   5041           case OP_ANYBYTE:
   5042           c = max - min;
   5043           if (c > (unsigned int)(md->end_subject - eptr))
   5044             {
   5045             eptr = md->end_subject;
   5046             SCHECK_PARTIAL();
   5047             }
   5048           else eptr += c;
   5049           break;
   5050 
   5051           case OP_ANYNL:
   5052           for (i = min; i < max; i++)
   5053             {
   5054             int len = 1;
   5055             if (eptr >= md->end_subject)
   5056               {
   5057               SCHECK_PARTIAL();
   5058               break;
   5059               }
   5060             GETCHARLEN(c, eptr, len);
   5061             if (c == 0x000d)
   5062               {
   5063               if (++eptr >= md->end_subject) break;
   5064               if (*eptr == 0x000a) eptr++;
   5065               }
   5066             else
   5067               {
   5068               if (c != 0x000a &&
   5069                   (md->bsr_anycrlf ||
   5070                    (c != 0x000b && c != 0x000c &&
   5071                     c != 0x0085 && c != 0x2028 && c != 0x2029)))
   5072                 break;
   5073               eptr += len;
   5074               }
   5075             }
   5076           break;
   5077 
   5078           case OP_NOT_HSPACE:
   5079           case OP_HSPACE:
   5080           for (i = min; i < max; i++)
   5081             {
   5082             BOOL gotspace;
   5083             int len = 1;
   5084             if (eptr >= md->end_subject)
   5085               {
   5086               SCHECK_PARTIAL();
   5087               break;
   5088               }
   5089             GETCHARLEN(c, eptr, len);
   5090             switch(c)
   5091               {
   5092               default: gotspace = FALSE; break;
   5093               case 0x09:      /* HT */
   5094               case 0x20:      /* SPACE */
   5095               case 0xa0:      /* NBSP */
   5096               case 0x1680:    /* OGHAM SPACE MARK */
   5097               case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
   5098               case 0x2000:    /* EN QUAD */
   5099               case 0x2001:    /* EM QUAD */
   5100               case 0x2002:    /* EN SPACE */
   5101               case 0x2003:    /* EM SPACE */
   5102               case 0x2004:    /* THREE-PER-EM SPACE */
   5103               case 0x2005:    /* FOUR-PER-EM SPACE */
   5104               case 0x2006:    /* SIX-PER-EM SPACE */
   5105               case 0x2007:    /* FIGURE SPACE */
   5106               case 0x2008:    /* PUNCTUATION SPACE */
   5107               case 0x2009:    /* THIN SPACE */
   5108               case 0x200A:    /* HAIR SPACE */
   5109               case 0x202f:    /* NARROW NO-BREAK SPACE */
   5110               case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
   5111               case 0x3000:    /* IDEOGRAPHIC SPACE */
   5112               gotspace = TRUE;
   5113               break;
   5114               }
   5115             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
   5116             eptr += len;
   5117             }
   5118           break;
   5119 
   5120           case OP_NOT_VSPACE:
   5121           case OP_VSPACE:
   5122           for (i = min; i < max; i++)
   5123             {
   5124             BOOL gotspace;
   5125             int len = 1;
   5126             if (eptr >= md->end_subject)
   5127               {
   5128               SCHECK_PARTIAL();
   5129               break;
   5130               }
   5131             GETCHARLEN(c, eptr, len);
   5132             switch(c)
   5133               {
   5134               default: gotspace = FALSE; break;
   5135               case 0x0a:      /* LF */
   5136               case 0x0b:      /* VT */
   5137               case 0x0c:      /* FF */
   5138               case 0x0d:      /* CR */
   5139               case 0x85:      /* NEL */
   5140               case 0x2028:    /* LINE SEPARATOR */
   5141               case 0x2029:    /* PARAGRAPH SEPARATOR */
   5142               gotspace = TRUE;
   5143               break;
   5144               }
   5145             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
   5146             eptr += len;
   5147             }
   5148           break;
   5149 
   5150           case OP_NOT_DIGIT:
   5151           for (i = min; i < max; i++)
   5152             {
   5153             int len = 1;
   5154             if (eptr >= md->end_subject)
   5155               {
   5156               SCHECK_PARTIAL();
   5157               break;
   5158               }
   5159             GETCHARLEN(c, eptr, len);
   5160             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
   5161             eptr+= len;
   5162             }
   5163           break;
   5164 
   5165           case OP_DIGIT:
   5166           for (i = min; i < max; i++)
   5167             {
   5168             int len = 1;
   5169             if (eptr >= md->end_subject)
   5170               {
   5171               SCHECK_PARTIAL();
   5172               break;
   5173               }
   5174             GETCHARLEN(c, eptr, len);
   5175             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
   5176             eptr+= len;
   5177             }
   5178           break;
   5179 
   5180           case OP_NOT_WHITESPACE:
   5181           for (i = min; i < max; i++)
   5182             {
   5183             int len = 1;
   5184             if (eptr >= md->end_subject)
   5185               {
   5186               SCHECK_PARTIAL();
   5187               break;
   5188               }
   5189             GETCHARLEN(c, eptr, len);
   5190             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
   5191             eptr+= len;
   5192             }
   5193           break;
   5194 
   5195           case OP_WHITESPACE:
   5196           for (i = min; i < max; i++)
   5197             {
   5198             int len = 1;
   5199             if (eptr >= md->end_subject)
   5200               {
   5201               SCHECK_PARTIAL();
   5202               break;
   5203               }
   5204             GETCHARLEN(c, eptr, len);
   5205             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
   5206             eptr+= len;
   5207             }
   5208           break;
   5209 
   5210           case OP_NOT_WORDCHAR:
   5211           for (i = min; i < max; i++)
   5212             {
   5213             int len = 1;
   5214             if (eptr >= md->end_subject)
   5215               {
   5216               SCHECK_PARTIAL();
   5217               break;
   5218               }
   5219             GETCHARLEN(c, eptr, len);
   5220             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
   5221             eptr+= len;
   5222             }
   5223           break;
   5224 
   5225           case OP_WORDCHAR:
   5226           for (i = min; i < max; i++)
   5227             {
   5228             int len = 1;
   5229             if (eptr >= md->end_subject)
   5230               {
   5231               SCHECK_PARTIAL();
   5232               break;
   5233               }
   5234             GETCHARLEN(c, eptr, len);
   5235             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
   5236             eptr+= len;
   5237             }
   5238           break;
   5239 
   5240           default:
   5241           RRETURN(PCRE_ERROR_INTERNAL);
   5242           }
   5243 
   5244         /* eptr is now past the end of the maximum run */
   5245 
   5246         if (possessive) continue;
   5247         for(;;)
   5248           {
   5249           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
   5250           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5251           if (eptr-- == pp) break;        /* Stop if tried at original pos */
   5252           BACKCHAR(eptr);
   5253           }
   5254         }
   5255       else
   5256 #endif  /* SUPPORT_UTF8 */
   5257 
   5258       /* Not UTF-8 mode */
   5259         {
   5260         switch(ctype)
   5261           {
   5262           case OP_ANY:
   5263           for (i = min; i < max; i++)
   5264             {
   5265             if (eptr >= md->end_subject)
   5266               {
   5267               SCHECK_PARTIAL();
   5268               break;
   5269               }
   5270             if (IS_NEWLINE(eptr)) break;
   5271             eptr++;
   5272             }
   5273           break;
   5274 
   5275           case OP_ALLANY:
   5276           case OP_ANYBYTE:
   5277           c = max - min;
   5278           if (c > (unsigned int)(md->end_subject - eptr))
   5279             {
   5280             eptr = md->end_subject;
   5281             SCHECK_PARTIAL();
   5282             }
   5283           else eptr += c;
   5284           break;
   5285 
   5286           case OP_ANYNL:
   5287           for (i = min; i < max; i++)
   5288             {
   5289             if (eptr >= md->end_subject)
   5290               {
   5291               SCHECK_PARTIAL();
   5292               break;
   5293               }
   5294             c = *eptr;
   5295             if (c == 0x000d)
   5296               {
   5297               if (++eptr >= md->end_subject) break;
   5298               if (*eptr == 0x000a) eptr++;
   5299               }
   5300             else
   5301               {
   5302               if (c != 0x000a &&
   5303                   (md->bsr_anycrlf ||
   5304                     (c != 0x000b && c != 0x000c && c != 0x0085)))
   5305                 break;
   5306               eptr++;
   5307               }
   5308             }
   5309           break;
   5310 
   5311           case OP_NOT_HSPACE:
   5312           for (i = min; i < max; i++)
   5313             {
   5314             if (eptr >= md->end_subject)
   5315               {
   5316               SCHECK_PARTIAL();
   5317               break;
   5318               }
   5319             c = *eptr;
   5320             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
   5321             eptr++;
   5322             }
   5323           break;
   5324 
   5325           case OP_HSPACE:
   5326           for (i = min; i < max; i++)
   5327             {
   5328             if (eptr >= md->end_subject)
   5329               {
   5330               SCHECK_PARTIAL();
   5331               break;
   5332               }
   5333             c = *eptr;
   5334             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
   5335             eptr++;
   5336             }
   5337           break;
   5338 
   5339           case OP_NOT_VSPACE:
   5340           for (i = min; i < max; i++)
   5341             {
   5342             if (eptr >= md->end_subject)
   5343               {
   5344               SCHECK_PARTIAL();
   5345               break;
   5346               }
   5347             c = *eptr;
   5348             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
   5349               break;
   5350             eptr++;
   5351             }
   5352           break;
   5353 
   5354           case OP_VSPACE:
   5355           for (i = min; i < max; i++)
   5356             {
   5357             if (eptr >= md->end_subject)
   5358               {
   5359               SCHECK_PARTIAL();
   5360               break;
   5361               }
   5362             c = *eptr;
   5363             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
   5364               break;
   5365             eptr++;
   5366             }
   5367           break;
   5368 
   5369           case OP_NOT_DIGIT:
   5370           for (i = min; i < max; i++)
   5371             {
   5372             if (eptr >= md->end_subject)
   5373               {
   5374               SCHECK_PARTIAL();
   5375               break;
   5376               }
   5377             if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
   5378             eptr++;
   5379             }
   5380           break;
   5381 
   5382           case OP_DIGIT:
   5383           for (i = min; i < max; i++)
   5384             {
   5385             if (eptr >= md->end_subject)
   5386               {
   5387               SCHECK_PARTIAL();
   5388               break;
   5389               }
   5390             if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
   5391             eptr++;
   5392             }
   5393           break;
   5394 
   5395           case OP_NOT_WHITESPACE:
   5396           for (i = min; i < max; i++)
   5397             {
   5398             if (eptr >= md->end_subject)
   5399               {
   5400               SCHECK_PARTIAL();
   5401               break;
   5402               }
   5403             if ((md->ctypes[*eptr] & ctype_space) != 0) break;
   5404             eptr++;
   5405             }
   5406           break;
   5407 
   5408           case OP_WHITESPACE:
   5409           for (i = min; i < max; i++)
   5410             {
   5411             if (eptr >= md->end_subject)
   5412               {
   5413               SCHECK_PARTIAL();
   5414               break;
   5415               }
   5416             if ((md->ctypes[*eptr] & ctype_space) == 0) break;
   5417             eptr++;
   5418             }
   5419           break;
   5420 
   5421           case OP_NOT_WORDCHAR:
   5422           for (i = min; i < max; i++)
   5423             {
   5424             if (eptr >= md->end_subject)
   5425               {
   5426               SCHECK_PARTIAL();
   5427               break;
   5428               }
   5429             if ((md->ctypes[*eptr] & ctype_word) != 0) break;
   5430             eptr++;
   5431             }
   5432           break;
   5433 
   5434           case OP_WORDCHAR:
   5435           for (i = min; i < max; i++)
   5436             {
   5437             if (eptr >= md->end_subject)
   5438               {
   5439               SCHECK_PARTIAL();
   5440               break;
   5441               }
   5442             if ((md->ctypes[*eptr] & ctype_word) == 0) break;
   5443             eptr++;
   5444             }
   5445           break;
   5446 
   5447           default:
   5448           RRETURN(PCRE_ERROR_INTERNAL);
   5449           }
   5450 
   5451         /* eptr is now past the end of the maximum run */
   5452 
   5453         if (possessive) continue;
   5454         while (eptr >= pp)
   5455           {
   5456           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
   5457           eptr--;
   5458           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
   5459           }
   5460         }
   5461 
   5462       /* Get here if we can't make it match with any permitted repetitions */
   5463 
   5464       MRRETURN(MATCH_NOMATCH);
   5465       }
   5466     /* Control never gets here */
   5467 
   5468     /* There's been some horrible disaster. Arrival here can only mean there is
   5469     something seriously wrong in the code above or the OP_xxx definitions. */
   5470 
   5471     default:
   5472     DPRINTF(("Unknown opcode %d\n", *ecode));
   5473     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
   5474     }
   5475 
   5476   /* Do not stick any code in here without much thought; it is assumed
   5477   that "continue" in the code above comes out to here to repeat the main
   5478   loop. */
   5479 
   5480   }             /* End of main loop */
   5481 /* Control never reaches here */
   5482 
   5483 
   5484 /* When compiling to use the heap rather than the stack for recursive calls to
   5485 match(), the RRETURN() macro jumps here. The number that is saved in
   5486 frame->Xwhere indicates which label we actually want to return to. */
   5487 
   5488 #ifdef NO_RECURSE
   5489 #define LBL(val) case val: goto L_RM##val;
   5490 HEAP_RETURN:
   5491 switch (frame->Xwhere)
   5492   {
   5493   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
   5494   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
   5495   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
   5496   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
   5497   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
   5498 #ifdef SUPPORT_UTF8
   5499   LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
   5500   LBL(32) LBL(34) LBL(42) LBL(46)
   5501 #ifdef SUPPORT_UCP
   5502   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
   5503   LBL(59) LBL(60) LBL(61) LBL(62)
   5504 #endif  /* SUPPORT_UCP */
   5505 #endif  /* SUPPORT_UTF8 */
   5506   default:
   5507   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   5508   return PCRE_ERROR_INTERNAL;
   5509   }
   5510 #undef LBL
   5511 #endif  /* NO_RECURSE */
   5512 }
   5513 
   5514 
   5515 /***************************************************************************
   5516 ****************************************************************************
   5517                    RECURSION IN THE match() FUNCTION
   5518 
   5519 Undefine all the macros that were defined above to handle this. */
   5520 
   5521 #ifdef NO_RECURSE   /* build that keeps match() recursion state on the heap instead of the stack */
   5522 #undef eptr         /* this first group includes the names passed to RMATCH() in match(): */
   5523 #undef ecode        /*   eptr, ecode, offset_top, ims, eptrb */
   5524 #undef mstart
   5525 #undef offset_top
   5526 #undef ims
   5527 #undef eptrb
   5528 #undef flags
   5529 /* The names below include working variables visibly used in match() (pp, ctype, max, min). */
   5530 #undef callpat
   5531 #undef charptr
   5532 #undef data
   5533 #undef next
   5534 #undef pp
   5535 #undef prev
   5536 #undef saved_eptr
   5537 /* NOTE(review): each name is presumably a macro for a heap-frame field (cf. frame->Xwhere) -- confirm against the #defines earlier in the file. */
   5538 #undef new_recursive
   5539 
   5540 #undef cur_is_word
   5541 #undef condition
   5542 #undef prev_is_word
   5543 
   5544 #undef original_ims
   5545 
   5546 #undef ctype
   5547 #undef length
   5548 #undef max
   5549 #undef min
   5550 #undef number
   5551 #undef offset
   5552 #undef op
   5553 #undef save_capture_last
   5554 #undef save_offset1
   5555 #undef save_offset2
   5556 #undef save_offset3
   5557 #undef stacksave
   5558 
   5559 #undef newptrb
   5560 
   5561 #endif  /* NO_RECURSE */
   5562 
   5563 /* These two are defined as macros in both cases (with or without NO_RECURSE), so they are undefined unconditionally, outside the #ifdef above. */
   5564 
   5565 #undef fc
   5566 #undef fi
   5567 
   5568 /***************************************************************************
   5569 ***************************************************************************/
   5570 
   5571 
   5572 
   5573 /*************************************************
   5574 *         Execute a Regular Expression           *
   5575 *************************************************/
   5576 
   5577 /* This function applies a compiled re to a subject string and picks out
   5578 portions of the string if it matches. Two elements in the vector are set for
   5579 each substring: the offsets to the start and end of the substring.
   5580 
   5581 Arguments:
   5582   argument_re     points to the compiled expression
   5583   extra_data      points to extra data or is NULL
   5584   subject         points to the subject string
   5585   length          length of subject string (may contain binary zeros)
   5586   start_offset    where to start in the subject string
   5587   options         option bits
   5588   offsets         points to a vector of ints to be filled in with offsets
   5589   offsetcount     the number of elements in the vector
   5590 
   5591 Returns:          > 0 => success; value is the number of elements filled in
   5592                   = 0 => success, but offsets is not big enough
   5593                    -1 => failed to match
   5594                  < -1 => some kind of unexpected problem
   5595 */
   5596 
   5597 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
   5598 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   5599   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   5600   int offsetcount)
   5601 {
   5602 int rc, resetcount, ocount;
   5603 int first_byte = -1;
   5604 int req_byte = -1;
   5605 int req_byte2 = -1;
   5606 int newline;
   5607 unsigned long int ims;
   5608 BOOL using_temporary_offsets = FALSE;
   5609 BOOL anchored;
   5610 BOOL startline;
   5611 BOOL firstline;
   5612 BOOL first_byte_caseless = FALSE;
   5613 BOOL req_byte_caseless = FALSE;
   5614 BOOL utf8;
   5615 match_data match_block;
   5616 match_data *md = &match_block;
   5617 const uschar *tables;
   5618 const uschar *start_bits = NULL;
   5619 USPTR start_match = (USPTR)subject + start_offset;
   5620 USPTR end_subject;
   5621 USPTR start_partial = NULL;
   5622 USPTR req_byte_ptr = start_match - 1;
   5623 
   5624 pcre_study_data internal_study;
   5625 const pcre_study_data *study;
   5626 
   5627 real_pcre internal_re;
   5628 const real_pcre *external_re = (const real_pcre *)argument_re;
   5629 const real_pcre *re = external_re;
   5630 
   5631 /* Plausibility checks */
   5632 
   5633 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
   5634 if (re == NULL || subject == NULL ||
   5635    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
   5636 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
   5637 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
   5638 
   5639 /* This information is for finding all the numbers associated with a given
   5640 name, for condition testing. */
   5641 
   5642 md->name_table = (uschar *)re + re->name_table_offset;
   5643 md->name_count = re->name_count;
   5644 md->name_entry_size = re->name_entry_size;
   5645 
   5646 /* Fish out the optional data from the extra_data structure, first setting
   5647 the default values. */
   5648 
   5649 study = NULL;
   5650 md->match_limit = MATCH_LIMIT;
   5651 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
   5652 md->callout_data = NULL;
   5653 
   5654 /* The table pointer is always in native byte order. */
   5655 
   5656 tables = external_re->tables;
   5657 
   5658 if (extra_data != NULL)
   5659   {
   5660   register unsigned int flags = extra_data->flags;
   5661   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
   5662     study = (const pcre_study_data *)extra_data->study_data;
   5663   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
   5664     md->match_limit = extra_data->match_limit;
   5665   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
   5666     md->match_limit_recursion = extra_data->match_limit_recursion;
   5667   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
   5668     md->callout_data = extra_data->callout_data;
   5669   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
   5670   }
   5671 
   5672 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
   5673 is a feature that makes it possible to save compiled regex and re-use them
   5674 in other programs later. */
   5675 
   5676 if (tables == NULL) tables = _pcre_default_tables;
   5677 
   5678 /* Check that the first field in the block is the magic number. If it is not,
   5679 test for a regex that was compiled on a host of opposite endianness. If this is
   5680 the case, flipped values are put in internal_re and internal_study if there was
   5681 study data too. */
   5682 
   5683 if (re->magic_number != MAGIC_NUMBER)
   5684   {
   5685   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
   5686   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   5687   if (study != NULL) study = &internal_study;
   5688   }
   5689 
   5690 /* Set up other data */
   5691 
   5692 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
   5693 startline = (re->flags & PCRE_STARTLINE) != 0;
   5694 firstline = (re->options & PCRE_FIRSTLINE) != 0;
   5695 
   5696 /* The code starts after the real_pcre block and the capture name table. */
   5697 
   5698 md->start_code = (const uschar *)external_re + re->name_table_offset +
   5699   re->name_count * re->name_entry_size;
   5700 
   5701 md->start_subject = (USPTR)subject;
   5702 md->start_offset = start_offset;
   5703 md->end_subject = md->start_subject + length;
   5704 end_subject = md->end_subject;
   5705 
   5706 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
   5707 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
   5708 md->use_ucp = (re->options & PCRE_UCP) != 0;
   5709 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
   5710 
   5711 md->notbol = (options & PCRE_NOTBOL) != 0;
   5712 md->noteol = (options & PCRE_NOTEOL) != 0;
   5713 md->notempty = (options & PCRE_NOTEMPTY) != 0;
   5714 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
   5715 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
   5716               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
   5717 md->hitend = FALSE;
   5718 md->mark = NULL;                        /* In case never set */
   5719 
   5720 md->recursive = NULL;                   /* No recursion at top level */
   5721 
   5722 md->lcc = tables + lcc_offset;
   5723 md->ctypes = tables + ctypes_offset;
   5724 
   5725 /* Handle different \R options. */
   5726 
   5727 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
   5728   {
   5729   case 0:
   5730   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
   5731     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
   5732   else
   5733 #ifdef BSR_ANYCRLF
   5734   md->bsr_anycrlf = TRUE;
   5735 #else
   5736   md->bsr_anycrlf = FALSE;
   5737 #endif
   5738   break;
   5739 
   5740   case PCRE_BSR_ANYCRLF:
   5741   md->bsr_anycrlf = TRUE;
   5742   break;
   5743 
   5744   case PCRE_BSR_UNICODE:
   5745   md->bsr_anycrlf = FALSE;
   5746   break;
   5747 
   5748   default: return PCRE_ERROR_BADNEWLINE;
   5749   }
   5750 
   5751 /* Handle different types of newline. The three bits give eight cases. If
   5752 nothing is set at run time, whatever was used at compile time applies. */
   5753 
   5754 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
   5755         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
   5756   {
   5757   case 0: newline = NEWLINE; break;   /* Compile-time default */
   5758   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
   5759   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   5760   case PCRE_NEWLINE_CR+
   5761        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   5762   case PCRE_NEWLINE_ANY: newline = -1; break;
   5763   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   5764   default: return PCRE_ERROR_BADNEWLINE;
   5765   }
   5766 
   5767 if (newline == -2)
   5768   {
   5769   md->nltype = NLTYPE_ANYCRLF;
   5770   }
   5771 else if (newline < 0)
   5772   {
   5773   md->nltype = NLTYPE_ANY;
   5774   }
   5775 else
   5776   {
   5777   md->nltype = NLTYPE_FIXED;
   5778   if (newline > 255)
   5779     {
   5780     md->nllen = 2;
   5781     md->nl[0] = (newline >> 8) & 255;
   5782     md->nl[1] = newline & 255;
   5783     }
   5784   else
   5785     {
   5786     md->nllen = 1;
   5787     md->nl[0] = newline;
   5788     }
   5789   }
   5790 
   5791 /* Partial matching was originally supported only for a restricted set of
   5792 regexes; from release 8.00 there are no restrictions, but the bits are still
   5793 defined (though never set). So there's no harm in leaving this code. */
   5794 
   5795 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
   5796   return PCRE_ERROR_BADPARTIAL;
   5797 
   5798 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
   5799 back the character offset. */
   5800 
   5801 #ifdef SUPPORT_UTF8
   5802 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
   5803   {
   5804   int tb;
   5805   if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
   5806     return (tb == length && md->partial > 1)?
   5807       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
   5808   if (start_offset > 0 && start_offset < length)
   5809     {
   5810     tb = ((USPTR)subject)[start_offset] & 0xc0;
   5811     if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
   5812     }
   5813   }
   5814 #endif
   5815 
   5816 /* The ims options can vary during the matching as a result of the presence
   5817 of (?ims) items in the pattern. They are kept in a local variable so that
   5818 restoring at the exit of a group is easy. */
   5819 
   5820 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
   5821 
   5822 /* If the expression has got more back references than the offsets supplied can
   5823 hold, we get a temporary chunk of working store to use during the matching.
   5824 Otherwise, we can use the vector supplied, rounding down its size to a multiple
   5825 of 3. */
   5826 
   5827 ocount = offsetcount - (offsetcount % 3);
   5828 
   5829 if (re->top_backref > 0 && re->top_backref >= ocount/3)
   5830   {
   5831   ocount = re->top_backref * 3 + 3;
   5832   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
   5833   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
   5834   using_temporary_offsets = TRUE;
   5835   DPRINTF(("Got memory to hold back references\n"));
   5836   }
   5837 else md->offset_vector = offsets;
   5838 
   5839 md->offset_end = ocount;
   5840 md->offset_max = (2*ocount)/3;
   5841 md->offset_overflow = FALSE;
   5842 md->capture_last = -1;
   5843 
   5844 /* Compute the minimum number of offsets that we need to reset each time. Doing
   5845 this makes a huge difference to execution time when there aren't many brackets
   5846 in the pattern. */
   5847 
   5848 resetcount = 2 + re->top_bracket * 2;
   5849 if (resetcount > offsetcount) resetcount = ocount;
   5850 
   5851 /* Reset the working variables associated with each extraction. These should
   5852 never be used unless previously set, but they get saved and restored, and so we
   5853 initialize them to avoid reading uninitialized locations. */
   5854 
   5855 if (md->offset_vector != NULL)
   5856   {
   5857   register int *iptr = md->offset_vector + ocount;
   5858   register int *iend = iptr - resetcount/2 + 1;
   5859   while (--iptr >= iend) *iptr = -1;
   5860   }
   5861 
   5862 /* Set up the first character to match, if available. The first_byte value is
   5863 never set for an anchored regular expression, but the anchoring may be forced
   5864 at run time, so we have to test for anchoring. The first char may be unset for
   5865 an unanchored pattern, of course. If there's no first char and the pattern was
   5866 studied, there may be a bitmap of possible first characters. */
   5867 
   5868 if (!anchored)
   5869   {
   5870   if ((re->flags & PCRE_FIRSTSET) != 0)
   5871     {
   5872     first_byte = re->first_byte & 255;
   5873     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
   5874       first_byte = md->lcc[first_byte];
   5875     }
   5876   else
   5877     if (!startline && study != NULL &&
   5878       (study->flags & PCRE_STUDY_MAPPED) != 0)
   5879         start_bits = study->start_bits;
   5880   }
   5881 
   5882 /* For anchored or unanchored matches, there may be a "last known required
   5883 character" set. */
   5884 
   5885 if ((re->flags & PCRE_REQCHSET) != 0)
   5886   {
   5887   req_byte = re->req_byte & 255;
   5888   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
   5889   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
   5890   }
   5891 
   5892 
   5893 /* ==========================================================================*/
   5894 
   5895 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
   5896 the loop runs just once. */
   5897 
   5898 for(;;)
   5899   {
   5900   USPTR save_end_subject = end_subject;
   5901   USPTR new_start_match;
   5902 
   5903   /* Reset the maximum number of extractions we might see. */
   5904 
   5905   if (md->offset_vector != NULL)
   5906     {
   5907     register int *iptr = md->offset_vector;
   5908     register int *iend = iptr + resetcount;
   5909     while (iptr < iend) *iptr++ = -1;
   5910     }
   5911 
   5912   /* If firstline is TRUE, the start of the match is constrained to the first
   5913   line of a multiline string. That is, the match must be before or at the first
   5914   newline. Implement this by temporarily adjusting end_subject so that we stop
   5915   scanning at a newline. If the match fails at the newline, later code breaks
   5916   this loop. */
   5917 
   5918   if (firstline)
   5919     {
   5920     USPTR t = start_match;
   5921 #ifdef SUPPORT_UTF8
   5922     if (utf8)
   5923       {
   5924       while (t < md->end_subject && !IS_NEWLINE(t))
   5925         {
   5926         t++;
   5927         while (t < end_subject && (*t & 0xc0) == 0x80) t++;
   5928         }
   5929       }
   5930     else
   5931 #endif
   5932     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
   5933     end_subject = t;
   5934     }
   5935 
   5936   /* There are some optimizations that avoid running the match if a known
   5937   starting point is not found, or if a known later character is not present.
   5938   However, there is an option that disables these, for testing and for ensuring
   5939   that all callouts do actually occur. The option can be set in the regex by
   5940   (*NO_START_OPT) or passed in match-time options. */
   5941 
   5942   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
   5943     {
   5944     /* Advance to a unique first byte if there is one. */
   5945 
   5946     if (first_byte >= 0)
   5947       {
   5948       if (first_byte_caseless)
   5949         while (start_match < end_subject && md->lcc[*start_match] != first_byte)
   5950           start_match++;
   5951       else
   5952         while (start_match < end_subject && *start_match != first_byte)
   5953           start_match++;
   5954       }
   5955 
   5956     /* Or to just after a linebreak for a multiline match */
   5957 
   5958     else if (startline)
   5959       {
   5960       if (start_match > md->start_subject + start_offset)
   5961         {
   5962 #ifdef SUPPORT_UTF8
   5963         if (utf8)
   5964           {
   5965           while (start_match < end_subject && !WAS_NEWLINE(start_match))
   5966             {
   5967             start_match++;
   5968             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
   5969               start_match++;
   5970             }
   5971           }
   5972         else
   5973 #endif
   5974         while (start_match < end_subject && !WAS_NEWLINE(start_match))
   5975           start_match++;
   5976 
   5977         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
   5978         and we are now at a LF, advance the match position by one more character.
   5979         */
   5980 
   5981         if (start_match[-1] == CHAR_CR &&
   5982              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
   5983              start_match < end_subject &&
   5984              *start_match == CHAR_NL)
   5985           start_match++;
   5986         }
   5987       }
   5988 
   5989     /* Or to a non-unique first byte after study */
   5990 
   5991     else if (start_bits != NULL)
   5992       {
   5993       while (start_match < end_subject)
   5994         {
   5995         register unsigned int c = *start_match;
   5996         if ((start_bits[c/8] & (1 << (c&7))) == 0)
   5997           {
   5998           start_match++;
   5999 #ifdef SUPPORT_UTF8
   6000           if (utf8)
   6001             while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
   6002               start_match++;
   6003 #endif
   6004           }
   6005         else break;
   6006         }
   6007       }
   6008     }   /* Starting optimizations */
   6009 
   6010   /* Restore fudged end_subject */
   6011 
   6012   end_subject = save_end_subject;
   6013 
   6014   /* The following two optimizations are disabled for partial matching or if
   6015   disabling is explicitly requested. */
   6016 
   6017   if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
   6018     {
   6019     /* If the pattern was studied, a minimum subject length may be set. This is
   6020     a lower bound; no actual string of that length may actually match the
   6021     pattern. Although the value is, strictly, in characters, we treat it as
   6022     bytes to avoid spending too much time in this optimization. */
   6023 
   6024     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
   6025         (pcre_uint32)(end_subject - start_match) < study->minlength)
   6026       {
   6027       rc = MATCH_NOMATCH;
   6028       break;
   6029       }
   6030 
   6031     /* If req_byte is set, we know that that character must appear in the
   6032     subject for the match to succeed. If the first character is set, req_byte
   6033     must be later in the subject; otherwise the test starts at the match point.
   6034     This optimization can save a huge amount of backtracking in patterns with
   6035     nested unlimited repeats that aren't going to match. Writing separate code
   6036     for cased/caseless versions makes it go faster, as does using an
   6037     autoincrement and backing off on a match.
   6038 
   6039     HOWEVER: when the subject string is very, very long, searching to its end
   6040     can take a long time, and give bad performance on quite ordinary patterns.
   6041     This showed up when somebody was matching something like /^\d+C/ on a
   6042     32-megabyte string... so we don't do this when the string is sufficiently
   6043     long. */
   6044 
   6045     if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
   6046       {
   6047       register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
   6048 
   6049       /* We don't need to repeat the search if we haven't yet reached the
   6050       place we found it at last time. */
   6051 
   6052       if (p > req_byte_ptr)
   6053         {
   6054         if (req_byte_caseless)
   6055           {
   6056           while (p < end_subject)
   6057             {
   6058             register int pp = *p++;
   6059             if (pp == req_byte || pp == req_byte2) { p--; break; }
   6060             }
   6061           }
   6062         else
   6063           {
   6064           while (p < end_subject)
   6065             {
   6066             if (*p++ == req_byte) { p--; break; }
   6067             }
   6068           }
   6069 
   6070         /* If we can't find the required character, break the matching loop,
   6071         forcing a match failure. */
   6072 
   6073         if (p >= end_subject)
   6074           {
   6075           rc = MATCH_NOMATCH;
   6076           break;
   6077           }
   6078 
   6079         /* If we have found the required character, save the point where we
   6080         found it, so that we don't search again next time round the loop if
   6081         the start hasn't passed this character yet. */
   6082 
   6083         req_byte_ptr = p;
   6084         }
   6085       }
   6086     }
   6087 
   6088 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
   6089   printf(">>>> Match against: ");
   6090   pchars(start_match, end_subject - start_match, TRUE, md);
   6091   printf("\n");
   6092 #endif
   6093 
   6094   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
   6095   first starting point for which a partial match was found. */
   6096 
   6097   md->start_match_ptr = start_match;
   6098   md->start_used_ptr = start_match;
   6099   md->match_call_count = 0;
   6100   rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
   6101     0, 0);
   6102   if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
   6103 
   6104   switch(rc)
   6105     {
   6106     /* SKIP passes back the next starting point explicitly, but if it is the
   6107     same as the match we have just done, treat it as NOMATCH. */
   6108 
   6109     case MATCH_SKIP:
   6110     if (md->start_match_ptr != start_match)
   6111       {
   6112       new_start_match = md->start_match_ptr;
   6113       break;
   6114       }
   6115     /* Fall through */
   6116 
   6117     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
   6118     the SKIP's arg was not found. We also treat this as NOMATCH. */
   6119 
   6120     case MATCH_SKIP_ARG:
   6121     /* Fall through */
   6122 
   6123     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
   6124     exactly like PRUNE. */
   6125 
   6126     case MATCH_NOMATCH:
   6127     case MATCH_PRUNE:
   6128     case MATCH_THEN:
   6129     new_start_match = start_match + 1;
   6130 #ifdef SUPPORT_UTF8
   6131     if (utf8)
   6132       while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
   6133         new_start_match++;
   6134 #endif
   6135     break;
   6136 
   6137     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
   6138 
   6139     case MATCH_COMMIT:
   6140     rc = MATCH_NOMATCH;
   6141     goto ENDLOOP;
   6142 
   6143     /* Any other return is either a match, or some kind of error. */
   6144 
   6145     default:
   6146     goto ENDLOOP;
   6147     }
   6148 
   6149   /* Control reaches here for the various types of "no match at this point"
   6150   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
   6151 
   6152   rc = MATCH_NOMATCH;
   6153 
   6154   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
   6155   newline in the subject (though it may continue over the newline). Therefore,
   6156   if we have just failed to match, starting at a newline, do not continue. */
   6157 
   6158   if (firstline && IS_NEWLINE(start_match)) break;
   6159 
   6160   /* Advance to new matching position */
   6161 
   6162   start_match = new_start_match;
   6163 
   6164   /* Break the loop if the pattern is anchored or if we have passed the end of
   6165   the subject. */
   6166 
   6167   if (anchored || start_match > end_subject) break;
   6168 
   6169   /* If we have just passed a CR and we are now at a LF, and the pattern does
   6170   not contain any explicit matches for \r or \n, and the newline option is CRLF
   6171   or ANY or ANYCRLF, advance the match position by one more character. */
   6172 
   6173   if (start_match[-1] == CHAR_CR &&
   6174       start_match < end_subject &&
   6175       *start_match == CHAR_NL &&
   6176       (re->flags & PCRE_HASCRORLF) == 0 &&
   6177         (md->nltype == NLTYPE_ANY ||
   6178          md->nltype == NLTYPE_ANYCRLF ||
   6179          md->nllen == 2))
   6180     start_match++;
   6181 
   6182   md->mark = NULL;   /* Reset for start of next match attempt */
   6183   }                  /* End of for(;;) "bumpalong" loop */
   6184 
   6185 /* ==========================================================================*/
   6186 
   6187 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
   6188 conditions is true:
   6189 
   6190 (1) The pattern is anchored or the match was failed by (*COMMIT);
   6191 
   6192 (2) We are past the end of the subject;
   6193 
   6194 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
   6195     this option requests that a match occur at or before the first newline in
   6196     the subject.
   6197 
   6198 When we have a match and the offset vector is big enough to deal with any
   6199 backreferences, captured substring offsets will already be set up. In the case
   6200 where we had to get some local store to hold offsets for backreference
   6201 processing, copy those that we can. In this case there need not be overflow if
   6202 certain parts of the pattern were not used, even though there are more
   6203 capturing parentheses than vector slots. */
   6204 
   6205 ENDLOOP:
   6206 
   6207 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   6208   {
   6209   if (using_temporary_offsets)
   6210     {
   6211     if (offsetcount >= 4)
   6212       {
   6213       memcpy(offsets + 2, md->offset_vector + 2,
   6214         (offsetcount - 2) * sizeof(int));
   6215       DPRINTF(("Copied offsets from temporary memory\n"));
   6216       }
   6217     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
   6218     DPRINTF(("Freeing temporary memory\n"));
   6219     (pcre_free)(md->offset_vector);
   6220     }
   6221 
   6222   /* Set the return code to the number of captured strings, or 0 if there are
   6223   too many to fit into the vector. */
   6224 
   6225   rc = md->offset_overflow? 0 : md->end_offset_top/2;
   6226 
   6227   /* If there is space, set up the whole thing as substring 0. The value of
   6228   md->start_match_ptr might be modified if \K was encountered on the success
   6229   matching path. */
   6230 
   6231   if (offsetcount < 2) rc = 0; else
   6232     {
   6233     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
   6234     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
   6235     }
   6236 
   6237   DPRINTF((">>>> returning %d\n", rc));
   6238   goto RETURN_MARK;
   6239   }
   6240 
   6241 /* Control gets here if there has been an error, or if the overall match
   6242 attempt has failed at all permitted starting positions. */
   6243 
   6244 if (using_temporary_offsets)
   6245   {
   6246   DPRINTF(("Freeing temporary memory\n"));
   6247   (pcre_free)(md->offset_vector);
   6248   }
   6249 
   6250 /* For anything other than nomatch or partial match, just return the code. */
   6251 
   6252 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   6253   {
   6254   DPRINTF((">>>> error: returning %d\n", rc));
   6255   return rc;
   6256   }
   6257 
   6258 /* Handle partial matches - disable any mark data */
   6259 
   6260 if (start_partial != NULL)
   6261   {
   6262   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
   6263   md->mark = NULL;
   6264   if (offsetcount > 1)
   6265     {
   6266     offsets[0] = (int)(start_partial - (USPTR)subject);
   6267     offsets[1] = (int)(end_subject - (USPTR)subject);
   6268     }
   6269   rc = PCRE_ERROR_PARTIAL;
   6270   }
   6271 
   6272 /* This is the classic nomatch case */
   6273 
   6274 else
   6275   {
   6276   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
   6277   rc = PCRE_ERROR_NOMATCH;
   6278   }
   6279 
   6280 /* Return the MARK data if it has been requested. */
   6281 
   6282 RETURN_MARK:
   6283 
   6284 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
   6285   *(extra_data->mark) = (unsigned char *)(md->mark);
   6286 return rc;
   6287 }
   6288 
   6289 /* End of pcre_exec.c */
   6290