Home | History | Annotate | Download | only in src
      1 /*************************************************
      2 *      Perl-Compatible Regular Expressions       *
      3 *************************************************/
      4 
      5 /* PCRE is a library of functions to support regular expressions whose syntax
      6 and semantics are as close as possible to those of the Perl 5 language.
      7 
      8                        Written by Philip Hazel
      9      Original API code Copyright (c) 1997-2012 University of Cambridge
     10          New API code Copyright (c) 2016 University of Cambridge
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 
     42 /* This module contains the external function pcre2_dfa_match(), which is an
     43 alternative matching function that uses a sort of DFA algorithm (not a true
     44 FSM). This is NOT Perl-compatible, but it has advantages in certain
     45 applications. */
     46 
     47 
     48 /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
     49 the performance of his patterns greatly. I could not use it as it stood, as it
     50 was not thread safe, and made assumptions about pattern sizes. Also, it caused
     51 test 7 to loop, and test 9 to crash with a segfault.
     52 
     53 The issue is the check for duplicate states, which is done by a simple linear
     54 search up the state list. (Grep for "duplicate" below to find the code.) For
     55 many patterns, there will never be many states active at one time, so a simple
     56 linear search is fine. In patterns that have many active states, it might be a
     57 bottleneck. The suggested code used an indexing scheme to remember which states
     58 had previously been used for each character, and avoided the linear search when
     59 it knew there was no chance of a duplicate. This was implemented when adding
     60 states to the state lists.
     61 
     62 I wrote some thread-safe, not-limited code to try something similar at the time
     63 of checking for duplicates (instead of when adding states), using index vectors
     64 on the stack. It did give a 13% improvement with one specially constructed
     65 pattern for certain subject strings, but on other strings and on many of the
     66 simpler patterns in the test suite it did worse. The major problem, I think,
     67 was the extra time to initialize the index. This had to be done for each call
     68 of internal_dfa_match(). (The supplied patch used a static vector, initialized
     69 only once - I suspect this was the cause of the problems with the tests.)
     70 
     71 Overall, I concluded that the gains in some cases did not outweigh the losses
     72 in others, so I abandoned this code. */
     73 
     74 
     75 #ifdef HAVE_CONFIG_H
     76 #include "config.h"
     77 #endif
     78 
     79 #define NLBLOCK mb             /* Block containing newline information */
     80 #define PSSTART start_subject  /* Field containing processed string start */
     81 #define PSEND   end_subject    /* Field containing processed string end */
     82 
     83 #include "pcre2_internal.h"
     84 
     85 #define PUBLIC_DFA_MATCH_OPTIONS \
     86   (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
     87    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
     88    PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART)
     89 
     90 
     91 /*************************************************
     92 *      Code parameters and static tables         *
     93 *************************************************/
     94 
     95 /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
     96 into others, under special conditions. A gap of 20 between the blocks should be
     97 enough. The resulting opcodes don't have to be less than 256 because they are
     98 never stored, so we push them well clear of the normal opcodes. */
     99 
    100 #define OP_PROP_EXTRA       300
    101 #define OP_EXTUNI_EXTRA     320
    102 #define OP_ANYNL_EXTRA      340
    103 #define OP_HSPACE_EXTRA     360
    104 #define OP_VSPACE_EXTRA     380
    105 
    106 
    107 /* This table identifies those opcodes that are followed immediately by a
    108 character that is to be tested in some way. This makes it possible to
    109 centralize the loading of these characters. In the case of Type * etc, the
    110 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
    111 small value. Non-zero values in the table are the offsets from the opcode where
    112 the character is to be found. ***NOTE*** If the start of this table is
    113 modified, the three tables that follow must also be modified. */
    114 
    115 static const uint8_t coptable[] = {
    116   0,                             /* End                                    */
    117   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
    118   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
    119   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
    120   0, 0,                          /* \P, \p                                 */
    121   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
    122   0,                             /* \X                                     */
    123   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
    124   1,                             /* Char                                   */
    125   1,                             /* Chari                                  */
    126   1,                             /* not                                    */
    127   1,                             /* noti                                   */
    128   /* Positive single-char repeats                                          */
    129   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
    130   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
    131   1+IMM2_SIZE,                   /* exact                                  */
    132   1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
    133   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
    134   1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
    135   1+IMM2_SIZE,                   /* exact I                                */
    136   1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
    137   /* Negative single-char repeats - only for chars < 256                   */
    138   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
    139   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
    140   1+IMM2_SIZE,                   /* NOT exact                              */
    141   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
    142   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
    143   1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
    144   1+IMM2_SIZE,                   /* NOT exact I                            */
    145   1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
    146   /* Positive type repeats                                                 */
    147   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
    148   1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
    149   1+IMM2_SIZE,                   /* Type exact                             */
    150   1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
    151   /* Character class & ref repeats                                         */
    152   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
    153   0, 0,                          /* CRRANGE, CRMINRANGE                    */
    154   0, 0, 0, 0,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
    155   0,                             /* CLASS                                  */
    156   0,                             /* NCLASS                                 */
    157   0,                             /* XCLASS - variable length               */
    158   0,                             /* REF                                    */
    159   0,                             /* REFI                                   */
    160   0,                             /* DNREF                                  */
    161   0,                             /* DNREFI                                 */
    162   0,                             /* RECURSE                                */
    163   0,                             /* CALLOUT                                */
    164   0,                             /* CALLOUT_STR                            */
    165   0,                             /* Alt                                    */
    166   0,                             /* Ket                                    */
    167   0,                             /* KetRmax                                */
    168   0,                             /* KetRmin                                */
    169   0,                             /* KetRpos                                */
    170   0,                             /* Reverse                                */
    171   0,                             /* Assert                                 */
    172   0,                             /* Assert not                             */
    173   0,                             /* Assert behind                          */
    174   0,                             /* Assert behind not                      */
    175   0, 0,                          /* ONCE, ONCE_NC                          */
    176   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
    177   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
    178   0, 0,                          /* CREF, DNCREF                           */
    179   0, 0,                          /* RREF, DNRREF                           */
    180   0, 0,                          /* FALSE, TRUE                            */
    181   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
    182   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
    183   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
    184   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
    185   0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
    186 };
    187 
    188 /* This table identifies those opcodes that inspect a character. It is used to
    189 remember the fact that a character could have been inspected when the end of
    190 the subject is reached. ***NOTE*** If the start of this table is modified, the
    191 two tables that follow must also be modified. */
    192 
    193 static const uint8_t poptable[] = {
    194   0,                             /* End                                    */
    195   0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
    196   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
    197   1, 1, 1,                       /* Any, AllAny, Anybyte                   */
    198   1, 1,                          /* \P, \p                                 */
    199   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
    200   1,                             /* \X                                     */
    201   0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
    202   1,                             /* Char                                   */
    203   1,                             /* Chari                                  */
    204   1,                             /* not                                    */
    205   1,                             /* noti                                   */
    206   /* Positive single-char repeats                                          */
    207   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
    208   1, 1, 1,                       /* upto, minupto, exact                   */
    209   1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
    210   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
    211   1, 1, 1,                       /* upto I, minupto I, exact I             */
    212   1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
    213   /* Negative single-char repeats - only for chars < 256                   */
    214   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
    215   1, 1, 1,                       /* NOT upto, minupto, exact               */
    216   1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
    217   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
    218   1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
    219   1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
    220   /* Positive type repeats                                                 */
    221   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
    222   1, 1, 1,                       /* Type upto, minupto, exact              */
    223   1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
    224   /* Character class & ref repeats                                         */
    225   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
    226   1, 1,                          /* CRRANGE, CRMINRANGE                    */
    227   1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
    228   1,                             /* CLASS                                  */
    229   1,                             /* NCLASS                                 */
    230   1,                             /* XCLASS - variable length               */
    231   0,                             /* REF                                    */
    232   0,                             /* REFI                                   */
    233   0,                             /* DNREF                                  */
    234   0,                             /* DNREFI                                 */
    235   0,                             /* RECURSE                                */
    236   0,                             /* CALLOUT                                */
    237   0,                             /* CALLOUT_STR                            */
    238   0,                             /* Alt                                    */
    239   0,                             /* Ket                                    */
    240   0,                             /* KetRmax                                */
    241   0,                             /* KetRmin                                */
    242   0,                             /* KetRpos                                */
    243   0,                             /* Reverse                                */
    244   0,                             /* Assert                                 */
    245   0,                             /* Assert not                             */
    246   0,                             /* Assert behind                          */
    247   0,                             /* Assert behind not                      */
    248   0, 0,                          /* ONCE, ONCE_NC                          */
    249   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
    250   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
    251   0, 0,                          /* CREF, DNCREF                           */
    252   0, 0,                          /* RREF, DNRREF                           */
    253   0, 0,                          /* FALSE, TRUE                            */
    254   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
    255   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
    256   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
    257   0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
    258   0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
    259 };
    260 
    261 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
    262 and \w */
    263 
    264 static const uint8_t toptable1[] = {
    265   0, 0, 0, 0, 0, 0,
    266   ctype_digit, ctype_digit,
    267   ctype_space, ctype_space,
    268   ctype_word,  ctype_word,
    269   0, 0                            /* OP_ANY, OP_ALLANY */
    270 };
    271 
    272 static const uint8_t toptable2[] = {
    273   0, 0, 0, 0, 0, 0,
    274   ctype_digit, 0,
    275   ctype_space, 0,
    276   ctype_word,  0,
    277   1, 1                            /* OP_ANY, OP_ALLANY */
    278 };
    279 
    280 
    281 /* Structure for holding data about a particular state, which is in effect the
    282 current data for an active path through the match tree. It must consist
    283 entirely of ints because the working vector we are passed, and which we put
    284 these structures in, is a vector of ints. */
    285 
    286 typedef struct stateblock {
    287   int offset;                     /* Offset to opcode (-ve has meaning) */
    288   int count;                      /* Count for repeats */
    289   int data;                       /* Some use extra data */
    290 } stateblock;
    291 
    292 #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
    293 
    294 
    295 
    296 /*************************************************
    297 *     Match a Regular Expression - DFA engine    *
    298 *************************************************/
    299 
    300 /* This internal function applies a compiled pattern to a subject string,
    301 starting at a given point, using a DFA engine. This function is called from the
    302 external one, possibly multiple times if the pattern is not anchored. The
    303 function calls itself recursively for some kinds of subpattern.
    304 
    305 Arguments:
    306   mb                the match_data block with fixed information
    307   this_start_code   the opening bracket of this subexpression's code
    308   current_subject   where we currently are in the subject string
    309   start_offset      start offset in the subject string
    310   offsets           vector to contain the matching string offsets
    311   offsetcount       size of same
    312   workspace         vector of workspace
    313   wscount           size of same
    314   rlevel            function call recursion level
    315 
    316 Returns:            > 0 => number of match offset pairs placed in offsets
    317                     = 0 => offsets overflowed; longest matches are present
    318                      -1 => failed to match
    319                    < -1 => some kind of unexpected problem
    320 
    321 The following macros are used for adding states to the two state vectors (one
    322 for the current character, one for the following character). */
    323 
    324 #define ADD_ACTIVE(x,y) \
    325   if (active_count++ < wscount) \
    326     { \
    327     next_active_state->offset = (x); \
    328     next_active_state->count  = (y); \
    329     next_active_state++; \
    330     } \
    331   else return PCRE2_ERROR_DFA_WSSIZE
    332 
    333 #define ADD_ACTIVE_DATA(x,y,z) \
    334   if (active_count++ < wscount) \
    335     { \
    336     next_active_state->offset = (x); \
    337     next_active_state->count  = (y); \
    338     next_active_state->data   = (z); \
    339     next_active_state++; \
    340     } \
    341   else return PCRE2_ERROR_DFA_WSSIZE
    342 
    343 #define ADD_NEW(x,y) \
    344   if (new_count++ < wscount) \
    345     { \
    346     next_new_state->offset = (x); \
    347     next_new_state->count  = (y); \
    348     next_new_state++; \
    349     } \
    350   else return PCRE2_ERROR_DFA_WSSIZE
    351 
    352 #define ADD_NEW_DATA(x,y,z) \
    353   if (new_count++ < wscount) \
    354     { \
    355     next_new_state->offset = (x); \
    356     next_new_state->count  = (y); \
    357     next_new_state->data   = (z); \
    358     next_new_state++; \
    359     } \
    360   else return PCRE2_ERROR_DFA_WSSIZE
    361 
    362 /* And now, here is the code */
    363 
    364 static int
    365 internal_dfa_match(
    366   dfa_match_block *mb,
    367   PCRE2_SPTR this_start_code,
    368   PCRE2_SPTR current_subject,
    369   PCRE2_SIZE start_offset,
    370   PCRE2_SIZE *offsets,
    371   uint32_t offsetcount,
    372   int *workspace,
    373   int wscount,
    374   int  rlevel)
    375 {
    376 stateblock *active_states, *new_states, *temp_states;
    377 stateblock *next_active_state, *next_new_state;
    378 
    379 const uint8_t *ctypes, *lcc, *fcc;
    380 PCRE2_SPTR ptr;
    381 PCRE2_SPTR end_code;
    382 PCRE2_SPTR first_op;
    383 
    384 dfa_recursion_info new_recursive;
    385 
    386 int active_count, new_count, match_count;
    387 
    388 /* Some fields in the mb block are frequently referenced, so we load them into
    389 independent variables in the hope that this will perform better. */
    390 
    391 PCRE2_SPTR start_subject = mb->start_subject;
    392 PCRE2_SPTR end_subject = mb->end_subject;
    393 PCRE2_SPTR start_code = mb->start_code;
    394 
    395 #ifdef SUPPORT_UNICODE
    396 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
    397 #else
    398 BOOL utf = FALSE;
    399 #endif
    400 
    401 BOOL reset_could_continue = FALSE;
    402 
    403 rlevel++;
    404 offsetcount &= (uint32_t)(-2);  /* Round down */
    405 
    406 wscount -= 2;
    407 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
    408           (2 * INTS_PER_STATEBLOCK);
    409 
    410 ctypes = mb->tables + ctypes_offset;
    411 lcc = mb->tables + lcc_offset;
    412 fcc = mb->tables + fcc_offset;
    413 
    414 match_count = PCRE2_ERROR_NOMATCH;   /* A negative number */
    415 
    416 active_states = (stateblock *)(workspace + 2);
    417 next_new_state = new_states = active_states + wscount;
    418 new_count = 0;
    419 
    420 first_op = this_start_code + 1 + LINK_SIZE +
    421   ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
    422     *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
    423     ? IMM2_SIZE:0);
    424 
    425 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
    426 the alternative states onto the list, and find out where the end is. This
    427 makes it possible to use this function recursively, when we want to stop at a
    428 matching internal ket rather than at the end.
    429 
    430 If the first opcode in the first alternative is OP_REVERSE, we are dealing with
    431 a backward assertion. In that case, we have to find out the maximum amount to
    432 move back, and set up each alternative appropriately. */
    433 
    434 if (*first_op == OP_REVERSE)
    435   {
    436   size_t max_back = 0;
    437   size_t gone_back;
    438 
    439   end_code = this_start_code;
    440   do
    441     {
    442     size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
    443     if (back > max_back) max_back = back;
    444     end_code += GET(end_code, 1);
    445     }
    446   while (*end_code == OP_ALT);
    447 
    448   /* If we can't go back the amount required for the longest lookbehind
    449   pattern, go back as far as we can; some alternatives may still be viable. */
    450 
    451 #ifdef SUPPORT_UNICODE
    452   /* In character mode we have to step back character by character */
    453 
    454   if (utf)
    455     {
    456     for (gone_back = 0; gone_back < max_back; gone_back++)
    457       {
    458       if (current_subject <= start_subject) break;
    459       current_subject--;
    460       ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
    461       }
    462     }
    463   else
    464 #endif
    465 
    466   /* In byte-mode we can do this quickly. */
    467 
    468     {
    469     size_t current_offset = (size_t)(current_subject - start_subject);
    470     gone_back = (current_offset < max_back)? current_offset : max_back;
    471     current_subject -= gone_back;
    472     }
    473 
    474   /* Save the earliest consulted character */
    475 
    476   if (current_subject < mb->start_used_ptr)
    477     mb->start_used_ptr = current_subject;
    478 
    479   /* Now we can process the individual branches. */
    480 
    481   end_code = this_start_code;
    482   do
    483     {
    484     size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
    485     if (back <= gone_back)
    486       {
    487       int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
    488       ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
    489       }
    490     end_code += GET(end_code, 1);
    491     }
    492   while (*end_code == OP_ALT);
    493  }
    494 
    495 /* This is the code for a "normal" subpattern (not a backward assertion). The
    496 start of a whole pattern is always one of these. If we are at the top level,
    497 we may be asked to restart matching from the same point that we reached for a
    498 previous partial match. We still have to scan through the top-level branches to
    499 find the end state. */
    500 
    501 else
    502   {
    503   end_code = this_start_code;
    504 
    505   /* Restarting */
    506 
    507   if (rlevel == 1 && (mb->moptions & PCRE2_DFA_RESTART) != 0)
    508     {
    509     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
    510     new_count = workspace[1];
    511     if (!workspace[0])
    512       memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
    513     }
    514 
    515   /* Not restarting */
    516 
    517   else
    518     {
    519     int length = 1 + LINK_SIZE +
    520       ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
    521         *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
    522         ? IMM2_SIZE:0);
    523     do
    524       {
    525       ADD_NEW((int)(end_code - start_code + length), 0);
    526       end_code += GET(end_code, 1);
    527       length = 1 + LINK_SIZE;
    528       }
    529     while (*end_code == OP_ALT);
    530     }
    531   }
    532 
    533 workspace[0] = 0;    /* Bit indicating which vector is current */
    534 
    535 /* Loop for scanning the subject */
    536 
    537 ptr = current_subject;
    538 for (;;)
    539   {
    540   int i, j;
    541   int clen, dlen;
    542   uint32_t c, d;
    543   int forced_fail = 0;
    544   BOOL partial_newline = FALSE;
    545   BOOL could_continue = reset_could_continue;
    546   reset_could_continue = FALSE;
    547 
    548   if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
    549 
    550   /* Make the new state list into the active state list and empty the
    551   new state list. */
    552 
    553   temp_states = active_states;
    554   active_states = new_states;
    555   new_states = temp_states;
    556   active_count = new_count;
    557   new_count = 0;
    558 
    559   workspace[0] ^= 1;              /* Remember for the restarting feature */
    560   workspace[1] = active_count;
    561 
    562   /* Set the pointers for adding new states */
    563 
    564   next_active_state = active_states + active_count;
    565   next_new_state = new_states;
    566 
    567   /* Load the current character from the subject outside the loop, as many
    568   different states may want to look at it, and we assume that at least one
    569   will. */
    570 
    571   if (ptr < end_subject)
    572     {
    573     clen = 1;        /* Number of data items in the character */
    574 #ifdef SUPPORT_UNICODE
    575     GETCHARLENTEST(c, ptr, clen);
    576 #else
    577     c = *ptr;
    578 #endif  /* SUPPORT_UNICODE */
    579     }
    580   else
    581     {
    582     clen = 0;        /* This indicates the end of the subject */
    583     c = NOTACHAR;    /* This value should never actually be used */
    584     }
    585 
    586   /* Scan up the active states and act on each one. The result of an action
    587   may be to add more states to the currently active list (e.g. on hitting a
    588   parenthesis) or it may be to put states on the new list, for considering
    589   when we move the character pointer on. */
    590 
    591   for (i = 0; i < active_count; i++)
    592     {
    593     stateblock *current_state = active_states + i;
    594     BOOL caseless = FALSE;
    595     PCRE2_SPTR code;
    596     uint32_t codevalue;
    597     int state_offset = current_state->offset;
    598     int rrc;
    599     int count;
    600 
    601     /* A negative offset is a special case meaning "hold off going to this
    602     (negated) state until the number of characters in the data field have
    603     been skipped". If the could_continue flag was passed over from a previous
    604     state, arrange for it to be passed on. */
    605 
    606     if (state_offset < 0)
    607       {
    608       if (current_state->data > 0)
    609         {
    610         ADD_NEW_DATA(state_offset, current_state->count,
    611           current_state->data - 1);
    612         if (could_continue) reset_could_continue = TRUE;
    613         continue;
    614         }
    615       else
    616         {
    617         current_state->offset = state_offset = -state_offset;
    618         }
    619       }
    620 
    621     /* Check for a duplicate state with the same count, and skip if found.
    622     See the note at the head of this module about the possibility of improving
    623     performance here. */
    624 
    625     for (j = 0; j < i; j++)
    626       {
    627       if (active_states[j].offset == state_offset &&
    628           active_states[j].count == current_state->count)
    629         goto NEXT_ACTIVE_STATE;
    630       }
    631 
    632     /* The state offset is the offset to the opcode */
    633 
    634     code = start_code + state_offset;
    635     codevalue = *code;
    636 
    637     /* If this opcode inspects a character, but we are at the end of the
    638     subject, remember the fact for use when testing for a partial match. */
    639 
    640     if (clen == 0 && poptable[codevalue] != 0)
    641       could_continue = TRUE;
    642 
    643     /* If this opcode is followed by an inline character, load it. It is
    644     tempting to test for the presence of a subject character here, but that
    645     is wrong, because sometimes zero repetitions of the subject are
    646     permitted.
    647 
    648     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
    649     argument that is not a data character - but is always one byte long because
    650     the values are small. We have to take special action to deal with  \P, \p,
    651     \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
    652     these ones to new opcodes. */
    653 
    654     if (coptable[codevalue] > 0)
    655       {
    656       dlen = 1;
    657 #ifdef SUPPORT_UNICODE
    658       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
    659 #endif  /* SUPPORT_UNICODE */
    660       d = code[coptable[codevalue]];
    661       if (codevalue >= OP_TYPESTAR)
    662         {
    663         switch(d)
    664           {
    665           case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
    666           case OP_NOTPROP:
    667           case OP_PROP: codevalue += OP_PROP_EXTRA; break;
    668           case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
    669           case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
    670           case OP_NOT_HSPACE:
    671           case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
    672           case OP_NOT_VSPACE:
    673           case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
    674           default: break;
    675           }
    676         }
    677       }
    678     else
    679       {
    680       dlen = 0;         /* Not strictly necessary, but compilers moan */
    681       d = NOTACHAR;     /* if these variables are not set. */
    682       }
    683 
    684 
    685     /* Now process the individual opcodes */
    686 
    687     switch (codevalue)
    688       {
    689 /* ========================================================================== */
    690       /* These cases are never obeyed. This is a fudge that causes a compile-
    691       time error if the vectors coptable or poptable, which are indexed by
    692       opcode, are not the correct length. It seems to be the only way to do
    693       such a check at compile time, as the sizeof() operator does not work
    694       in the C preprocessor. */
    695 
    696       case OP_TABLE_LENGTH:
    697       case OP_TABLE_LENGTH +
    698         ((sizeof(coptable) == OP_TABLE_LENGTH) &&
    699          (sizeof(poptable) == OP_TABLE_LENGTH)):
    700       break;
    701 
    702 /* ========================================================================== */
    703       /* Reached a closing bracket. If not at the end of the pattern, carry
    704       on with the next opcode. For repeating opcodes, also add the repeat
    705       state. Note that KETRPOS will always be encountered at the end of the
    706       subpattern, because the possessive subpattern repeats are always handled
    707       using recursive calls. Thus, it never adds any new states.
    708 
    709       At the end of the (sub)pattern, unless we have an empty string and
    710       PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
    711       start of the subject, save the match data, shifting up all previous
    712       matches so we always have the longest first. */
    713 
    714       case OP_KET:
    715       case OP_KETRMIN:
    716       case OP_KETRMAX:
    717       case OP_KETRPOS:
    718       if (code != end_code)
    719         {
    720         ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
    721         if (codevalue != OP_KET)
    722           {
    723           ADD_ACTIVE(state_offset - (int)GET(code, 1), 0);
    724           }
    725         }
    726       else
    727         {
    728         if (ptr > current_subject ||
    729             ((mb->moptions & PCRE2_NOTEMPTY) == 0 &&
    730               ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == 0 ||
    731                 current_subject > start_subject + mb->start_offset)))
    732           {
    733           if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
    734             else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
    735               match_count = 0;
    736           count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
    737           if (count > 0) memmove(offsets + 2, offsets,
    738             (size_t)count * sizeof(PCRE2_SIZE));
    739           if (offsetcount >= 2)
    740             {
    741             offsets[0] = (PCRE2_SIZE)(current_subject - start_subject);
    742             offsets[1] = (PCRE2_SIZE)(ptr - start_subject);
    743             }
    744           if ((mb->moptions & PCRE2_DFA_SHORTEST) != 0) return match_count;
    745           }
    746         }
    747       break;
    748 
    749 /* ========================================================================== */
    750       /* These opcodes add to the current list of states without looking
    751       at the current character. */
    752 
    753       /*-----------------------------------------------------------------*/
    754       case OP_ALT:
    755       do { code += GET(code, 1); } while (*code == OP_ALT);
    756       ADD_ACTIVE((int)(code - start_code), 0);
    757       break;
    758 
    759       /*-----------------------------------------------------------------*/
    760       case OP_BRA:
    761       case OP_SBRA:
    762       do
    763         {
    764         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
    765         code += GET(code, 1);
    766         }
    767       while (*code == OP_ALT);
    768       break;
    769 
    770       /*-----------------------------------------------------------------*/
    771       case OP_CBRA:
    772       case OP_SCBRA:
    773       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
    774       code += GET(code, 1);
    775       while (*code == OP_ALT)
    776         {
    777         ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
    778         code += GET(code, 1);
    779         }
    780       break;
    781 
    782       /*-----------------------------------------------------------------*/
    783       case OP_BRAZERO:
    784       case OP_BRAMINZERO:
    785       ADD_ACTIVE(state_offset + 1, 0);
    786       code += 1 + GET(code, 2);
    787       while (*code == OP_ALT) code += GET(code, 1);
    788       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
    789       break;
    790 
    791       /*-----------------------------------------------------------------*/
    792       case OP_SKIPZERO:
    793       code += 1 + GET(code, 2);
    794       while (*code == OP_ALT) code += GET(code, 1);
    795       ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
    796       break;
    797 
    798       /*-----------------------------------------------------------------*/
    799       case OP_CIRC:
    800       if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0)
    801         { ADD_ACTIVE(state_offset + 1, 0); }
    802       break;
    803 
    804       /*-----------------------------------------------------------------*/
    805       case OP_CIRCM:
    806       if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == 0) ||
    807           ((ptr != end_subject || (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != 0 )
    808             && WAS_NEWLINE(ptr)))
    809         { ADD_ACTIVE(state_offset + 1, 0); }
    810       break;
    811 
    812       /*-----------------------------------------------------------------*/
    813       case OP_EOD:
    814       if (ptr >= end_subject)
    815         {
    816         if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    817           could_continue = TRUE;
    818         else { ADD_ACTIVE(state_offset + 1, 0); }
    819         }
    820       break;
    821 
    822       /*-----------------------------------------------------------------*/
    823       case OP_SOD:
    824       if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
    825       break;
    826 
    827       /*-----------------------------------------------------------------*/
    828       case OP_SOM:
    829       if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
    830       break;
    831 
    832 
    833 /* ========================================================================== */
    834       /* These opcodes inspect the next subject character, and sometimes
    835       the previous one as well, but do not have an argument. The variable
    836       clen contains the length of the current character and is zero if we are
    837       at the end of the subject. */
    838 
    839       /*-----------------------------------------------------------------*/
    840       case OP_ANY:
    841       if (clen > 0 && !IS_NEWLINE(ptr))
    842         {
    843         if (ptr + 1 >= mb->end_subject &&
    844             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
    845             NLBLOCK->nltype == NLTYPE_FIXED &&
    846             NLBLOCK->nllen == 2 &&
    847             c == NLBLOCK->nl[0])
    848           {
    849           could_continue = partial_newline = TRUE;
    850           }
    851         else
    852           {
    853           ADD_NEW(state_offset + 1, 0);
    854           }
    855         }
    856       break;
    857 
    858       /*-----------------------------------------------------------------*/
    859       case OP_ALLANY:
    860       if (clen > 0)
    861         { ADD_NEW(state_offset + 1, 0); }
    862       break;
    863 
    864       /*-----------------------------------------------------------------*/
    865       case OP_EODN:
    866       if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    867         could_continue = TRUE;
    868       else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
    869         { ADD_ACTIVE(state_offset + 1, 0); }
    870       break;
    871 
    872       /*-----------------------------------------------------------------*/
    873       case OP_DOLL:
    874       if ((mb->moptions & PCRE2_NOTEOL) == 0)
    875         {
    876         if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    877           could_continue = TRUE;
    878         else if (clen == 0 ||
    879             ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
    880                (ptr == end_subject - mb->nllen)
    881             ))
    882           { ADD_ACTIVE(state_offset + 1, 0); }
    883         else if (ptr + 1 >= mb->end_subject &&
    884                  (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
    885                  NLBLOCK->nltype == NLTYPE_FIXED &&
    886                  NLBLOCK->nllen == 2 &&
    887                  c == NLBLOCK->nl[0])
    888           {
    889           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    890             {
    891             reset_could_continue = TRUE;
    892             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
    893             }
    894           else could_continue = partial_newline = TRUE;
    895           }
    896         }
    897       break;
    898 
    899       /*-----------------------------------------------------------------*/
    900       case OP_DOLLM:
    901       if ((mb->moptions & PCRE2_NOTEOL) == 0)
    902         {
    903         if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    904           could_continue = TRUE;
    905         else if (clen == 0 ||
    906             ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
    907           { ADD_ACTIVE(state_offset + 1, 0); }
    908         else if (ptr + 1 >= mb->end_subject &&
    909                  (mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
    910                  NLBLOCK->nltype == NLTYPE_FIXED &&
    911                  NLBLOCK->nllen == 2 &&
    912                  c == NLBLOCK->nl[0])
    913           {
    914           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
    915             {
    916             reset_could_continue = TRUE;
    917             ADD_NEW_DATA(-(state_offset + 1), 0, 1);
    918             }
    919           else could_continue = partial_newline = TRUE;
    920           }
    921         }
    922       else if (IS_NEWLINE(ptr))
    923         { ADD_ACTIVE(state_offset + 1, 0); }
    924       break;
    925 
    926       /*-----------------------------------------------------------------*/
    927 
    928       case OP_DIGIT:
    929       case OP_WHITESPACE:
    930       case OP_WORDCHAR:
    931       if (clen > 0 && c < 256 &&
    932             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
    933         { ADD_NEW(state_offset + 1, 0); }
    934       break;
    935 
    936       /*-----------------------------------------------------------------*/
    937       case OP_NOT_DIGIT:
    938       case OP_NOT_WHITESPACE:
    939       case OP_NOT_WORDCHAR:
    940       if (clen > 0 && (c >= 256 ||
    941             ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
    942         { ADD_NEW(state_offset + 1, 0); }
    943       break;
    944 
    945       /*-----------------------------------------------------------------*/
    946       case OP_WORD_BOUNDARY:
    947       case OP_NOT_WORD_BOUNDARY:
    948         {
    949         int left_word, right_word;
    950 
    951         if (ptr > start_subject)
    952           {
    953           PCRE2_SPTR temp = ptr - 1;
    954           if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
    955 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    956           if (utf) { BACKCHAR(temp); }
    957 #endif
    958           GETCHARTEST(d, temp);
    959 #ifdef SUPPORT_UNICODE
    960           if ((mb->poptions & PCRE2_UCP) != 0)
    961             {
    962             if (d == '_') left_word = TRUE; else
    963               {
    964               uint32_t cat = UCD_CATEGORY(d);
    965               left_word = (cat == ucp_L || cat == ucp_N);
    966               }
    967             }
    968           else
    969 #endif
    970           left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
    971           }
    972         else left_word = FALSE;
    973 
    974         if (clen > 0)
    975           {
    976           if (ptr >= mb->last_used_ptr)
    977             {
    978             PCRE2_SPTR temp = ptr + 1;
    979 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    980             if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
    981 #endif
    982             mb->last_used_ptr = temp;
    983             }
    984 #ifdef SUPPORT_UNICODE
    985           if ((mb->poptions & PCRE2_UCP) != 0)
    986             {
    987             if (c == '_') right_word = TRUE; else
    988               {
    989               uint32_t cat = UCD_CATEGORY(c);
    990               right_word = (cat == ucp_L || cat == ucp_N);
    991               }
    992             }
    993           else
    994 #endif
    995           right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
    996           }
    997         else right_word = FALSE;
    998 
    999         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
   1000           { ADD_ACTIVE(state_offset + 1, 0); }
   1001         }
   1002       break;
   1003 
   1004 
   1005       /*-----------------------------------------------------------------*/
   1006       /* Check the next character by Unicode property. We will get here only
   1007       if the support is in the binary; otherwise a compile-time error occurs.
   1008       */
   1009 
   1010 #ifdef SUPPORT_UNICODE
   1011       case OP_PROP:
   1012       case OP_NOTPROP:
   1013       if (clen > 0)
   1014         {
   1015         BOOL OK;
   1016         const uint32_t *cp;
   1017         const ucd_record * prop = GET_UCD(c);
   1018         switch(code[1])
   1019           {
   1020           case PT_ANY:
   1021           OK = TRUE;
   1022           break;
   1023 
   1024           case PT_LAMP:
   1025           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
   1026                prop->chartype == ucp_Lt;
   1027           break;
   1028 
   1029           case PT_GC:
   1030           OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
   1031           break;
   1032 
   1033           case PT_PC:
   1034           OK = prop->chartype == code[2];
   1035           break;
   1036 
   1037           case PT_SC:
   1038           OK = prop->script == code[2];
   1039           break;
   1040 
   1041           /* These are specials for combination cases. */
   1042 
   1043           case PT_ALNUM:
   1044           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1045                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
   1046           break;
   1047 
   1048           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   1049           which means that Perl space and POSIX space are now identical. PCRE
   1050           was changed at release 8.34. */
   1051 
   1052           case PT_SPACE:    /* Perl space */
   1053           case PT_PXSPACE:  /* POSIX space */
   1054           switch(c)
   1055             {
   1056             HSPACE_CASES:
   1057             VSPACE_CASES:
   1058             OK = TRUE;
   1059             break;
   1060 
   1061             default:
   1062             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
   1063             break;
   1064             }
   1065           break;
   1066 
   1067           case PT_WORD:
   1068           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1069                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   1070                c == CHAR_UNDERSCORE;
   1071           break;
   1072 
   1073           case PT_CLIST:
   1074           cp = PRIV(ucd_caseless_sets) + code[2];
   1075           for (;;)
   1076             {
   1077             if (c < *cp) { OK = FALSE; break; }
   1078             if (c == *cp++) { OK = TRUE; break; }
   1079             }
   1080           break;
   1081 
   1082           case PT_UCNC:
   1083           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   1084                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   1085                c >= 0xe000;
   1086           break;
   1087 
   1088           /* Should never occur, but keep compilers from grumbling. */
   1089 
   1090           default:
   1091           OK = codevalue != OP_PROP;
   1092           break;
   1093           }
   1094 
   1095         if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
   1096         }
   1097       break;
   1098 #endif
   1099 
   1100 
   1101 
   1102 /* ========================================================================== */
   1103       /* These opcodes likewise inspect the subject character, but have an
   1104       argument that is not a data character. It is one of these opcodes:
   1105       OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
   1106       OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
   1107 
   1108       case OP_TYPEPLUS:
   1109       case OP_TYPEMINPLUS:
   1110       case OP_TYPEPOSPLUS:
   1111       count = current_state->count;  /* Already matched */
   1112       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
   1113       if (clen > 0)
   1114         {
   1115         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
   1116             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
   1117             NLBLOCK->nltype == NLTYPE_FIXED &&
   1118             NLBLOCK->nllen == 2 &&
   1119             c == NLBLOCK->nl[0])
   1120           {
   1121           could_continue = partial_newline = TRUE;
   1122           }
   1123         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
   1124             (c < 256 &&
   1125               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
   1126               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
   1127           {
   1128           if (count > 0 && codevalue == OP_TYPEPOSPLUS)
   1129             {
   1130             active_count--;            /* Remove non-match possibility */
   1131             next_active_state--;
   1132             }
   1133           count++;
   1134           ADD_NEW(state_offset, count);
   1135           }
   1136         }
   1137       break;
   1138 
   1139       /*-----------------------------------------------------------------*/
   1140       case OP_TYPEQUERY:
   1141       case OP_TYPEMINQUERY:
   1142       case OP_TYPEPOSQUERY:
   1143       ADD_ACTIVE(state_offset + 2, 0);
   1144       if (clen > 0)
   1145         {
   1146         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
   1147             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
   1148             NLBLOCK->nltype == NLTYPE_FIXED &&
   1149             NLBLOCK->nllen == 2 &&
   1150             c == NLBLOCK->nl[0])
   1151           {
   1152           could_continue = partial_newline = TRUE;
   1153           }
   1154         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
   1155             (c < 256 &&
   1156               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
   1157               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
   1158           {
   1159           if (codevalue == OP_TYPEPOSQUERY)
   1160             {
   1161             active_count--;            /* Remove non-match possibility */
   1162             next_active_state--;
   1163             }
   1164           ADD_NEW(state_offset + 2, 0);
   1165           }
   1166         }
   1167       break;
   1168 
   1169       /*-----------------------------------------------------------------*/
   1170       case OP_TYPESTAR:
   1171       case OP_TYPEMINSTAR:
   1172       case OP_TYPEPOSSTAR:
   1173       ADD_ACTIVE(state_offset + 2, 0);
   1174       if (clen > 0)
   1175         {
   1176         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
   1177             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
   1178             NLBLOCK->nltype == NLTYPE_FIXED &&
   1179             NLBLOCK->nllen == 2 &&
   1180             c == NLBLOCK->nl[0])
   1181           {
   1182           could_continue = partial_newline = TRUE;
   1183           }
   1184         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
   1185             (c < 256 &&
   1186               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
   1187               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
   1188           {
   1189           if (codevalue == OP_TYPEPOSSTAR)
   1190             {
   1191             active_count--;            /* Remove non-match possibility */
   1192             next_active_state--;
   1193             }
   1194           ADD_NEW(state_offset, 0);
   1195           }
   1196         }
   1197       break;
   1198 
   1199       /*-----------------------------------------------------------------*/
   1200       case OP_TYPEEXACT:
   1201       count = current_state->count;  /* Number already matched */
   1202       if (clen > 0)
   1203         {
   1204         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
   1205             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
   1206             NLBLOCK->nltype == NLTYPE_FIXED &&
   1207             NLBLOCK->nllen == 2 &&
   1208             c == NLBLOCK->nl[0])
   1209           {
   1210           could_continue = partial_newline = TRUE;
   1211           }
   1212         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
   1213             (c < 256 &&
   1214               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
   1215               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
   1216           {
   1217           if (++count >= (int)GET2(code, 1))
   1218             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
   1219           else
   1220             { ADD_NEW(state_offset, count); }
   1221           }
   1222         }
   1223       break;
   1224 
   1225       /*-----------------------------------------------------------------*/
   1226       case OP_TYPEUPTO:
   1227       case OP_TYPEMINUPTO:
   1228       case OP_TYPEPOSUPTO:
   1229       ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
   1230       count = current_state->count;  /* Number already matched */
   1231       if (clen > 0)
   1232         {
   1233         if (d == OP_ANY && ptr + 1 >= mb->end_subject &&
   1234             (mb->moptions & (PCRE2_PARTIAL_HARD)) != 0 &&
   1235             NLBLOCK->nltype == NLTYPE_FIXED &&
   1236             NLBLOCK->nllen == 2 &&
   1237             c == NLBLOCK->nl[0])
   1238           {
   1239           could_continue = partial_newline = TRUE;
   1240           }
   1241         else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
   1242             (c < 256 &&
   1243               (d != OP_ANY || !IS_NEWLINE(ptr)) &&
   1244               ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
   1245           {
   1246           if (codevalue == OP_TYPEPOSUPTO)
   1247             {
   1248             active_count--;           /* Remove non-match possibility */
   1249             next_active_state--;
   1250             }
   1251           if (++count >= (int)GET2(code, 1))
   1252             { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
   1253           else
   1254             { ADD_NEW(state_offset, count); }
   1255           }
   1256         }
   1257       break;
   1258 
   1259 /* ========================================================================== */
   1260       /* These are virtual opcodes that are used when something like
   1261       OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_HSPACE,
   1262       OP_NOT_HSPACE, OP_VSPACE, or OP_NOT_VSPACE as its argument. It keeps the
   1263       code above fast for the other cases. The argument is in the d variable. */
   1264 
   1265 #ifdef SUPPORT_UNICODE
   1266       case OP_PROP_EXTRA + OP_TYPEPLUS:
   1267       case OP_PROP_EXTRA + OP_TYPEMINPLUS:
   1268       case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
   1269       count = current_state->count;           /* Already matched */
   1270       if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
   1271       if (clen > 0)
   1272         {
   1273         BOOL OK;
   1274         const uint32_t *cp;
   1275         const ucd_record * prop = GET_UCD(c);
   1276         switch(code[2])
   1277           {
   1278           case PT_ANY:
   1279           OK = TRUE;
   1280           break;
   1281 
   1282           case PT_LAMP:
   1283           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
   1284             prop->chartype == ucp_Lt;
   1285           break;
   1286 
   1287           case PT_GC:
   1288           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
   1289           break;
   1290 
   1291           case PT_PC:
   1292           OK = prop->chartype == code[3];
   1293           break;
   1294 
   1295           case PT_SC:
   1296           OK = prop->script == code[3];
   1297           break;
   1298 
   1299           /* These are specials for combination cases. */
   1300 
   1301           case PT_ALNUM:
   1302           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1303                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
   1304           break;
   1305 
   1306           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   1307           which means that Perl space and POSIX space are now identical. PCRE
   1308           was changed at release 8.34. */
   1309 
   1310           case PT_SPACE:    /* Perl space */
   1311           case PT_PXSPACE:  /* POSIX space */
   1312           switch(c)
   1313             {
   1314             HSPACE_CASES:
   1315             VSPACE_CASES:
   1316             OK = TRUE;
   1317             break;
   1318 
   1319             default:
   1320             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
   1321             break;
   1322             }
   1323           break;
   1324 
   1325           case PT_WORD:
   1326           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1327                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   1328                c == CHAR_UNDERSCORE;
   1329           break;
   1330 
   1331           case PT_CLIST:
   1332           cp = PRIV(ucd_caseless_sets) + code[3];
   1333           for (;;)
   1334             {
   1335             if (c < *cp) { OK = FALSE; break; }
   1336             if (c == *cp++) { OK = TRUE; break; }
   1337             }
   1338           break;
   1339 
   1340           case PT_UCNC:
   1341           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   1342                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   1343                c >= 0xe000;
   1344           break;
   1345 
   1346           /* Should never occur, but keep compilers from grumbling. */
   1347 
   1348           default:
   1349           OK = codevalue != OP_PROP;
   1350           break;
   1351           }
   1352 
   1353         if (OK == (d == OP_PROP))
   1354           {
   1355           if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
   1356             {
   1357             active_count--;           /* Remove non-match possibility */
   1358             next_active_state--;
   1359             }
   1360           count++;
   1361           ADD_NEW(state_offset, count);
   1362           }
   1363         }
   1364       break;
   1365 
   1366       /*-----------------------------------------------------------------*/
   1367       case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
   1368       case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
   1369       case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
   1370       count = current_state->count;  /* Already matched */
   1371       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
   1372       if (clen > 0)
   1373         {
   1374         uint32_t lgb, rgb;
   1375         PCRE2_SPTR nptr = ptr + clen;
   1376         int ncount = 0;
   1377         if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
   1378           {
   1379           active_count--;           /* Remove non-match possibility */
   1380           next_active_state--;
   1381           }
   1382         lgb = UCD_GRAPHBREAK(c);
   1383         while (nptr < end_subject)
   1384           {
   1385           dlen = 1;
   1386           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
   1387           rgb = UCD_GRAPHBREAK(d);
   1388           if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
   1389           ncount++;
   1390           lgb = rgb;
   1391           nptr += dlen;
   1392           }
   1393         count++;
   1394         ADD_NEW_DATA(-state_offset, count, ncount);
   1395         }
   1396       break;
   1397 #endif
   1398 
   1399       /*-----------------------------------------------------------------*/
   1400       case OP_ANYNL_EXTRA + OP_TYPEPLUS:
   1401       case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
   1402       case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
   1403       count = current_state->count;  /* Already matched */
   1404       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
   1405       if (clen > 0)
   1406         {
   1407         int ncount = 0;
   1408         switch (c)
   1409           {
   1410           case CHAR_VT:
   1411           case CHAR_FF:
   1412           case CHAR_NEL:
   1413 #ifndef EBCDIC
   1414           case 0x2028:
   1415           case 0x2029:
   1416 #endif  /* Not EBCDIC */
   1417           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
   1418           goto ANYNL01;
   1419 
   1420           case CHAR_CR:
   1421           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
   1422           /* Fall through */
   1423 
   1424           ANYNL01:
   1425           case CHAR_LF:
   1426           if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
   1427             {
   1428             active_count--;           /* Remove non-match possibility */
   1429             next_active_state--;
   1430             }
   1431           count++;
   1432           ADD_NEW_DATA(-state_offset, count, ncount);
   1433           break;
   1434 
   1435           default:
   1436           break;
   1437           }
   1438         }
   1439       break;
   1440 
   1441       /*-----------------------------------------------------------------*/
   1442       case OP_VSPACE_EXTRA + OP_TYPEPLUS:
   1443       case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
   1444       case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
   1445       count = current_state->count;  /* Already matched */
   1446       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
   1447       if (clen > 0)
   1448         {
   1449         BOOL OK;
   1450         switch (c)
   1451           {
   1452           VSPACE_CASES:
   1453           OK = TRUE;
   1454           break;
   1455 
   1456           default:
   1457           OK = FALSE;
   1458           break;
   1459           }
   1460 
   1461         if (OK == (d == OP_VSPACE))
   1462           {
   1463           if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
   1464             {
   1465             active_count--;           /* Remove non-match possibility */
   1466             next_active_state--;
   1467             }
   1468           count++;
   1469           ADD_NEW_DATA(-state_offset, count, 0);
   1470           }
   1471         }
   1472       break;
   1473 
   1474       /*-----------------------------------------------------------------*/
   1475       case OP_HSPACE_EXTRA + OP_TYPEPLUS:
   1476       case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
   1477       case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
   1478       count = current_state->count;  /* Already matched */
   1479       if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
   1480       if (clen > 0)
   1481         {
   1482         BOOL OK;
   1483         switch (c)
   1484           {
   1485           HSPACE_CASES:
   1486           OK = TRUE;
   1487           break;
   1488 
   1489           default:
   1490           OK = FALSE;
   1491           break;
   1492           }
   1493 
   1494         if (OK == (d == OP_HSPACE))
   1495           {
   1496           if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
   1497             {
   1498             active_count--;           /* Remove non-match possibility */
   1499             next_active_state--;
   1500             }
   1501           count++;
   1502           ADD_NEW_DATA(-state_offset, count, 0);
   1503           }
   1504         }
   1505       break;
   1506 
   1507       /*-----------------------------------------------------------------*/
   1508 #ifdef SUPPORT_UNICODE
   1509       case OP_PROP_EXTRA + OP_TYPEQUERY:
   1510       case OP_PROP_EXTRA + OP_TYPEMINQUERY:
   1511       case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
   1512       count = 4;
   1513       goto QS1;
   1514 
   1515       case OP_PROP_EXTRA + OP_TYPESTAR:
   1516       case OP_PROP_EXTRA + OP_TYPEMINSTAR:
   1517       case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
   1518       count = 0;
   1519 
   1520       QS1:
   1521 
   1522       ADD_ACTIVE(state_offset + 4, 0);
   1523       if (clen > 0)
   1524         {
   1525         BOOL OK;
   1526         const uint32_t *cp;
   1527         const ucd_record * prop = GET_UCD(c);
   1528         switch(code[2])
   1529           {
   1530           case PT_ANY:
   1531           OK = TRUE;
   1532           break;
   1533 
   1534           case PT_LAMP:
   1535           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
   1536             prop->chartype == ucp_Lt;
   1537           break;
   1538 
   1539           case PT_GC:
   1540           OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
   1541           break;
   1542 
   1543           case PT_PC:
   1544           OK = prop->chartype == code[3];
   1545           break;
   1546 
   1547           case PT_SC:
   1548           OK = prop->script == code[3];
   1549           break;
   1550 
   1551           /* These are specials for combination cases. */
   1552 
   1553           case PT_ALNUM:
   1554           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1555                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
   1556           break;
   1557 
   1558           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   1559           which means that Perl space and POSIX space are now identical. PCRE
   1560           was changed at release 8.34. */
   1561 
   1562           case PT_SPACE:    /* Perl space */
   1563           case PT_PXSPACE:  /* POSIX space */
   1564           switch(c)
   1565             {
   1566             HSPACE_CASES:
   1567             VSPACE_CASES:
   1568             OK = TRUE;
   1569             break;
   1570 
   1571             default:
   1572             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
   1573             break;
   1574             }
   1575           break;
   1576 
   1577           case PT_WORD:
   1578           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1579                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   1580                c == CHAR_UNDERSCORE;
   1581           break;
   1582 
   1583           case PT_CLIST:
   1584           cp = PRIV(ucd_caseless_sets) + code[3];
   1585           for (;;)
   1586             {
   1587             if (c < *cp) { OK = FALSE; break; }
   1588             if (c == *cp++) { OK = TRUE; break; }
   1589             }
   1590           break;
   1591 
   1592           case PT_UCNC:
   1593           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   1594                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   1595                c >= 0xe000;
   1596           break;
   1597 
   1598           /* Should never occur, but keep compilers from grumbling. */
   1599 
   1600           default:
   1601           OK = codevalue != OP_PROP;
   1602           break;
   1603           }
   1604 
   1605         if (OK == (d == OP_PROP))
   1606           {
   1607           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
   1608               codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
   1609             {
   1610             active_count--;           /* Remove non-match possibility */
   1611             next_active_state--;
   1612             }
   1613           ADD_NEW(state_offset + count, 0);
   1614           }
   1615         }
   1616       break;
   1617 
   1618       /*-----------------------------------------------------------------*/
   1619       case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
   1620       case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
   1621       case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
   1622       count = 2;
   1623       goto QS2;
   1624 
   1625       case OP_EXTUNI_EXTRA + OP_TYPESTAR:
   1626       case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
   1627       case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
   1628       count = 0;
   1629 
   1630       QS2:
   1631 
   1632       ADD_ACTIVE(state_offset + 2, 0);
   1633       if (clen > 0)
   1634         {
   1635         uint32_t lgb, rgb;
   1636         PCRE2_SPTR nptr = ptr + clen;
   1637         int ncount = 0;
   1638         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
   1639             codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
   1640           {
   1641           active_count--;           /* Remove non-match possibility */
   1642           next_active_state--;
   1643           }
   1644         lgb = UCD_GRAPHBREAK(c);
   1645         while (nptr < end_subject)
   1646           {
   1647           dlen = 1;
   1648           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
   1649           rgb = UCD_GRAPHBREAK(d);
   1650           if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
   1651           ncount++;
   1652           lgb = rgb;
   1653           nptr += dlen;
   1654           }
   1655         ADD_NEW_DATA(-(state_offset + count), 0, ncount);
   1656         }
   1657       break;
   1658 #endif
   1659 
   1660       /*-----------------------------------------------------------------*/
   1661       case OP_ANYNL_EXTRA + OP_TYPEQUERY:
   1662       case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
   1663       case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
   1664       count = 2;
   1665       goto QS3;
   1666 
   1667       case OP_ANYNL_EXTRA + OP_TYPESTAR:
   1668       case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
   1669       case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
   1670       count = 0;
   1671 
   1672       QS3:
   1673       ADD_ACTIVE(state_offset + 2, 0);
   1674       if (clen > 0)
   1675         {
   1676         int ncount = 0;
   1677         switch (c)
   1678           {
   1679           case CHAR_VT:
   1680           case CHAR_FF:
   1681           case CHAR_NEL:
   1682 #ifndef EBCDIC
   1683           case 0x2028:
   1684           case 0x2029:
   1685 #endif  /* Not EBCDIC */
   1686           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
   1687           goto ANYNL02;
   1688 
   1689           case CHAR_CR:
   1690           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
   1691           /* Fall through */
   1692 
   1693           ANYNL02:
   1694           case CHAR_LF:
   1695           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
   1696               codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
   1697             {
   1698             active_count--;           /* Remove non-match possibility */
   1699             next_active_state--;
   1700             }
   1701           ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
   1702           break;
   1703 
   1704           default:
   1705           break;
   1706           }
   1707         }
   1708       break;
   1709 
   1710       /*-----------------------------------------------------------------*/
   1711       case OP_VSPACE_EXTRA + OP_TYPEQUERY:
   1712       case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
   1713       case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
   1714       count = 2;
   1715       goto QS4;
   1716 
   1717       case OP_VSPACE_EXTRA + OP_TYPESTAR:
   1718       case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
   1719       case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
   1720       count = 0;
   1721 
   1722       QS4:
   1723       ADD_ACTIVE(state_offset + 2, 0);
   1724       if (clen > 0)
   1725         {
   1726         BOOL OK;
   1727         switch (c)
   1728           {
   1729           VSPACE_CASES:
   1730           OK = TRUE;
   1731           break;
   1732 
   1733           default:
   1734           OK = FALSE;
   1735           break;
   1736           }
   1737         if (OK == (d == OP_VSPACE))
   1738           {
   1739           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
   1740               codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
   1741             {
   1742             active_count--;           /* Remove non-match possibility */
   1743             next_active_state--;
   1744             }
   1745           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
   1746           }
   1747         }
   1748       break;
   1749 
   1750       /*-----------------------------------------------------------------*/
   1751       case OP_HSPACE_EXTRA + OP_TYPEQUERY:
   1752       case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
   1753       case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
   1754       count = 2;
   1755       goto QS5;
   1756 
   1757       case OP_HSPACE_EXTRA + OP_TYPESTAR:
   1758       case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
   1759       case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
   1760       count = 0;
   1761 
   1762       QS5:
   1763       ADD_ACTIVE(state_offset + 2, 0);
   1764       if (clen > 0)
   1765         {
   1766         BOOL OK;
   1767         switch (c)
   1768           {
   1769           HSPACE_CASES:
   1770           OK = TRUE;
   1771           break;
   1772 
   1773           default:
   1774           OK = FALSE;
   1775           break;
   1776           }
   1777 
   1778         if (OK == (d == OP_HSPACE))
   1779           {
   1780           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
   1781               codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
   1782             {
   1783             active_count--;           /* Remove non-match possibility */
   1784             next_active_state--;
   1785             }
   1786           ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
   1787           }
   1788         }
   1789       break;
   1790 
   1791       /*-----------------------------------------------------------------*/
   1792 #ifdef SUPPORT_UNICODE
   1793       case OP_PROP_EXTRA + OP_TYPEEXACT:
   1794       case OP_PROP_EXTRA + OP_TYPEUPTO:
   1795       case OP_PROP_EXTRA + OP_TYPEMINUPTO:
   1796       case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
   1797       if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
   1798         { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
   1799       count = current_state->count;  /* Number already matched */
   1800       if (clen > 0)
   1801         {
   1802         BOOL OK;
   1803         const uint32_t *cp;
   1804         const ucd_record * prop = GET_UCD(c);
   1805         switch(code[1 + IMM2_SIZE + 1])
   1806           {
   1807           case PT_ANY:
   1808           OK = TRUE;
   1809           break;
   1810 
   1811           case PT_LAMP:
   1812           OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
   1813             prop->chartype == ucp_Lt;
   1814           break;
   1815 
   1816           case PT_GC:
   1817           OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
   1818           break;
   1819 
   1820           case PT_PC:
   1821           OK = prop->chartype == code[1 + IMM2_SIZE + 2];
   1822           break;
   1823 
   1824           case PT_SC:
   1825           OK = prop->script == code[1 + IMM2_SIZE + 2];
   1826           break;
   1827 
   1828           /* These are specials for combination cases. */
   1829 
   1830           case PT_ALNUM:
   1831           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1832                PRIV(ucp_gentype)[prop->chartype] == ucp_N;
   1833           break;
   1834 
   1835           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
   1836           which means that Perl space and POSIX space are now identical. PCRE
   1837           was changed at release 8.34. */
   1838 
   1839           case PT_SPACE:    /* Perl space */
   1840           case PT_PXSPACE:  /* POSIX space */
   1841           switch(c)
   1842             {
   1843             HSPACE_CASES:
   1844             VSPACE_CASES:
   1845             OK = TRUE;
   1846             break;
   1847 
   1848             default:
   1849             OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
   1850             break;
   1851             }
   1852           break;
   1853 
   1854           case PT_WORD:
   1855           OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
   1856                PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
   1857                c == CHAR_UNDERSCORE;
   1858           break;
   1859 
   1860           case PT_CLIST:
   1861           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
   1862           for (;;)
   1863             {
   1864             if (c < *cp) { OK = FALSE; break; }
   1865             if (c == *cp++) { OK = TRUE; break; }
   1866             }
   1867           break;
   1868 
   1869           case PT_UCNC:
   1870           OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
   1871                c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
   1872                c >= 0xe000;
   1873           break;
   1874 
   1875           /* Should never occur, but keep compilers from grumbling. */
   1876 
   1877           default:
   1878           OK = codevalue != OP_PROP;
   1879           break;
   1880           }
   1881 
   1882         if (OK == (d == OP_PROP))
   1883           {
   1884           if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
   1885             {
   1886             active_count--;           /* Remove non-match possibility */
   1887             next_active_state--;
   1888             }
   1889           if (++count >= (int)GET2(code, 1))
   1890             { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
   1891           else
   1892             { ADD_NEW(state_offset, count); }
   1893           }
   1894         }
   1895       break;
   1896 
   1897       /*-----------------------------------------------------------------*/
   1898       case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
   1899       case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
   1900       case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
   1901       case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
   1902       if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
   1903         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
   1904       count = current_state->count;  /* Number already matched */
   1905       if (clen > 0)
   1906         {
   1907         uint32_t lgb, rgb;
   1908         PCRE2_SPTR nptr = ptr + clen;
   1909         int ncount = 0;
   1910         if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
   1911           {
   1912           active_count--;           /* Remove non-match possibility */
   1913           next_active_state--;
   1914           }
   1915         lgb = UCD_GRAPHBREAK(c);
   1916         while (nptr < end_subject)
   1917           {
   1918           dlen = 1;
   1919           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
   1920           rgb = UCD_GRAPHBREAK(d);
   1921           if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
   1922           ncount++;
   1923           lgb = rgb;
   1924           nptr += dlen;
   1925           }
   1926         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
   1927             reset_could_continue = TRUE;
   1928         if (++count >= (int)GET2(code, 1))
   1929           { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
   1930         else
   1931           { ADD_NEW_DATA(-state_offset, count, ncount); }
   1932         }
   1933       break;
   1934 #endif
   1935 
   1936       /*-----------------------------------------------------------------*/
   1937       case OP_ANYNL_EXTRA + OP_TYPEEXACT:
   1938       case OP_ANYNL_EXTRA + OP_TYPEUPTO:
   1939       case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
   1940       case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
   1941       if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
   1942         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
   1943       count = current_state->count;  /* Number already matched */
   1944       if (clen > 0)
   1945         {
   1946         int ncount = 0;
   1947         switch (c)
   1948           {
   1949           case CHAR_VT:
   1950           case CHAR_FF:
   1951           case CHAR_NEL:
   1952 #ifndef EBCDIC
   1953           case 0x2028:
   1954           case 0x2029:
   1955 #endif  /* Not EBCDIC */
   1956           if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
   1957           goto ANYNL03;
   1958 
   1959           case CHAR_CR:
   1960           if (ptr + 1 < end_subject && UCHAR21TEST(ptr + 1) == CHAR_LF) ncount = 1;
   1961           /* Fall through */
   1962 
   1963           ANYNL03:
   1964           case CHAR_LF:
   1965           if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
   1966             {
   1967             active_count--;           /* Remove non-match possibility */
   1968             next_active_state--;
   1969             }
   1970           if (++count >= (int)GET2(code, 1))
   1971             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
   1972           else
   1973             { ADD_NEW_DATA(-state_offset, count, ncount); }
   1974           break;
   1975 
   1976           default:
   1977           break;
   1978           }
   1979         }
   1980       break;
   1981 
   1982       /*-----------------------------------------------------------------*/
   1983       case OP_VSPACE_EXTRA + OP_TYPEEXACT:
   1984       case OP_VSPACE_EXTRA + OP_TYPEUPTO:
   1985       case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
   1986       case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
   1987       if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
   1988         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
   1989       count = current_state->count;  /* Number already matched */
   1990       if (clen > 0)
   1991         {
   1992         BOOL OK;
   1993         switch (c)
   1994           {
   1995           VSPACE_CASES:
   1996           OK = TRUE;
   1997           break;
   1998 
   1999           default:
   2000           OK = FALSE;
   2001           }
   2002 
   2003         if (OK == (d == OP_VSPACE))
   2004           {
   2005           if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
   2006             {
   2007             active_count--;           /* Remove non-match possibility */
   2008             next_active_state--;
   2009             }
   2010           if (++count >= (int)GET2(code, 1))
   2011             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
   2012           else
   2013             { ADD_NEW_DATA(-state_offset, count, 0); }
   2014           }
   2015         }
   2016       break;
   2017 
   2018       /*-----------------------------------------------------------------*/
   2019       case OP_HSPACE_EXTRA + OP_TYPEEXACT:
   2020       case OP_HSPACE_EXTRA + OP_TYPEUPTO:
   2021       case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
   2022       case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
   2023       if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
   2024         { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
   2025       count = current_state->count;  /* Number already matched */
   2026       if (clen > 0)
   2027         {
   2028         BOOL OK;
   2029         switch (c)
   2030           {
   2031           HSPACE_CASES:
   2032           OK = TRUE;
   2033           break;
   2034 
   2035           default:
   2036           OK = FALSE;
   2037           break;
   2038           }
   2039 
   2040         if (OK == (d == OP_HSPACE))
   2041           {
   2042           if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
   2043             {
   2044             active_count--;           /* Remove non-match possibility */
   2045             next_active_state--;
   2046             }
   2047           if (++count >= (int)GET2(code, 1))
   2048             { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
   2049           else
   2050             { ADD_NEW_DATA(-state_offset, count, 0); }
   2051           }
   2052         }
   2053       break;
   2054 
   2055 /* ========================================================================== */
   2056       /* These opcodes are followed by a character that is usually compared
   2057       to the current subject character; it is loaded into d. We still get
   2058       here even if there is no subject character, because in some cases zero
   2059       repetitions are permitted. */
   2060 
   2061       /*-----------------------------------------------------------------*/
   2062       case OP_CHAR:
   2063       if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
   2064       break;
   2065 
   2066       /*-----------------------------------------------------------------*/
   2067       case OP_CHARI:
   2068       if (clen == 0) break;
   2069 
   2070 #ifdef SUPPORT_UNICODE
   2071       if (utf)
   2072         {
   2073         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
   2074           {
   2075           unsigned int othercase;
   2076           if (c < 128)
   2077             othercase = fcc[c];
   2078           else
   2079             othercase = UCD_OTHERCASE(c);
   2080           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
   2081           }
   2082         }
   2083       else
   2084 #endif  /* SUPPORT_UNICODE */
   2085       /* Not UTF mode */
   2086         {
   2087         if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
   2088           { ADD_NEW(state_offset + 2, 0); }
   2089         }
   2090       break;
   2091 
   2092 
   2093 #ifdef SUPPORT_UNICODE
   2094       /*-----------------------------------------------------------------*/
   2095       /* This is a tricky one because it can match more than one character.
   2096       Find out how many characters to skip, and then set up a negative state
   2097       to wait for them to pass before continuing. */
   2098 
   2099       case OP_EXTUNI:
   2100       if (clen > 0)
   2101         {
   2102         uint32_t lgb, rgb;
   2103         PCRE2_SPTR nptr = ptr + clen;
   2104         int ncount = 0;
   2105         lgb = UCD_GRAPHBREAK(c);
   2106         while (nptr < end_subject)
   2107           {
   2108           dlen = 1;
   2109           if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
   2110           rgb = UCD_GRAPHBREAK(d);
   2111           if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
   2112           ncount++;
   2113           lgb = rgb;
   2114           nptr += dlen;
   2115           }
   2116         if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
   2117             reset_could_continue = TRUE;
   2118         ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
   2119         }
   2120       break;
   2121 #endif
   2122 
   2123       /*-----------------------------------------------------------------*/
   2124       /* This is tricky like EXTUNI because it too can match more than one
   2125       character (when CR is followed by LF). In this case, set up a negative
   2126       state to wait for one character to pass before continuing. */
   2127 
   2128       case OP_ANYNL:
   2129       if (clen > 0) switch(c)
   2130         {
   2131         case CHAR_VT:
   2132         case CHAR_FF:
   2133         case CHAR_NEL:
   2134 #ifndef EBCDIC
   2135         case 0x2028:
   2136         case 0x2029:
   2137 #endif  /* Not EBCDIC */
   2138         if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
   2139 
   2140         case CHAR_LF:
   2141         ADD_NEW(state_offset + 1, 0);
   2142         break;
   2143 
   2144         case CHAR_CR:
   2145         if (ptr + 1 >= end_subject)
   2146           {
   2147           ADD_NEW(state_offset + 1, 0);
   2148           if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
   2149             reset_could_continue = TRUE;
   2150           }
   2151         else if (UCHAR21TEST(ptr + 1) == CHAR_LF)
   2152           {
   2153           ADD_NEW_DATA(-(state_offset + 1), 0, 1);
   2154           }
   2155         else
   2156           {
   2157           ADD_NEW(state_offset + 1, 0);
   2158           }
   2159         break;
   2160         }
   2161       break;
   2162 
   2163       /*-----------------------------------------------------------------*/
   2164       case OP_NOT_VSPACE:
   2165       if (clen > 0) switch(c)
   2166         {
   2167         VSPACE_CASES:
   2168         break;
   2169 
   2170         default:
   2171         ADD_NEW(state_offset + 1, 0);
   2172         break;
   2173         }
   2174       break;
   2175 
   2176       /*-----------------------------------------------------------------*/
   2177       case OP_VSPACE:
   2178       if (clen > 0) switch(c)
   2179         {
   2180         VSPACE_CASES:
   2181         ADD_NEW(state_offset + 1, 0);
   2182         break;
   2183 
   2184         default:
   2185         break;
   2186         }
   2187       break;
   2188 
   2189       /*-----------------------------------------------------------------*/
   2190       case OP_NOT_HSPACE:
   2191       if (clen > 0) switch(c)
   2192         {
   2193         HSPACE_CASES:
   2194         break;
   2195 
   2196         default:
   2197         ADD_NEW(state_offset + 1, 0);
   2198         break;
   2199         }
   2200       break;
   2201 
   2202       /*-----------------------------------------------------------------*/
   2203       case OP_HSPACE:
   2204       if (clen > 0) switch(c)
   2205         {
   2206         HSPACE_CASES:
   2207         ADD_NEW(state_offset + 1, 0);
   2208         break;
   2209 
   2210         default:
   2211         break;
   2212         }
   2213       break;
   2214 
   2215       /*-----------------------------------------------------------------*/
   2216       /* Match a negated single character casefully. */
   2217 
   2218       case OP_NOT:
   2219       if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
   2220       break;
   2221 
   2222       /*-----------------------------------------------------------------*/
   2223       /* Match a negated single character caselessly. */
   2224 
   2225       case OP_NOTI:
   2226       if (clen > 0)
   2227         {
   2228         unsigned int otherd;
   2229 #ifdef SUPPORT_UNICODE
   2230         if (utf && d >= 128)
   2231           otherd = UCD_OTHERCASE(d);
   2232         else
   2233 #endif  /* SUPPORT_UNICODE */
   2234         otherd = TABLE_GET(d, fcc, d);
   2235         if (c != d && c != otherd)
   2236           { ADD_NEW(state_offset + dlen + 1, 0); }
   2237         }
   2238       break;
   2239 
   2240       /*-----------------------------------------------------------------*/
   2241       case OP_PLUSI:
   2242       case OP_MINPLUSI:
   2243       case OP_POSPLUSI:
   2244       case OP_NOTPLUSI:
   2245       case OP_NOTMINPLUSI:
   2246       case OP_NOTPOSPLUSI:
   2247       caseless = TRUE;
   2248       codevalue -= OP_STARI - OP_STAR;
   2249 
   2250       /* Fall through */
   2251       case OP_PLUS:
   2252       case OP_MINPLUS:
   2253       case OP_POSPLUS:
   2254       case OP_NOTPLUS:
   2255       case OP_NOTMINPLUS:
   2256       case OP_NOTPOSPLUS:
   2257       count = current_state->count;  /* Already matched */
   2258       if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
   2259       if (clen > 0)
   2260         {
   2261         uint32_t otherd = NOTACHAR;
   2262         if (caseless)
   2263           {
   2264 #ifdef SUPPORT_UNICODE
   2265           if (utf && d >= 128)
   2266             otherd = UCD_OTHERCASE(d);
   2267           else
   2268 #endif  /* SUPPORT_UNICODE */
   2269           otherd = TABLE_GET(d, fcc, d);
   2270           }
   2271         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
   2272           {
   2273           if (count > 0 &&
   2274               (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
   2275             {
   2276             active_count--;             /* Remove non-match possibility */
   2277             next_active_state--;
   2278             }
   2279           count++;
   2280           ADD_NEW(state_offset, count);
   2281           }
   2282         }
   2283       break;
   2284 
   2285       /*-----------------------------------------------------------------*/
   2286       case OP_QUERYI:
   2287       case OP_MINQUERYI:
   2288       case OP_POSQUERYI:
   2289       case OP_NOTQUERYI:
   2290       case OP_NOTMINQUERYI:
   2291       case OP_NOTPOSQUERYI:
   2292       caseless = TRUE;
   2293       codevalue -= OP_STARI - OP_STAR;
   2294       /* Fall through */
   2295       case OP_QUERY:
   2296       case OP_MINQUERY:
   2297       case OP_POSQUERY:
   2298       case OP_NOTQUERY:
   2299       case OP_NOTMINQUERY:
   2300       case OP_NOTPOSQUERY:
   2301       ADD_ACTIVE(state_offset + dlen + 1, 0);
   2302       if (clen > 0)
   2303         {
   2304         uint32_t otherd = NOTACHAR;
   2305         if (caseless)
   2306           {
   2307 #ifdef SUPPORT_UNICODE
   2308           if (utf && d >= 128)
   2309             otherd = UCD_OTHERCASE(d);
   2310           else
   2311 #endif  /* SUPPORT_UNICODE */
   2312           otherd = TABLE_GET(d, fcc, d);
   2313           }
   2314         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
   2315           {
   2316           if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
   2317             {
   2318             active_count--;            /* Remove non-match possibility */
   2319             next_active_state--;
   2320             }
   2321           ADD_NEW(state_offset + dlen + 1, 0);
   2322           }
   2323         }
   2324       break;
   2325 
   2326       /*-----------------------------------------------------------------*/
   2327       case OP_STARI:
   2328       case OP_MINSTARI:
   2329       case OP_POSSTARI:
   2330       case OP_NOTSTARI:
   2331       case OP_NOTMINSTARI:
   2332       case OP_NOTPOSSTARI:
   2333       caseless = TRUE;
   2334       codevalue -= OP_STARI - OP_STAR;
   2335       /* Fall through */
   2336       case OP_STAR:
   2337       case OP_MINSTAR:
   2338       case OP_POSSTAR:
   2339       case OP_NOTSTAR:
   2340       case OP_NOTMINSTAR:
   2341       case OP_NOTPOSSTAR:
   2342       ADD_ACTIVE(state_offset + dlen + 1, 0);
   2343       if (clen > 0)
   2344         {
   2345         uint32_t otherd = NOTACHAR;
   2346         if (caseless)
   2347           {
   2348 #ifdef SUPPORT_UNICODE
   2349           if (utf && d >= 128)
   2350             otherd = UCD_OTHERCASE(d);
   2351           else
   2352 #endif  /* SUPPORT_UNICODE */
   2353           otherd = TABLE_GET(d, fcc, d);
   2354           }
   2355         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
   2356           {
   2357           if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
   2358             {
   2359             active_count--;            /* Remove non-match possibility */
   2360             next_active_state--;
   2361             }
   2362           ADD_NEW(state_offset, 0);
   2363           }
   2364         }
   2365       break;
   2366 
   2367       /*-----------------------------------------------------------------*/
   2368       case OP_EXACTI:
   2369       case OP_NOTEXACTI:
   2370       caseless = TRUE;
   2371       codevalue -= OP_STARI - OP_STAR;
   2372       /* Fall through */
   2373       case OP_EXACT:
   2374       case OP_NOTEXACT:
   2375       count = current_state->count;  /* Number already matched */
   2376       if (clen > 0)
   2377         {
   2378         uint32_t otherd = NOTACHAR;
   2379         if (caseless)
   2380           {
   2381 #ifdef SUPPORT_UNICODE
   2382           if (utf && d >= 128)
   2383             otherd = UCD_OTHERCASE(d);
   2384           else
   2385 #endif  /* SUPPORT_UNICODE */
   2386           otherd = TABLE_GET(d, fcc, d);
   2387           }
   2388         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
   2389           {
   2390           if (++count >= (int)GET2(code, 1))
   2391             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
   2392           else
   2393             { ADD_NEW(state_offset, count); }
   2394           }
   2395         }
   2396       break;
   2397 
   2398       /*-----------------------------------------------------------------*/
   2399       case OP_UPTOI:
   2400       case OP_MINUPTOI:
   2401       case OP_POSUPTOI:
   2402       case OP_NOTUPTOI:
   2403       case OP_NOTMINUPTOI:
   2404       case OP_NOTPOSUPTOI:
   2405       caseless = TRUE;
   2406       codevalue -= OP_STARI - OP_STAR;
   2407       /* Fall through */
   2408       case OP_UPTO:
   2409       case OP_MINUPTO:
   2410       case OP_POSUPTO:
   2411       case OP_NOTUPTO:
   2412       case OP_NOTMINUPTO:
   2413       case OP_NOTPOSUPTO:
   2414       ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
   2415       count = current_state->count;  /* Number already matched */
   2416       if (clen > 0)
   2417         {
   2418         uint32_t otherd = NOTACHAR;
   2419         if (caseless)
   2420           {
   2421 #ifdef SUPPORT_UNICODE
   2422           if (utf && d >= 128)
   2423             otherd = UCD_OTHERCASE(d);
   2424           else
   2425 #endif  /* SUPPORT_UNICODE */
   2426           otherd = TABLE_GET(d, fcc, d);
   2427           }
   2428         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
   2429           {
   2430           if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
   2431             {
   2432             active_count--;             /* Remove non-match possibility */
   2433             next_active_state--;
   2434             }
   2435           if (++count >= (int)GET2(code, 1))
   2436             { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
   2437           else
   2438             { ADD_NEW(state_offset, count); }
   2439           }
   2440         }
   2441       break;
   2442 
   2443 
   2444 /* ========================================================================== */
   2445       /* These are the class-handling opcodes */
   2446 
   2447       case OP_CLASS:
   2448       case OP_NCLASS:
   2449       case OP_XCLASS:
   2450         {
   2451         BOOL isinclass = FALSE;
   2452         int next_state_offset;
   2453         PCRE2_SPTR ecode;
   2454 
   2455         /* For a simple class, there is always just a 32-byte table, and we
   2456         can set isinclass from it. */
   2457 
   2458         if (codevalue != OP_XCLASS)
   2459           {
   2460           ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
   2461           if (clen > 0)
   2462             {
   2463             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
   2464               ((((uint8_t *)(code + 1))[c/8] & (1 << (c&7))) != 0);
   2465             }
   2466           }
   2467 
   2468         /* An extended class may have a table or a list of single characters,
   2469         ranges, or both, and it may be positive or negative. There's a
   2470         function that sorts all this out. */
   2471 
   2472         else
   2473          {
   2474          ecode = code + GET(code, 1);
   2475          if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
   2476          }
   2477 
   2478         /* At this point, isinclass is set for all kinds of class, and ecode
   2479         points to the code unit after the end of the class. If there is a
   2480         quantifier, this is where it will be. */
   2481 
   2482         next_state_offset = (int)(ecode - start_code);
   2483 
   2484         switch (*ecode)
   2485           {
   2486           case OP_CRSTAR:
   2487           case OP_CRMINSTAR:
   2488           case OP_CRPOSSTAR:
   2489           ADD_ACTIVE(next_state_offset + 1, 0);
   2490           if (isinclass)
   2491             {
   2492             if (*ecode == OP_CRPOSSTAR)
   2493               {
   2494               active_count--;           /* Remove non-match possibility */
   2495               next_active_state--;
   2496               }
   2497             ADD_NEW(state_offset, 0);
   2498             }
   2499           break;
   2500 
   2501           case OP_CRPLUS:
   2502           case OP_CRMINPLUS:
   2503           case OP_CRPOSPLUS:
   2504           count = current_state->count;  /* Already matched */
   2505           if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
   2506           if (isinclass)
   2507             {
   2508             if (count > 0 && *ecode == OP_CRPOSPLUS)
   2509               {
   2510               active_count--;           /* Remove non-match possibility */
   2511               next_active_state--;
   2512               }
   2513             count++;
   2514             ADD_NEW(state_offset, count);
   2515             }
   2516           break;
   2517 
   2518           case OP_CRQUERY:
   2519           case OP_CRMINQUERY:
   2520           case OP_CRPOSQUERY:
   2521           ADD_ACTIVE(next_state_offset + 1, 0);
   2522           if (isinclass)
   2523             {
   2524             if (*ecode == OP_CRPOSQUERY)
   2525               {
   2526               active_count--;           /* Remove non-match possibility */
   2527               next_active_state--;
   2528               }
   2529             ADD_NEW(next_state_offset + 1, 0);
   2530             }
   2531           break;
   2532 
   2533           case OP_CRRANGE:
   2534           case OP_CRMINRANGE:
   2535           case OP_CRPOSRANGE:
   2536           count = current_state->count;  /* Already matched */
   2537           if (count >= (int)GET2(ecode, 1))
   2538             { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
   2539           if (isinclass)
   2540             {
   2541             int max = (int)GET2(ecode, 1 + IMM2_SIZE);
   2542             if (*ecode == OP_CRPOSRANGE)
   2543               {
   2544               active_count--;           /* Remove non-match possibility */
   2545               next_active_state--;
   2546               }
   2547             if (++count >= max && max != 0)   /* Max 0 => no limit */
   2548               { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
   2549             else
   2550               { ADD_NEW(state_offset, count); }
   2551             }
   2552           break;
   2553 
   2554           default:
   2555           if (isinclass) { ADD_NEW(next_state_offset, 0); }
   2556           break;
   2557           }
   2558         }
   2559       break;
   2560 
   2561 /* ========================================================================== */
   2562       /* These are the opcodes for fancy brackets of various kinds. We have
   2563       to use recursion in order to handle them. The "always failing" assertion
   2564       (?!) is optimised to OP_FAIL when compiling, so we have to support that,
   2565       though the other "backtracking verbs" are not supported. */
   2566 
   2567       case OP_FAIL:
   2568       forced_fail++;    /* Count FAILs for multiple states */
   2569       break;
   2570 
   2571       case OP_ASSERT:
   2572       case OP_ASSERT_NOT:
   2573       case OP_ASSERTBACK:
   2574       case OP_ASSERTBACK_NOT:
   2575         {
   2576         PCRE2_SPTR endasscode = code + GET(code, 1);
   2577         PCRE2_SIZE local_offsets[2];
   2578         int rc;
   2579         int local_workspace[1000];
   2580 
   2581         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
   2582 
   2583         rc = internal_dfa_match(
   2584           mb,                                   /* static match data */
   2585           code,                                 /* this subexpression's code */
   2586           ptr,                                  /* where we currently are */
   2587           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
   2588           local_offsets,                        /* offset vector */
   2589           sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
   2590           local_workspace,                      /* workspace vector */
   2591           sizeof(local_workspace)/sizeof(int),  /* size of same */
   2592           rlevel);                              /* function recursion level */
   2593 
   2594         if (rc == PCRE2_ERROR_DFA_UITEM) return rc;
   2595         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
   2596             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
   2597         }
   2598       break;
   2599 
   2600       /*-----------------------------------------------------------------*/
   2601       case OP_COND:
   2602       case OP_SCOND:
   2603         {
   2604         PCRE2_SIZE local_offsets[1000];
   2605         int local_workspace[1000];
   2606         int codelink = (int)GET(code, 1);
   2607         PCRE2_UCHAR condcode;
   2608 
   2609         /* Because of the way auto-callout works during compile, a callout item
   2610         is inserted between OP_COND and an assertion condition. This does not
   2611         happen for the other conditions. */
   2612 
   2613         if (code[LINK_SIZE + 1] == OP_CALLOUT
   2614             || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
   2615           {
   2616           PCRE2_SIZE callout_length = (code[LINK_SIZE + 1] == OP_CALLOUT)?
   2617             (PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
   2618             (PCRE2_SIZE)GET(code, 2 + 3*LINK_SIZE);
   2619 
   2620           rrc = 0;
   2621           if (mb->callout != NULL)
   2622             {
   2623             pcre2_callout_block cb;
   2624             cb.version          = 1;
   2625             cb.capture_top      = 1;
   2626             cb.capture_last     = 0;
   2627             cb.offset_vector    = offsets;
   2628             cb.mark             = NULL;   /* No (*MARK) support */
   2629             cb.subject          = start_subject;
   2630             cb.subject_length   = (PCRE2_SIZE)(end_subject - start_subject);
   2631             cb.start_match      = (PCRE2_SIZE)(current_subject - start_subject);
   2632             cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
   2633             cb.pattern_position = GET(code, LINK_SIZE + 2);
   2634             cb.next_item_length = GET(code, LINK_SIZE + 2 + LINK_SIZE);
   2635 
   2636             if (code[LINK_SIZE + 1] == OP_CALLOUT)
   2637               {
   2638               cb.callout_number = code[2 + 3*LINK_SIZE];
   2639               cb.callout_string_offset = 0;
   2640               cb.callout_string = NULL;
   2641               cb.callout_string_length = 0;
   2642               }
   2643             else
   2644               {
   2645               cb.callout_number = 0;
   2646               cb.callout_string_offset = GET(code, 2 + 4*LINK_SIZE);
   2647               cb.callout_string = code + (2 + 5*LINK_SIZE) + 1;
   2648               cb.callout_string_length =
   2649                 callout_length - (1 + 4*LINK_SIZE) - 2;
   2650               }
   2651 
   2652             if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
   2653               return rrc;   /* Abandon */
   2654             }
   2655           if (rrc > 0) break;                      /* Fail this thread */
   2656           code += callout_length;                  /* Skip callout data */
   2657           }
   2658 
   2659         condcode = code[LINK_SIZE+1];
   2660 
   2661         /* Back reference conditions and duplicate named recursion conditions
   2662         are not supported */
   2663 
   2664         if (condcode == OP_CREF || condcode == OP_DNCREF ||
   2665             condcode == OP_DNRREF)
   2666           return PCRE2_ERROR_DFA_UCOND;
   2667 
   2668         /* The DEFINE condition is always false, and the assertion (?!) is
   2669         converted to OP_FAIL. */
   2670 
   2671         if (condcode == OP_FALSE || condcode == OP_FAIL)
   2672           { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
   2673 
   2674         /* There is also an always-true condition */
   2675 
   2676         else if (condcode == OP_TRUE)
   2677           { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
   2678 
   2679         /* The only supported version of OP_RREF is for the value RREF_ANY,
   2680         which means "test if in any recursion". We can't test for specifically
   2681         recursed groups. */
   2682 
   2683         else if (condcode == OP_RREF)
   2684           {
   2685           unsigned int value = GET2(code, LINK_SIZE + 2);
   2686           if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
   2687           if (mb->recursive != NULL)
   2688             { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
   2689           else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
   2690           }
   2691 
   2692         /* Otherwise, the condition is an assertion */
   2693 
   2694         else
   2695           {
   2696           int rc;
   2697           PCRE2_SPTR asscode = code + LINK_SIZE + 1;
   2698           PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
   2699 
   2700           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
   2701 
   2702           rc = internal_dfa_match(
   2703             mb,                                   /* fixed match data */
   2704             asscode,                              /* this subexpression's code */
   2705             ptr,                                  /* where we currently are */
   2706             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
   2707             local_offsets,                        /* offset vector */
   2708             sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
   2709             local_workspace,                      /* workspace vector */
   2710             sizeof(local_workspace)/sizeof(int),  /* size of same */
   2711             rlevel);                              /* function recursion level */
   2712 
   2713           if (rc == PCRE2_ERROR_DFA_UITEM) return rc;
   2714           if ((rc >= 0) ==
   2715                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
   2716             { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
   2717           else
   2718             { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
   2719           }
   2720         }
   2721       break;
   2722 
   2723       /*-----------------------------------------------------------------*/
   2724       case OP_RECURSE:
   2725         {
   2726         dfa_recursion_info *ri;
   2727         PCRE2_SIZE local_offsets[1000];
   2728         int local_workspace[1000];
   2729         PCRE2_SPTR callpat = start_code + GET(code, 1);
   2730         uint32_t recno = (callpat == mb->start_code)? 0 :
   2731           GET2(callpat, 1 + LINK_SIZE);
   2732         int rc;
   2733 
   2734         /* Check for repeating a recursion without advancing the subject
   2735         pointer. This should catch convoluted mutual recursions. (Some simple
   2736         cases are caught at compile time.) */
   2737 
   2738         for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
   2739           if (recno == ri->group_num && ptr == ri->subject_position)
   2740             return PCRE2_ERROR_RECURSELOOP;
   2741 
   2742         /* Remember this recursion and where we started it so as to
   2743         catch infinite loops. */
   2744 
   2745         new_recursive.group_num = recno;
   2746         new_recursive.subject_position = ptr;
   2747         new_recursive.prevrec = mb->recursive;
   2748         mb->recursive = &new_recursive;
   2749 
   2750         rc = internal_dfa_match(
   2751           mb,                                   /* fixed match data */
   2752           callpat,                              /* this subexpression's code */
   2753           ptr,                                  /* where we currently are */
   2754           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
   2755           local_offsets,                        /* offset vector */
   2756           sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
   2757           local_workspace,                      /* workspace vector */
   2758           sizeof(local_workspace)/sizeof(int),  /* size of same */
   2759           rlevel);                              /* function recursion level */
   2760 
   2761         mb->recursive = new_recursive.prevrec;  /* Done this recursion */
   2762 
   2763         /* Ran out of internal offsets */
   2764 
   2765         if (rc == 0) return PCRE2_ERROR_DFA_RECURSE;
   2766 
   2767         /* For each successful matched substring, set up the next state with a
   2768         count of characters to skip before trying it. Note that the count is in
   2769         characters, not code units. */
   2770 
   2771         if (rc > 0)
   2772           {
   2773           for (rc = rc*2 - 2; rc >= 0; rc -= 2)
   2774             {
   2775             PCRE2_SIZE charcount = local_offsets[rc+1] - local_offsets[rc];
   2776 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
   2777             if (utf)
   2778               {
   2779               PCRE2_SPTR p = start_subject + local_offsets[rc];
   2780               PCRE2_SPTR pp = start_subject + local_offsets[rc+1];
   2781               while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
   2782               }
   2783 #endif
   2784             if (charcount > 0)
   2785               {
   2786               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0,
   2787                 (int)(charcount - 1));
   2788               }
   2789             else
   2790               {
   2791               ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
   2792               }
   2793             }
   2794           }
   2795         else if (rc != PCRE2_ERROR_NOMATCH) return rc;
   2796         }
   2797       break;
   2798 
   2799       /*-----------------------------------------------------------------*/
   2800       case OP_BRAPOS:
   2801       case OP_SBRAPOS:
   2802       case OP_CBRAPOS:
   2803       case OP_SCBRAPOS:
   2804       case OP_BRAPOSZERO:
   2805         {
   2806         PCRE2_SIZE charcount, matched_count;
   2807         PCRE2_SPTR local_ptr = ptr;
   2808         BOOL allow_zero;
   2809 
   2810         if (codevalue == OP_BRAPOSZERO)
   2811           {
   2812           allow_zero = TRUE;
   2813           codevalue = *(++code);  /* Codevalue will be one of above BRAs */
   2814           }
   2815         else allow_zero = FALSE;
   2816 
   2817         /* Loop to match the subpattern as many times as possible as if it were
   2818         a complete pattern. */
   2819 
   2820         for (matched_count = 0;; matched_count++)
   2821           {
   2822           PCRE2_SIZE local_offsets[2];
   2823           int local_workspace[1000];
   2824 
   2825           int rc = internal_dfa_match(
   2826             mb,                                   /* fixed match data */
   2827             code,                                 /* this subexpression's code */
   2828             local_ptr,                            /* where we currently are */
   2829             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
   2830             local_offsets,                        /* offset vector */
   2831             sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
   2832             local_workspace,                      /* workspace vector */
   2833             sizeof(local_workspace)/sizeof(int),  /* size of same */
   2834             rlevel);                              /* function recursion level */
   2835 
   2836           /* Failed to match */
   2837 
   2838           if (rc < 0)
   2839             {
   2840             if (rc != PCRE2_ERROR_NOMATCH) return rc;
   2841             break;
   2842             }
   2843 
   2844           /* Matched: break the loop if zero characters matched. */
   2845 
   2846           charcount = local_offsets[1] - local_offsets[0];
   2847           if (charcount == 0) break;
   2848           local_ptr += charcount;    /* Advance temporary position ptr */
   2849           }
   2850 
   2851         /* At this point we have matched the subpattern matched_count
   2852         times, and local_ptr is pointing to the character after the end of the
   2853         last match. */
   2854 
   2855         if (matched_count > 0 || allow_zero)
   2856           {
   2857           PCRE2_SPTR end_subpattern = code;
   2858           int next_state_offset;
   2859 
   2860           do { end_subpattern += GET(end_subpattern, 1); }
   2861             while (*end_subpattern == OP_ALT);
   2862           next_state_offset =
   2863             (int)(end_subpattern - start_code + LINK_SIZE + 1);
   2864 
   2865           /* Optimization: if there are no more active states, and there
   2866           are no new states yet set up, then skip over the subject string
   2867           right here, to save looping. Otherwise, set up the new state to swing
   2868           into action when the end of the matched substring is reached. */
   2869 
   2870           if (i + 1 >= active_count && new_count == 0)
   2871             {
   2872             ptr = local_ptr;
   2873             clen = 0;
   2874             ADD_NEW(next_state_offset, 0);
   2875             }
   2876           else
   2877             {
   2878             PCRE2_SPTR p = ptr;
   2879             PCRE2_SPTR pp = local_ptr;
   2880             charcount = (PCRE2_SIZE)(pp - p);
   2881 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
   2882             if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
   2883 #endif
   2884             ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
   2885             }
   2886           }
   2887         }
   2888       break;
   2889 
   2890       /*-----------------------------------------------------------------*/
   2891       case OP_ONCE:
   2892       case OP_ONCE_NC:
   2893         {
   2894         PCRE2_SIZE local_offsets[2];
   2895         int local_workspace[1000];
   2896 
   2897         int rc = internal_dfa_match(
   2898           mb,                                   /* fixed match data */
   2899           code,                                 /* this subexpression's code */
   2900           ptr,                                  /* where we currently are */
   2901           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
   2902           local_offsets,                        /* offset vector */
   2903           sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
   2904           local_workspace,                      /* workspace vector */
   2905           sizeof(local_workspace)/sizeof(int),  /* size of same */
   2906           rlevel);                              /* function recursion level */
   2907 
   2908         if (rc >= 0)
   2909           {
   2910           PCRE2_SPTR end_subpattern = code;
   2911           PCRE2_SIZE charcount = local_offsets[1] - local_offsets[0];
   2912           int next_state_offset, repeat_state_offset;
   2913 
   2914           do { end_subpattern += GET(end_subpattern, 1); }
   2915             while (*end_subpattern == OP_ALT);
   2916           next_state_offset =
   2917             (int)(end_subpattern - start_code + LINK_SIZE + 1);
   2918 
   2919           /* If the end of this subpattern is KETRMAX or KETRMIN, we must
   2920           arrange for the repeat state also to be added to the relevant list.
   2921           Calculate the offset, or set -1 for no repeat. */
   2922 
   2923           repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
   2924                                  *end_subpattern == OP_KETRMIN)?
   2925             (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
   2926 
   2927           /* If we have matched an empty string, add the next state at the
   2928           current character pointer. This is important so that the duplicate
   2929           checking kicks in, which is what breaks infinite loops that match an
   2930           empty string. */
   2931 
   2932           if (charcount == 0)
   2933             {
   2934             ADD_ACTIVE(next_state_offset, 0);
   2935             }
   2936 
   2937           /* Optimization: if there are no more active states, and there
   2938           are no new states yet set up, then skip over the subject string
   2939           right here, to save looping. Otherwise, set up the new state to swing
   2940           into action when the end of the matched substring is reached. */
   2941 
   2942           else if (i + 1 >= active_count && new_count == 0)
   2943             {
   2944             ptr += charcount;
   2945             clen = 0;
   2946             ADD_NEW(next_state_offset, 0);
   2947 
   2948             /* If we are adding a repeat state at the new character position,
   2949             we must fudge things so that it is the only current state.
   2950             Otherwise, it might be a duplicate of one we processed before, and
   2951             that would cause it to be skipped. */
   2952 
   2953             if (repeat_state_offset >= 0)
   2954               {
   2955               next_active_state = active_states;
   2956               active_count = 0;
   2957               i = -1;
   2958               ADD_ACTIVE(repeat_state_offset, 0);
   2959               }
   2960             }
   2961           else
   2962             {
   2963 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
   2964             if (utf)
   2965               {
   2966               PCRE2_SPTR p = start_subject + local_offsets[0];
   2967               PCRE2_SPTR pp = start_subject + local_offsets[1];
   2968               while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
   2969               }
   2970 #endif
   2971             ADD_NEW_DATA(-next_state_offset, 0, (int)(charcount - 1));
   2972             if (repeat_state_offset >= 0)
   2973               { ADD_NEW_DATA(-repeat_state_offset, 0, (int)(charcount - 1)); }
   2974             }
   2975           }
   2976         else if (rc != PCRE2_ERROR_NOMATCH) return rc;
   2977         }
   2978       break;
   2979 
   2980 
   2981 /* ========================================================================== */
   2982       /* Handle callouts */
   2983 
   2984       case OP_CALLOUT:
   2985       case OP_CALLOUT_STR:
   2986         {
   2987         unsigned int callout_length = (*code == OP_CALLOUT)
   2988             ? PRIV(OP_lengths)[OP_CALLOUT] : GET(code, 1 + 2*LINK_SIZE);
   2989         rrc = 0;
   2990 
   2991         if (mb->callout != NULL)
   2992           {
   2993           pcre2_callout_block cb;
   2994           cb.version          = 1;
   2995           cb.capture_top      = 1;
   2996           cb.capture_last     = 0;
   2997           cb.offset_vector    = offsets;
   2998           cb.mark             = NULL;   /* No (*MARK) support */
   2999           cb.subject          = start_subject;
   3000           cb.subject_length   = (PCRE2_SIZE)(end_subject - start_subject);
   3001           cb.start_match      = (PCRE2_SIZE)(current_subject - start_subject);
   3002           cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
   3003           cb.pattern_position = GET(code, 1);
   3004           cb.next_item_length = GET(code, 1 + LINK_SIZE);
   3005 
   3006           if (*code == OP_CALLOUT)
   3007             {
   3008             cb.callout_number = code[1 + 2*LINK_SIZE];
   3009             cb.callout_string_offset = 0;
   3010             cb.callout_string = NULL;
   3011             cb.callout_string_length = 0;
   3012             }
   3013           else
   3014             {
   3015             cb.callout_number = 0;
   3016             cb.callout_string_offset = GET(code, 1 + 3*LINK_SIZE);
   3017             cb.callout_string = code + (1 + 4*LINK_SIZE) + 1;
   3018             cb.callout_string_length =
   3019               callout_length - (1 + 4*LINK_SIZE) - 2;
   3020             }
   3021 
   3022           if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
   3023             return rrc;   /* Abandon */
   3024           }
   3025         if (rrc == 0)
   3026           { ADD_ACTIVE(state_offset + (int)callout_length, 0); }
   3027         }
   3028       break;
   3029 
   3030 
   3031 /* ========================================================================== */
   3032       default:        /* Unsupported opcode */
   3033       return PCRE2_ERROR_DFA_UITEM;
   3034       }
   3035 
   3036     NEXT_ACTIVE_STATE: continue;
   3037 
   3038     }      /* End of loop scanning active states */
   3039 
   3040   /* We have finished the processing at the current subject character. If no
   3041   new states have been set for the next character, we have found all the
   3042   matches that we are going to find. If we are at the top level and partial
   3043   matching has been requested, check for appropriate conditions.
   3044 
   3045   The "forced_ fail" variable counts the number of (*F) encountered for the
   3046   character. If it is equal to the original active_count (saved in
   3047   workspace[1]) it means that (*F) was found on every active state. In this
   3048   case we don't want to give a partial match.
   3049 
   3050   The "could_continue" variable is true if a state could have continued but
   3051   for the fact that the end of the subject was reached. */
   3052 
   3053   if (new_count <= 0)
   3054     {
   3055     if (rlevel == 1 &&                               /* Top level, and */
   3056         could_continue &&                            /* Some could go on, and */
   3057         forced_fail != workspace[1] &&               /* Not all forced fail & */
   3058         (                                            /* either... */
   3059         (mb->moptions & PCRE2_PARTIAL_HARD) != 0      /* Hard partial */
   3060         ||                                           /* or... */
   3061         ((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
   3062          match_count < 0)                            /* no matches */
   3063         ) &&                                         /* And... */
   3064         (
   3065         partial_newline ||                           /* Either partial NL */
   3066           (                                          /* or ... */
   3067           ptr >= end_subject &&                /* End of subject and */
   3068           ptr > mb->start_used_ptr)            /* Inspected non-empty string */
   3069           )
   3070         )
   3071       match_count = PCRE2_ERROR_PARTIAL;
   3072     break;        /* In effect, "return", but see the comment below */
   3073     }
   3074 
   3075   /* One or more states are active for the next character. */
   3076 
   3077   ptr += clen;    /* Advance to next subject character */
   3078   }               /* Loop to move along the subject string */
   3079 
   3080 /* Control gets here from "break" a few lines above. We do it this way because
   3081 if we use "return" above, we have compiler trouble. Some compilers warn if
   3082 there's nothing here because they think the function doesn't return a value. On
   3083 the other hand, if we put a dummy statement here, some more clever compilers
   3084 complain that it can't be reached. Sigh. */
   3085 
   3086 return match_count;
   3087 }
   3088 
   3089 
   3090 
   3091 /*************************************************
   3092 *     Match a pattern using the DFA algorithm    *
   3093 *************************************************/
   3094 
   3095 /* This function matches a compiled pattern to a subject string, using the
   3096 alternate matching algorithm that finds all matches at once.
   3097 
   3098 Arguments:
   3099   code          points to the compiled pattern
   3100   subject       subject string
   3101   length        length of subject string
   3102   startoffset   where to start matching in the subject
   3103   options       option bits
   3104   match_data    points to a match data structure
   3105   gcontext      points to a match context
   3106   workspace     pointer to workspace
   3107   wscount       size of workspace
   3108 
   3109 Returns:        > 0 => number of match offset pairs placed in offsets
   3110                 = 0 => offsets overflowed; longest matches are present
   3111                  -1 => failed to match
   3112                < -1 => some kind of unexpected problem
   3113 */
   3114 
   3115 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
   3116 pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
   3117   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
   3118   pcre2_match_context *mcontext, int *workspace, size_t wscount)
   3119 {
   3120 const pcre2_real_code *re = (const pcre2_real_code *)code;
   3121 
   3122 PCRE2_SPTR start_match;
   3123 PCRE2_SPTR end_subject;
   3124 PCRE2_SPTR bumpalong_limit;
   3125 PCRE2_SPTR req_cu_ptr;
   3126 
   3127 BOOL utf, anchored, startline, firstline;
   3128 
   3129 BOOL has_first_cu = FALSE;
   3130 BOOL has_req_cu = FALSE;
   3131 PCRE2_UCHAR first_cu = 0;
   3132 PCRE2_UCHAR first_cu2 = 0;
   3133 PCRE2_UCHAR req_cu = 0;
   3134 PCRE2_UCHAR req_cu2 = 0;
   3135 
   3136 const uint8_t *start_bits = NULL;
   3137 
   3138 /* We need to have mb pointing to a match block, because the IS_NEWLINE macro
   3139 is used below, and it expects NLBLOCK to be defined as a pointer. */
   3140 
   3141 dfa_match_block actual_match_block;
   3142 dfa_match_block *mb = &actual_match_block;
   3143 
   3144 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
   3145 subject string. */
   3146 
   3147 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
   3148 
   3149 /* Plausibility checks */
   3150 
   3151 if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
   3152 if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
   3153   return PCRE2_ERROR_NULL;
   3154 if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
   3155 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
   3156 
   3157 /* Check that the first field in the block is the magic number. If it is not,
   3158 return with PCRE2_ERROR_BADMAGIC. */
   3159 
   3160 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
   3161 
   3162 /* Check the code unit width. */
   3163 
   3164 if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
   3165   return PCRE2_ERROR_BADMODE;
   3166 
   3167 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
   3168 options variable for this function. Users of PCRE2 who are not calling the
   3169 function directly would like to have a way of setting these flags, in the same
   3170 way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
   3171 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
   3172 (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
   3173 transferred to the options for this function. The bits are guaranteed to be
   3174 adjacent, but do not have the same values. This bit of Boolean trickery assumes
   3175 that the match-time bits are not more significant than the flag bits. If by
   3176 accident this is not the case, a compile-time division by zero error will
   3177 occur. */
   3178 
   3179 #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
   3180 #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
   3181 options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
   3182 #undef FF
   3183 #undef OO
   3184 
   3185 /* If restarting after a partial match, do some sanity checks on the contents
   3186 of the workspace. */
   3187 
   3188 if ((options & PCRE2_DFA_RESTART) != 0)
   3189   {
   3190   if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
   3191     workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
   3192       return PCRE2_ERROR_DFA_BADRESTART;
   3193   }
   3194 
   3195 /* Set some local values */
   3196 
   3197 utf = (re->overall_options & PCRE2_UTF) != 0;
   3198 start_match = subject + start_offset;
   3199 end_subject = subject + length;
   3200 req_cu_ptr = start_match - 1;
   3201 anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
   3202   (re->overall_options & PCRE2_ANCHORED) != 0;
   3203 
   3204 /* The "must be at the start of a line" flags are used in a loop when finding
   3205 where to start. */
   3206 
   3207 startline = (re->flags & PCRE2_STARTLINE) != 0;
   3208 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
   3209 bumpalong_limit = end_subject;
   3210 
   3211 /* Get data from the match context, if present, and fill in the fields in the
   3212 match block. It is an error to set an offset limit without setting the flag at
   3213 compile time. */
   3214 
   3215 if (mcontext == NULL)
   3216   {
   3217   mb->callout = NULL;
   3218   mb->memctl = re->memctl;
   3219   }
   3220 else
   3221   {
   3222   if (mcontext->offset_limit != PCRE2_UNSET)
   3223     {
   3224     if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
   3225       return PCRE2_ERROR_BADOFFSETLIMIT;
   3226     bumpalong_limit = subject + mcontext->offset_limit;
   3227     }
   3228   mb->callout = mcontext->callout;
   3229   mb->callout_data = mcontext->callout_data;
   3230   mb->memctl = mcontext->memctl;
   3231   }
   3232 
   3233 mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
   3234   re->name_count * re->name_entry_size;
   3235 mb->tables = re->tables;
   3236 mb->start_subject = subject;
   3237 mb->end_subject = end_subject;
   3238 mb->start_offset = start_offset;
   3239 mb->moptions = options;
   3240 mb->poptions = re->overall_options;
   3241 
   3242 /* Process the \R and newline settings. */
   3243 
   3244 mb->bsr_convention = re->bsr_convention;
   3245 mb->nltype = NLTYPE_FIXED;
   3246 switch(re->newline_convention)
   3247   {
   3248   case PCRE2_NEWLINE_CR:
   3249   mb->nllen = 1;
   3250   mb->nl[0] = CHAR_CR;
   3251   break;
   3252 
   3253   case PCRE2_NEWLINE_LF:
   3254   mb->nllen = 1;
   3255   mb->nl[0] = CHAR_NL;
   3256   break;
   3257 
   3258   case PCRE2_NEWLINE_CRLF:
   3259   mb->nllen = 2;
   3260   mb->nl[0] = CHAR_CR;
   3261   mb->nl[1] = CHAR_NL;
   3262   break;
   3263 
   3264   case PCRE2_NEWLINE_ANY:
   3265   mb->nltype = NLTYPE_ANY;
   3266   break;
   3267 
   3268   case PCRE2_NEWLINE_ANYCRLF:
   3269   mb->nltype = NLTYPE_ANYCRLF;
   3270   break;
   3271 
   3272   default: return PCRE2_ERROR_INTERNAL;
   3273   }
   3274 
   3275 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
   3276 we must also check that a starting offset does not point into the middle of a
   3277 multiunit character. We check only the portion of the subject that is going to
   3278 be inspected during matching - from the offset minus the maximum back reference
   3279 to the given length. This saves time when a small part of a large subject is
   3280 being matched by the use of a starting offset. Note that the maximum lookbehind
   3281 is a number of characters, not code units. */
   3282 
   3283 #ifdef SUPPORT_UNICODE
   3284 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
   3285   {
   3286   PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
   3287 
   3288   if (start_offset > 0)
   3289     {
   3290 #if PCRE2_CODE_UNIT_WIDTH != 32
   3291     unsigned int i;
   3292     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
   3293       return PCRE2_ERROR_BADUTFOFFSET;
   3294     for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
   3295       {
   3296       check_subject--;
   3297       while (check_subject > subject &&
   3298 #if PCRE2_CODE_UNIT_WIDTH == 8
   3299       (*check_subject & 0xc0) == 0x80)
   3300 #else  /* 16-bit */
   3301       (*check_subject & 0xfc00) == 0xdc00)
   3302 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
   3303         check_subject--;
   3304       }
   3305 #else   /* In the 32-bit library, one code unit equals one character. */
   3306     check_subject -= re->max_lookbehind;
   3307     if (check_subject < subject) check_subject = subject;
   3308 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
   3309     }
   3310 
   3311   /* Validate the relevant portion of the subject. After an error, adjust the
   3312   offset to be an absolute offset in the whole string. */
   3313 
   3314   match_data->rc = PRIV(valid_utf)(check_subject,
   3315     length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
   3316   if (match_data->rc != 0)
   3317     {
   3318     match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
   3319     return match_data->rc;
   3320     }
   3321   }
   3322 #endif  /* SUPPORT_UNICODE */
   3323 
   3324 /* Set up the first code unit to match, if available. The first_codeunit value
   3325 is never set for an anchored regular expression, but the anchoring may be
   3326 forced at run time, so we have to test for anchoring. The first code unit may
   3327 be unset for an unanchored pattern, of course. If there's no first code unit
   3328 there may be a bitmap of possible first characters. */
   3329 
   3330 if (!anchored)
   3331   {
   3332   if ((re->flags & PCRE2_FIRSTSET) != 0)
   3333     {
   3334     has_first_cu = TRUE;
   3335     first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
   3336     if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
   3337       {
   3338       first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
   3339 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
   3340       if (utf && first_cu > 127)
   3341         first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
   3342 #endif
   3343       }
   3344     }
   3345   else
   3346     if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
   3347       start_bits = re->start_bitmap;
   3348   }
   3349 
   3350 /* For anchored or unanchored matches, there may be a "last known required
   3351 character" set. */
   3352 
   3353 if ((re->flags & PCRE2_LASTSET) != 0)
   3354   {
   3355   has_req_cu = TRUE;
   3356   req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
   3357   if ((re->flags & PCRE2_LASTCASELESS) != 0)
   3358     {
   3359     req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
   3360 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
   3361     if (utf && req_cu > 127) req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
   3362 #endif
   3363     }
   3364   }
   3365 
   3366 /* Fill in fields that are always returned in the match data. */
   3367 
   3368 match_data->code = re;
   3369 match_data->subject = subject;
   3370 match_data->mark = NULL;
   3371 match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
   3372 
   3373 /* Call the main matching function, looping for a non-anchored regex after a
   3374 failed match. If not restarting, perform certain optimizations at the start of
   3375 a match. */
   3376 
   3377 for (;;)
   3378   {
   3379   int rc;
   3380 
   3381   /* ----------------- Start of match optimizations ---------------- */
   3382 
   3383   /* There are some optimizations that avoid running the match if a known
   3384   starting point is not found, or if a known later code unit is not present.
   3385   However, there is an option (settable at compile time) that disables
   3386   these, for testing and for ensuring that all callouts do actually occur.
   3387   The optimizations must also be avoided when restarting a DFA match. */
   3388 
   3389   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
   3390       (options & PCRE2_DFA_RESTART) == 0)
   3391     {
   3392     PCRE2_SPTR save_end_subject = end_subject;
   3393 
   3394     /* If firstline is TRUE, the start of the match is constrained to the first
   3395     line of a multiline string. That is, the match must be before or at the
   3396     first newline. Implement this by temporarily adjusting end_subject so that
   3397     we stop the optimization scans at a newline. If the match fails at the
   3398     newline, later code breaks this loop. */
   3399 
   3400     if (firstline)
   3401       {
   3402       PCRE2_SPTR t = start_match;
   3403 #ifdef SUPPORT_UNICODE
   3404       if (utf)
   3405         {
   3406         while (t < mb->end_subject && !IS_NEWLINE(t))
   3407           {
   3408           t++;
   3409           ACROSSCHAR(t < end_subject, *t, t++);
   3410           }
   3411         }
   3412       else
   3413 #endif
   3414       while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
   3415       end_subject = t;
   3416       }
   3417 
   3418     /* Advance to a unique first code unit if there is one. */
   3419 
   3420     if (has_first_cu)
   3421       {
   3422       PCRE2_UCHAR smc;
   3423       if (first_cu != first_cu2)
   3424         while (start_match < end_subject &&
   3425           (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
   3426           start_match++;
   3427       else
   3428         while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
   3429           start_match++;
   3430       }
   3431 
   3432     /* Or to just after a linebreak for a multiline match */
   3433 
   3434     else if (startline)
   3435       {
   3436       if (start_match > mb->start_subject + start_offset)
   3437         {
   3438 #ifdef SUPPORT_UNICODE
   3439         if (utf)
   3440           {
   3441           while (start_match < end_subject && !WAS_NEWLINE(start_match))
   3442             {
   3443             start_match++;
   3444             ACROSSCHAR(start_match < end_subject, *start_match,
   3445               start_match++);
   3446             }
   3447           }
   3448         else
   3449 #endif
   3450         while (start_match < end_subject && !WAS_NEWLINE(start_match))
   3451           start_match++;
   3452 
   3453         /* If we have just passed a CR and the newline option is ANY or
   3454         ANYCRLF, and we are now at a LF, advance the match position by one more
   3455         code unit. */
   3456 
   3457         if (start_match[-1] == CHAR_CR &&
   3458              (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
   3459              start_match < end_subject &&
   3460              UCHAR21TEST(start_match) == CHAR_NL)
   3461           start_match++;
   3462         }
   3463       }
   3464 
   3465     /* Or to a non-unique first code unit if any have been identified. The
   3466     bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
   3467     code units greater than 254 set the 255 bit. */
   3468 
   3469     else if (start_bits != NULL)
   3470       {
   3471       while (start_match < end_subject)
   3472         {
   3473         register uint32_t c = UCHAR21TEST(start_match);
   3474 #if PCRE2_CODE_UNIT_WIDTH != 8
   3475         if (c > 255) c = 255;
   3476 #endif
   3477         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
   3478         start_match++;
   3479         }
   3480       }
   3481 
   3482     /* Restore fudged end_subject */
   3483 
   3484     end_subject = save_end_subject;
   3485 
   3486     /* The following two optimizations are disabled for partial matching. */
   3487 
   3488     if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
   3489       {
   3490       /* The minimum matching length is a lower bound; no actual string of that
   3491       length may actually match the pattern. Although the value is, strictly,
   3492       in characters, we treat it as code units to avoid spending too much time
   3493       in this optimization. */
   3494 
   3495       if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
   3496 
   3497       /* If req_cu is set, we know that that code unit must appear in the
   3498       subject for the match to succeed. If the first code unit is set, req_cu
   3499       must be later in the subject; otherwise the test starts at the match
   3500       point. This optimization can save a huge amount of backtracking in
   3501       patterns with nested unlimited repeats that aren't going to match.
   3502       Writing separate code for cased/caseless versions makes it go faster, as
   3503       does using an autoincrement and backing off on a match.
   3504 
   3505       HOWEVER: when the subject string is very, very long, searching to its end
   3506       can take a long time, and give bad performance on quite ordinary
   3507       patterns. This showed up when somebody was matching something like
   3508       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
   3509       sufficiently long. */
   3510 
   3511       if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
   3512         {
   3513         register PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
   3514 
   3515         /* We don't need to repeat the search if we haven't yet reached the
   3516         place we found it at last time. */
   3517 
   3518         if (p > req_cu_ptr)
   3519           {
   3520           if (req_cu != req_cu2)
   3521             {
   3522             while (p < end_subject)
   3523               {
   3524               register uint32_t pp = UCHAR21INCTEST(p);
   3525               if (pp == req_cu || pp == req_cu2) { p--; break; }
   3526               }
   3527             }
   3528           else
   3529             {
   3530             while (p < end_subject)
   3531               {
   3532               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
   3533               }
   3534             }
   3535 
   3536           /* If we can't find the required code unit, break the matching loop,
   3537           forcing a match failure. */
   3538 
   3539           if (p >= end_subject) break;
   3540 
   3541           /* If we have found the required code unit, save the point where we
   3542           found it, so that we don't search again next time round the loop if
   3543           the start hasn't passed this code unit yet. */
   3544 
   3545           req_cu_ptr = p;
   3546           }
   3547         }
   3548       }
   3549     }
   3550 
   3551   /* ------------ End of start of match optimizations ------------ */
   3552 
   3553   /* Give no match if we have passed the bumpalong limit. */
   3554 
   3555   if (start_match > bumpalong_limit) break;
   3556 
   3557   /* OK, now we can do the business */
   3558 
   3559   mb->start_used_ptr = start_match;
   3560   mb->last_used_ptr = start_match;
   3561   mb->recursive = NULL;
   3562 
   3563   rc = internal_dfa_match(
   3564     mb,                           /* fixed match data */
   3565     mb->start_code,               /* this subexpression's code */
   3566     start_match,                  /* where we currently are */
   3567     start_offset,                 /* start offset in subject */
   3568     match_data->ovector,          /* offset vector */
   3569     (uint32_t)match_data->oveccount * 2,  /* actual size of same */
   3570     workspace,                    /* workspace vector */
   3571     (int)wscount,                 /* size of same */
   3572     0);                           /* function recurse level */
   3573 
   3574   /* Anything other than "no match" means we are done, always; otherwise, carry
   3575   on only if not anchored. */
   3576 
   3577   if (rc != PCRE2_ERROR_NOMATCH || anchored)
   3578     {
   3579     if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
   3580       {
   3581       match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
   3582       match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
   3583       }
   3584     match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
   3585     match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
   3586     match_data->startchar = (PCRE2_SIZE)(start_match - subject);
   3587     match_data->rc = rc;
   3588     return rc;
   3589     }
   3590 
   3591   /* Advance to the next subject character unless we are at the end of a line
   3592   and firstline is set. */
   3593 
   3594   if (firstline && IS_NEWLINE(start_match)) break;
   3595   start_match++;
   3596 #ifdef SUPPORT_UNICODE
   3597   if (utf)
   3598     {
   3599     ACROSSCHAR(start_match < end_subject, *start_match,
   3600       start_match++);
   3601     }
   3602 #endif
   3603   if (start_match > end_subject) break;
   3604 
   3605   /* If we have just passed a CR and we are now at a LF, and the pattern does
   3606   not contain any explicit matches for \r or \n, and the newline option is CRLF
   3607   or ANY or ANYCRLF, advance the match position by one more character. */
   3608 
   3609   if (UCHAR21TEST(start_match - 1) == CHAR_CR &&
   3610       start_match < end_subject &&
   3611       UCHAR21TEST(start_match) == CHAR_NL &&
   3612       (re->flags & PCRE2_HASCRORLF) == 0 &&
   3613         (mb->nltype == NLTYPE_ANY ||
   3614          mb->nltype == NLTYPE_ANYCRLF ||
   3615          mb->nllen == 2))
   3616     start_match++;
   3617 
   3618   }   /* "Bumpalong" loop */
   3619 
   3620 
   3621 return PCRE2_ERROR_NOMATCH;
   3622 }
   3623 
   3624 /* End of pcre2_dfa_match.c */
   3625