Home | History | Annotate | Download | only in pcre
      1 /* This is JavaScriptCore's variant of the PCRE library. While this library
      2 started out as a copy of PCRE, many of the features of PCRE have been
      3 removed. This library now supports only the regular expression features
      4 required by the JavaScript language specification, and has only the functions
      5 needed by JavaScriptCore and the rest of WebKit.
      6 
      7                  Originally written by Philip Hazel
      8            Copyright (c) 1997-2006 University of Cambridge
      9     Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
     10     Copyright (C) 2007 Eric Seidel <eric (at) webkit.org>
     11 
     12 -----------------------------------------------------------------------------
     13 Redistribution and use in source and binary forms, with or without
     14 modification, are permitted provided that the following conditions are met:
     15 
     16     * Redistributions of source code must retain the above copyright notice,
     17       this list of conditions and the following disclaimer.
     18 
     19     * Redistributions in binary form must reproduce the above copyright
     20       notice, this list of conditions and the following disclaimer in the
     21       documentation and/or other materials provided with the distribution.
     22 
     23     * Neither the name of the University of Cambridge nor the names of its
     24       contributors may be used to endorse or promote products derived from
     25       this software without specific prior written permission.
     26 
     27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37 POSSIBILITY OF SUCH DAMAGE.
     38 -----------------------------------------------------------------------------
     39 */
     40 
     41 /* This module contains jsRegExpExecute(), the externally visible function
     42 that does pattern matching using an NFA algorithm, following the rules from
     43 the JavaScript specification. There are also some supporting functions. */
     44 
     45 #include "config.h"
     46 #include "pcre_internal.h"
     47 
     48 #include <limits.h>
     49 #include <wtf/ASCIICType.h>
     50 #include <wtf/Vector.h>
     51 
     52 #if REGEXP_HISTOGRAM
     53 #include <wtf/DateMath.h>
     54 #include <runtime/UString.h>
     55 #endif
     56 
     57 using namespace WTF;
     58 
     59 #if COMPILER(GCC)
     60 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
     61 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
     62 #endif
     63 
     64 /* Avoid warnings on Windows. */
     65 #undef min
     66 #undef max
     67 
     68 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
     69 typedef int ReturnLocation;
     70 #else
     71 typedef void* ReturnLocation;
     72 #endif
     73 
     74 #if !REGEXP_HISTOGRAM
     75 
     76 class HistogramTimeLogger {
     77 public:
     78     HistogramTimeLogger(const JSRegExp*) { }
     79 };
     80 
     81 #else
     82 
     83 using namespace JSC;
     84 
     85 class Histogram {
     86 public:
     87     ~Histogram();
     88     void add(const JSRegExp*, double);
     89 
     90 private:
     91     typedef HashMap<RefPtr<UString::Rep>, double> Map;
     92     Map times;
     93 };
     94 
     95 class HistogramTimeLogger {
     96 public:
     97     HistogramTimeLogger(const JSRegExp*);
     98     ~HistogramTimeLogger();
     99 
    100 private:
    101     const JSRegExp* m_re;
    102     double m_startTime;
    103 };
    104 
    105 #endif
    106 
    107 /* Structure for building a chain of data for holding the values of
    108 the subject pointer at the start of each bracket, used to detect when
    109 an empty string has been matched by a bracket to break infinite loops. */
    110 struct BracketChainNode {
    111     BracketChainNode* previousBracket;
    112     const UChar* bracketStart;
    113 };
    114 
    115 struct MatchFrame : FastAllocBase {
    116     ReturnLocation returnLocation;
    117     struct MatchFrame* previousFrame;
    118 
    119     /* Function arguments that may change */
    120     struct {
    121         const UChar* subjectPtr;
    122         const unsigned char* instructionPtr;
    123         int offsetTop;
    124         BracketChainNode* bracketChain;
    125     } args;
    126 
    127 
    128     /* PCRE uses "fake" recursion built off of gotos, thus
    129      stack-based local variables are not safe to use.  Instead we have to
    130      store local variables on the current MatchFrame. */
    131     struct {
    132         const unsigned char* data;
    133         const unsigned char* startOfRepeatingBracket;
    134         const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
    135         const unsigned char* instructionPtrAtStartOfOnce;
    136 
    137         int repeatOthercase;
    138 
    139         int ctype;
    140         int fc;
    141         int fi;
    142         int length;
    143         int max;
    144         int number;
    145         int offset;
    146         int saveOffset1;
    147         int saveOffset2;
    148         int saveOffset3;
    149 
    150         BracketChainNode bracketChainNode;
    151     } locals;
    152 };
    153 
    154 /* Structure for passing "static" information around between the functions
    155 doing traditional NFA matching, so that they are thread-safe. */
    156 
    157 struct MatchData {
    158   int*   offsetVector;         /* Offset vector */
    159   int    offsetEnd;            /* One past the end */
    160   int    offsetMax;            /* The maximum usable for return data */
    161   bool   offsetOverflow;       /* Set if too many extractions */
    162   const UChar*  startSubject;         /* Start of the subject string */
    163   const UChar*  endSubject;           /* End of the subject string */
    164   const UChar*  endMatchPtr;         /* Subject position at end match */
    165   int    endOffsetTop;        /* Highwater mark at end of match */
    166   bool   multiline;
    167   bool   ignoreCase;
    168 };
    169 
    170 /* The maximum remaining length of subject we are prepared to search for a
    171 reqByte match. */
    172 
    173 #define REQ_BYTE_MAX 1000
    174 
    175 /* The below limit restricts the number of "recursive" match calls in order to
    176 avoid spending exponential time on complex regular expressions. */
    177 
    178 static const unsigned matchLimit = 1000000;
    179 
    180 #ifdef DEBUG
    181 /*************************************************
    182 *        Debugging function to print chars       *
    183 *************************************************/
    184 
    185 /* Print a sequence of chars in printable format, stopping at the end of the
    186 subject if the requested.
    187 
    188 Arguments:
    189   p           points to characters
    190   length      number to print
    191   isSubject  true if printing from within md.startSubject
    192   md          pointer to matching data block, if isSubject is true
    193 */
    194 
    195 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
    196 {
    197     if (isSubject && length > md.endSubject - p)
    198         length = md.endSubject - p;
    199     while (length-- > 0) {
    200         int c;
    201         if (isprint(c = *(p++)))
    202             printf("%c", c);
    203         else if (c < 256)
    204             printf("\\x%02x", c);
    205         else
    206             printf("\\x{%x}", c);
    207     }
    208 }
    209 #endif
    210 
    211 /*************************************************
    212 *          Match a back-reference                *
    213 *************************************************/
    214 
    215 /* If a back reference hasn't been set, the length that is passed is greater
    216 than the number of characters left in the string, so the match fails.
    217 
    218 Arguments:
    219   offset      index into the offset vector
    220   subjectPtr        points into the subject
    221   length      length to be matched
    222   md          points to match data block
    223 
    224 Returns:      true if matched
    225 */
    226 
    227 static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
    228 {
    229     const UChar* p = md.startSubject + md.offsetVector[offset];
    230 
    231 #ifdef DEBUG
    232     if (subjectPtr >= md.endSubject)
    233         printf("matching subject <null>");
    234     else {
    235         printf("matching subject ");
    236         pchars(subjectPtr, length, true, md);
    237     }
    238     printf(" against backref ");
    239     pchars(p, length, false, md);
    240     printf("\n");
    241 #endif
    242 
    243     /* Always fail if not enough characters left */
    244 
    245     if (length > md.endSubject - subjectPtr)
    246         return false;
    247 
    248     /* Separate the caselesss case for speed */
    249 
    250     if (md.ignoreCase) {
    251         while (length-- > 0) {
    252             UChar c = *p++;
    253             int othercase = jsc_pcre_ucp_othercase(c);
    254             UChar d = *subjectPtr++;
    255             if (c != d && othercase != d)
    256                 return false;
    257         }
    258     }
    259     else {
    260         while (length-- > 0)
    261             if (*p++ != *subjectPtr++)
    262                 return false;
    263     }
    264 
    265     return true;
    266 }
    267 
    268 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
    269 
    270 /* Use numbered labels and switch statement at the bottom of the match function. */
    271 
    272 #define RMATCH_WHERE(num) num
    273 #define RRETURN_LABEL RRETURN_SWITCH
    274 
    275 #else
    276 
    277 /* Use GCC's computed goto extension. */
    278 
    279 /* For one test case this is more than 40% faster than the switch statement.
    280 We could avoid the use of the num argument entirely by using local labels,
    281 but using it for the GCC case as well as the non-GCC case allows us to share
    282 a bit more code and notice if we use conflicting numbers.*/
    283 
    284 #define RMATCH_WHERE(num) &&RRETURN_##num
    285 #define RRETURN_LABEL *stack.currentFrame->returnLocation
    286 
    287 #endif
    288 
    289 #define RECURSIVE_MATCH_COMMON(num) \
    290     goto RECURSE;\
    291     RRETURN_##num: \
    292     stack.popCurrentFrame();
    293 
    294 #define RECURSIVE_MATCH(num, ra, rb) \
    295     do { \
    296         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
    297         RECURSIVE_MATCH_COMMON(num) \
    298     } while (0)
    299 
    300 #define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb) \
    301     do { \
    302         stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
    303         startNewGroup(stack.currentFrame); \
    304         RECURSIVE_MATCH_COMMON(num) \
    305     } while (0)
    306 
    307 #define RRETURN goto RRETURN_LABEL
    308 
    309 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
    310 
    311 /*************************************************
    312 *         Match from current position            *
    313 *************************************************/
    314 
    315 /* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
    316 in the subject string, while substringStart holds the value of subjectPtr at the start of the
    317 last bracketed group - used for breaking infinite loops matching zero-length
    318 strings. This function is called recursively in many circumstances. Whenever it
    319 returns a negative (error) response, the outer match() call must also return the
    320 same response.
    321 
    322 Arguments:
    323    subjectPtr        pointer in subject
    324    instructionPtr       position in code
    325    offsetTop  current top pointer
    326    md          pointer to "static" info for the match
    327 
    328 Returns:       1 if matched          )  these values are >= 0
    329                0 if failed to match  )
    330                a negative error value if aborted by an error condition
    331                  (e.g. stopped by repeated call or recursion limit)
    332 */
    333 
    334 static const unsigned numFramesOnStack = 16;
    335 
    336 struct MatchStack {
    337     MatchStack()
    338         : framesEnd(frames + numFramesOnStack)
    339         , currentFrame(frames)
    340         , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
    341     {
    342         ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
    343     }
    344 
    345     MatchFrame frames[numFramesOnStack];
    346     MatchFrame* framesEnd;
    347     MatchFrame* currentFrame;
    348     unsigned size;
    349 
    350     inline bool canUseStackBufferForNextFrame()
    351     {
    352         return size < numFramesOnStack;
    353     }
    354 
    355     inline MatchFrame* allocateNextFrame()
    356     {
    357         if (canUseStackBufferForNextFrame())
    358             return currentFrame + 1;
    359         return new MatchFrame;
    360     }
    361 
    362     inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)
    363     {
    364         MatchFrame* newframe = allocateNextFrame();
    365         newframe->previousFrame = currentFrame;
    366 
    367         newframe->args.subjectPtr = currentFrame->args.subjectPtr;
    368         newframe->args.offsetTop = currentFrame->args.offsetTop;
    369         newframe->args.instructionPtr = instructionPtr;
    370         newframe->args.bracketChain = bracketChain;
    371         newframe->returnLocation = returnLocation;
    372         size++;
    373 
    374         currentFrame = newframe;
    375     }
    376 
    377     inline void popCurrentFrame()
    378     {
    379         MatchFrame* oldFrame = currentFrame;
    380         currentFrame = currentFrame->previousFrame;
    381         if (size > numFramesOnStack)
    382             delete oldFrame;
    383         size--;
    384     }
    385 
    386     void popAllFrames()
    387     {
    388         while (size)
    389             popCurrentFrame();
    390     }
    391 };
    392 
    393 static int matchError(int errorCode, MatchStack& stack)
    394 {
    395     stack.popAllFrames();
    396     return errorCode;
    397 }
    398 
    399 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
    400  if there are extra bytes. This is called when we know we are in UTF-8 mode. */
    401 
    402 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
    403 {
    404     c = *subjectPtr;
    405     if ((c & 0xc0) == 0xc0) {
    406         int gcaa = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
    407         int gcss = 6 * gcaa;
    408         c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
    409         for (int gcii = 1; gcii <= gcaa; gcii++) {
    410             gcss -= 6;
    411             c |= (subjectPtr[gcii] & 0x3f) << gcss;
    412         }
    413         len += gcaa;
    414     }
    415 }
    416 
    417 static inline void startNewGroup(MatchFrame* currentFrame)
    418 {
    419     /* At the start of a bracketed group, add the current subject pointer to the
    420      stack of such pointers, to be re-instated at the end of the group when we hit
    421      the closing ket. When match() is called in other circumstances, we don't add to
    422      this stack. */
    423 
    424     currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;
    425     currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;
    426     currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
    427 }
    428 
    429 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
    430 static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
    431 {
    432     // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
    433     static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
    434     static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
    435 
    436     ASSERT(instructionOffset >= 0);
    437     ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
    438 
    439     minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
    440     minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
    441     maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
    442 }
    443 
    444 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
    445 {
    446     bool isMatch = false;
    447     int min;
    448     bool minimize = false; /* Initialization not really needed, but some compilers think so. */
    449     unsigned remainingMatchCount = matchLimit;
    450     int othercase; /* Declare here to avoid errors during jumps */
    451 
    452     MatchStack stack;
    453 
    454     /* The opcode jump table. */
    455 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
    456 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
    457     static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
    458 #undef EMIT_JUMP_TABLE_ENTRY
    459 #endif
    460 
    461     /* One-time setup of the opcode jump table. */
    462 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
    463     for (int i = 255; !opcodeJumpTable[i]; i--)
    464         opcodeJumpTable[i] = &&CAPTURING_BRACKET;
    465 #endif
    466 
    467 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
    468     // Shark shows this as a hot line
    469     // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
    470     stack.currentFrame->returnLocation = &&RETURN;
    471 #else
    472     stack.currentFrame->returnLocation = 0;
    473 #endif
    474     stack.currentFrame->args.subjectPtr = subjectPtr;
    475     stack.currentFrame->args.instructionPtr = instructionPtr;
    476     stack.currentFrame->args.offsetTop = offsetTop;
    477     stack.currentFrame->args.bracketChain = 0;
    478     startNewGroup(stack.currentFrame);
    479 
    480     /* This is where control jumps back to to effect "recursion" */
    481 
    482 RECURSE:
    483     if (!--remainingMatchCount)
    484         return matchError(JSRegExpErrorHitLimit, stack);
    485 
    486     /* Now start processing the operations. */
    487 
    488 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
    489     while (true)
    490 #endif
    491     {
    492 
    493 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
    494 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
    495 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
    496 #else
    497 #define BEGIN_OPCODE(opcode) case OP_##opcode
    498 #define NEXT_OPCODE continue
    499 #endif
    500 
    501 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
    502         NEXT_OPCODE;
    503 #else
    504         switch (*stack.currentFrame->args.instructionPtr)
    505 #endif
    506         {
    507             /* Non-capturing bracket: optimized */
    508 
    509             BEGIN_OPCODE(BRA):
    510             NON_CAPTURING_BRACKET:
    511                 DPRINTF(("start bracket 0\n"));
    512                 do {
    513                     RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
    514                     if (isMatch)
    515                         RRETURN;
    516                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
    517                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
    518                 DPRINTF(("bracket 0 failed\n"));
    519                 RRETURN;
    520 
    521             /* Skip over large extraction number data if encountered. */
    522 
    523             BEGIN_OPCODE(BRANUMBER):
    524                 stack.currentFrame->args.instructionPtr += 3;
    525                 NEXT_OPCODE;
    526 
    527             /* End of the pattern. */
    528 
    529             BEGIN_OPCODE(END):
    530                 md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
    531                 md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
    532                 isMatch = true;
    533                 RRETURN;
    534 
    535             /* Assertion brackets. Check the alternative branches in turn - the
    536              matching won't pass the KET for an assertion. If any one branch matches,
    537              the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
    538              start of each branch to move the current point backwards, so the code at
    539              this level is identical to the lookahead case. */
    540 
    541             BEGIN_OPCODE(ASSERT):
    542                 do {
    543                     RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
    544                     if (isMatch)
    545                         break;
    546                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
    547                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
    548                 if (*stack.currentFrame->args.instructionPtr == OP_KET)
    549                     RRETURN_NO_MATCH;
    550 
    551                 /* Continue from after the assertion, updating the offsets high water
    552                  mark, since extracts may have been taken during the assertion. */
    553 
    554                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
    555                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
    556                 stack.currentFrame->args.offsetTop = md.endOffsetTop;
    557                 NEXT_OPCODE;
    558 
    559             /* Negative assertion: all branches must fail to match */
    560 
    561             BEGIN_OPCODE(ASSERT_NOT):
    562                 do {
    563                     RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
    564                     if (isMatch)
    565                         RRETURN_NO_MATCH;
    566                     stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
    567                 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
    568 
    569                 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
    570                 NEXT_OPCODE;
    571 
    572             /* An alternation is the end of a branch; scan along to find the end of the
    573              bracketed group and go to there. */
    574 
    575             BEGIN_OPCODE(ALT):
    576                 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
    577                 NEXT_OPCODE;
    578 
    579             /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
    580              that it may occur zero times. It may repeat infinitely, or not at all -
    581              i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
    582              repeat limits are compiled as a number of copies, with the optional ones
    583              preceded by BRAZERO or BRAMINZERO. */
    584 
    585             BEGIN_OPCODE(BRAZERO): {
    586                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
    587                 RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
    588                 if (isMatch)
    589                     RRETURN;
    590                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
    591                 stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
    592                 NEXT_OPCODE;
    593             }
    594 
    595             BEGIN_OPCODE(BRAMINZERO): {
    596                 stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
    597                 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
    598                 RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
    599                 if (isMatch)
    600                     RRETURN;
    601                 stack.currentFrame->args.instructionPtr++;
    602                 NEXT_OPCODE;
    603             }
    604 
    605             /* End of a group, repeated or non-repeating. If we are at the end of
    606              an assertion "group", stop matching and return 1, but record the
    607              current high water mark for use by positive assertions. Do this also
    608              for the "once" (not-backup up) groups. */
    609 
    610             BEGIN_OPCODE(KET):
    611             BEGIN_OPCODE(KETRMIN):
    612             BEGIN_OPCODE(KETRMAX):
    613                 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
    614                 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
    615 
    616                 /* Back up the stack of bracket start pointers. */
    617 
    618                 stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
    619 
    620                 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
    621                     md.endOffsetTop = stack.currentFrame->args.offsetTop;
    622                     isMatch = true;
    623                     RRETURN;
    624                 }
    625 
    626                 /* In all other cases except a conditional group we have to check the
    627                  group number back at the start and if necessary complete handling an
    628                  extraction by setting the offsets and bumping the high water mark. */
    629 
    630                 stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
    631 
    632                 /* For extended extraction brackets (large number), we have to fish out
    633                  the number from a dummy opcode at the start. */
    634 
    635                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
    636                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
    637                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
    638 
    639 #ifdef DEBUG
    640                 printf("end bracket %d", stack.currentFrame->locals.number);
    641                 printf("\n");
    642 #endif
    643 
    644                 /* Test for a numbered group. This includes groups called as a result
    645                  of recursion. Note that whole-pattern recursion is coded as a recurse
    646                  into group 0, so it won't be picked up here. Instead, we catch it when
    647                  the OP_END is reached. */
    648 
    649                 if (stack.currentFrame->locals.number > 0) {
    650                     if (stack.currentFrame->locals.offset >= md.offsetMax)
    651                         md.offsetOverflow = true;
    652                     else {
    653                         md.offsetVector[stack.currentFrame->locals.offset] =
    654                         md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
    655                         md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
    656                         if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
    657                             stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
    658                     }
    659                 }
    660 
    661                 /* For a non-repeating ket, just continue at this level. This also
    662                  happens for a repeating ket if no characters were matched in the group.
    663                  This is the forcible breaking of infinite loops as implemented in Perl
    664                  5.005. If there is an options reset, it will get obeyed in the normal
    665                  course of events. */
    666 
    667                 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
    668                     stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
    669                     NEXT_OPCODE;
    670                 }
    671 
    672                 /* The repeating kets try the rest of the pattern or restart from the
    673                  preceding bracket, in the appropriate order. */
    674 
    675                 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
    676                     RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
    677                     if (isMatch)
    678                         RRETURN;
    679                     RECURSIVE_MATCH_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
    680                     if (isMatch)
    681                         RRETURN;
    682                 } else { /* OP_KETRMAX */
    683                     RECURSIVE_MATCH_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
    684                     if (isMatch)
    685                         RRETURN;
    686                     RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
    687                     if (isMatch)
    688                         RRETURN;
    689                 }
    690                 RRETURN;
    691 
    692             /* Start of subject. */
    693 
    694             BEGIN_OPCODE(CIRC):
    695                 if (stack.currentFrame->args.subjectPtr != md.startSubject)
    696                     RRETURN_NO_MATCH;
    697                 stack.currentFrame->args.instructionPtr++;
    698                 NEXT_OPCODE;
    699 
    700             /* After internal newline if multiline. */
    701 
    702             BEGIN_OPCODE(BOL):
    703                 if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
    704                     RRETURN_NO_MATCH;
    705                 stack.currentFrame->args.instructionPtr++;
    706                 NEXT_OPCODE;
    707 
    708             /* End of subject. */
    709 
    710             BEGIN_OPCODE(DOLL):
    711                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
    712                     RRETURN_NO_MATCH;
    713                 stack.currentFrame->args.instructionPtr++;
    714                 NEXT_OPCODE;
    715 
    716             /* Before internal newline if multiline. */
    717 
    718             BEGIN_OPCODE(EOL):
    719                 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
    720                     RRETURN_NO_MATCH;
    721                 stack.currentFrame->args.instructionPtr++;
    722                 NEXT_OPCODE;
    723 
    724             /* Word boundary assertions */
    725 
    726             BEGIN_OPCODE(NOT_WORD_BOUNDARY):
    727             BEGIN_OPCODE(WORD_BOUNDARY): {
    728                 bool currentCharIsWordChar = false;
    729                 bool previousCharIsWordChar = false;
    730 
    731                 if (stack.currentFrame->args.subjectPtr > md.startSubject)
    732                     previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
    733                 if (stack.currentFrame->args.subjectPtr < md.endSubject)
    734                     currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
    735 
    736                 /* Now see if the situation is what we want */
    737                 bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
    738                 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
    739                     RRETURN_NO_MATCH;
    740                 NEXT_OPCODE;
    741             }
    742 
    743             /* Match a single character type; inline for speed */
    744 
    745             BEGIN_OPCODE(NOT_NEWLINE):
    746                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    747                     RRETURN_NO_MATCH;
    748                 if (isNewline(*stack.currentFrame->args.subjectPtr++))
    749                     RRETURN_NO_MATCH;
    750                 stack.currentFrame->args.instructionPtr++;
    751                 NEXT_OPCODE;
    752 
    753             BEGIN_OPCODE(NOT_DIGIT):
    754                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    755                     RRETURN_NO_MATCH;
    756                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
    757                     RRETURN_NO_MATCH;
    758                 stack.currentFrame->args.instructionPtr++;
    759                 NEXT_OPCODE;
    760 
    761             BEGIN_OPCODE(DIGIT):
    762                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    763                     RRETURN_NO_MATCH;
    764                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
    765                     RRETURN_NO_MATCH;
    766                 stack.currentFrame->args.instructionPtr++;
    767                 NEXT_OPCODE;
    768 
    769             BEGIN_OPCODE(NOT_WHITESPACE):
    770                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    771                     RRETURN_NO_MATCH;
    772                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
    773                     RRETURN_NO_MATCH;
    774                 stack.currentFrame->args.instructionPtr++;
    775                 NEXT_OPCODE;
    776 
    777             BEGIN_OPCODE(WHITESPACE):
    778                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    779                     RRETURN_NO_MATCH;
    780                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
    781                     RRETURN_NO_MATCH;
    782                 stack.currentFrame->args.instructionPtr++;
    783                 NEXT_OPCODE;
    784 
    785             BEGIN_OPCODE(NOT_WORDCHAR):
    786                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    787                     RRETURN_NO_MATCH;
    788                 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
    789                     RRETURN_NO_MATCH;
    790                 stack.currentFrame->args.instructionPtr++;
    791                 NEXT_OPCODE;
    792 
    793             BEGIN_OPCODE(WORDCHAR):
    794                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    795                     RRETURN_NO_MATCH;
    796                 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
    797                     RRETURN_NO_MATCH;
    798                 stack.currentFrame->args.instructionPtr++;
    799                 NEXT_OPCODE;
    800 
    801             /* Match a back reference, possibly repeatedly. Look past the end of the
    802              item to see if there is repeat information following. The code is similar
    803              to that for character classes, but repeated for efficiency. Then obey
    804              similar code to character type repeats - written out again for speed.
    805              However, if the referenced string is the empty string, always treat
    806              it as matched, any number of times (otherwise there could be infinite
    807              loops). */
    808 
    809             BEGIN_OPCODE(REF):
    810                 stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
    811                 stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
    812 
    813                 /* If the reference is unset, set the length to be longer than the amount
    814                  of subject left; this ensures that every attempt at a match fails. We
    815                  can't just fail here, because of the possibility of quantifiers with zero
    816                  minima. */
    817 
    818                 if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
    819                     stack.currentFrame->locals.length = 0;
    820                 else
    821                     stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
    822 
    823                 /* Set up for repetition, or handle the non-repeated case */
    824 
    825                 switch (*stack.currentFrame->args.instructionPtr) {
    826                     case OP_CRSTAR:
    827                     case OP_CRMINSTAR:
    828                     case OP_CRPLUS:
    829                     case OP_CRMINPLUS:
    830                     case OP_CRQUERY:
    831                     case OP_CRMINQUERY:
    832                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
    833                         break;
    834 
    835                     case OP_CRRANGE:
    836                     case OP_CRMINRANGE:
    837                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
    838                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
    839                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
    840                         if (stack.currentFrame->locals.max == 0)
    841                             stack.currentFrame->locals.max = INT_MAX;
    842                         stack.currentFrame->args.instructionPtr += 5;
    843                         break;
    844 
    845                     default:               /* No repeat follows */
    846                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
    847                             RRETURN_NO_MATCH;
    848                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
    849                         NEXT_OPCODE;
    850                 }
    851 
    852                 /* If the length of the reference is zero, just continue with the
    853                  main loop. */
    854 
    855                 if (stack.currentFrame->locals.length == 0)
    856                     NEXT_OPCODE;
    857 
    858                 /* First, ensure the minimum number of matches are present. */
    859 
    860                 for (int i = 1; i <= min; i++) {
    861                     if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
    862                         RRETURN_NO_MATCH;
    863                     stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
    864                 }
    865 
    866                 /* If min = max, continue at the same level without recursion.
    867                  They are not both allowed to be zero. */
    868 
    869                 if (min == stack.currentFrame->locals.max)
    870                     NEXT_OPCODE;
    871 
    872                 /* If minimizing, keep trying and advancing the pointer */
    873 
    874                 if (minimize) {
    875                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
    876                         RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
    877                         if (isMatch)
    878                             RRETURN;
    879                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
    880                             RRETURN;
    881                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
    882                     }
    883                     /* Control never reaches here */
    884                 }
    885 
    886                 /* If maximizing, find the longest string and work backwards */
    887 
    888                 else {
    889                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
    890                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
    891                         if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
    892                             break;
    893                         stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
    894                     }
    895                     while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
    896                         RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
    897                         if (isMatch)
    898                             RRETURN;
    899                         stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
    900                     }
    901                     RRETURN_NO_MATCH;
    902                 }
    903                 /* Control never reaches here */
    904 
    905             /* Match a bit-mapped character class, possibly repeatedly. This op code is
    906              used when all the characters in the class have values in the range 0-255,
    907              and either the matching is caseful, or the characters are in the range
    908              0-127 when UTF-8 processing is enabled. The only difference between
    909              OP_CLASS and OP_NCLASS occurs when a data character outside the range is
    910              encountered.
    911 
    912              First, look past the end of the item to see if there is repeat information
    913              following. Then obey similar code to character type repeats - written out
    914              again for speed. */
    915 
    916             BEGIN_OPCODE(NCLASS):
    917             BEGIN_OPCODE(CLASS):
    918                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
    919                 stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
    920 
    921                 switch (*stack.currentFrame->args.instructionPtr) {
    922                     case OP_CRSTAR:
    923                     case OP_CRMINSTAR:
    924                     case OP_CRPLUS:
    925                     case OP_CRMINPLUS:
    926                     case OP_CRQUERY:
    927                     case OP_CRMINQUERY:
    928                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
    929                         break;
    930 
    931                     case OP_CRRANGE:
    932                     case OP_CRMINRANGE:
    933                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
    934                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
    935                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
    936                         if (stack.currentFrame->locals.max == 0)
    937                             stack.currentFrame->locals.max = INT_MAX;
    938                         stack.currentFrame->args.instructionPtr += 5;
    939                         break;
    940 
    941                     default:               /* No repeat follows */
    942                         min = stack.currentFrame->locals.max = 1;
    943                         break;
    944                 }
    945 
    946                 /* First, ensure the minimum number of matches are present. */
    947 
    948                 for (int i = 1; i <= min; i++) {
    949                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    950                         RRETURN_NO_MATCH;
    951                     int c = *stack.currentFrame->args.subjectPtr++;
    952                     if (c > 255) {
    953                         if (stack.currentFrame->locals.data[-1] == OP_CLASS)
    954                             RRETURN_NO_MATCH;
    955                     } else {
    956                         if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
    957                             RRETURN_NO_MATCH;
    958                     }
    959                 }
    960 
    961                 /* If max == min we can continue with the main loop without the
    962                  need to recurse. */
    963 
    964                 if (min == stack.currentFrame->locals.max)
    965                     NEXT_OPCODE;
    966 
    967                 /* If minimizing, keep testing the rest of the expression and advancing
    968                  the pointer while it matches the class. */
    969                 if (minimize) {
    970                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
    971                         RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
    972                         if (isMatch)
    973                             RRETURN;
    974                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
    975                             RRETURN;
    976                         int c = *stack.currentFrame->args.subjectPtr++;
    977                         if (c > 255) {
    978                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
    979                                 RRETURN;
    980                         } else {
    981                             if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
    982                                 RRETURN;
    983                         }
    984                     }
    985                     /* Control never reaches here */
    986                 }
    987                 /* If maximizing, find the longest possible run, then work backwards. */
    988                 else {
    989                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
    990 
    991                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
    992                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
    993                             break;
    994                         int c = *stack.currentFrame->args.subjectPtr;
    995                         if (c > 255) {
    996                             if (stack.currentFrame->locals.data[-1] == OP_CLASS)
    997                                 break;
    998                         } else {
    999                             if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
   1000                                 break;
   1001                         }
   1002                         ++stack.currentFrame->args.subjectPtr;
   1003                     }
   1004                     for (;;) {
   1005                         RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1006                         if (isMatch)
   1007                             RRETURN;
   1008                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
   1009                             break;        /* Stop if tried at original pos */
   1010                     }
   1011 
   1012                     RRETURN;
   1013                 }
   1014                 /* Control never reaches here */
   1015 
   1016             /* Match an extended character class. */
   1017 
   1018             BEGIN_OPCODE(XCLASS):
   1019                 stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE;                /* Save for matching */
   1020                 stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);                      /* Advance past the item */
   1021 
   1022                 switch (*stack.currentFrame->args.instructionPtr) {
   1023                     case OP_CRSTAR:
   1024                     case OP_CRMINSTAR:
   1025                     case OP_CRPLUS:
   1026                     case OP_CRMINPLUS:
   1027                     case OP_CRQUERY:
   1028                     case OP_CRMINQUERY:
   1029                         repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
   1030                         break;
   1031 
   1032                     case OP_CRRANGE:
   1033                     case OP_CRMINRANGE:
   1034                         minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
   1035                         min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1036                         stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
   1037                         if (stack.currentFrame->locals.max == 0)
   1038                             stack.currentFrame->locals.max = INT_MAX;
   1039                         stack.currentFrame->args.instructionPtr += 5;
   1040                         break;
   1041 
   1042                     default:               /* No repeat follows */
   1043                         min = stack.currentFrame->locals.max = 1;
   1044                 }
   1045 
   1046                 /* First, ensure the minimum number of matches are present. */
   1047 
   1048                 for (int i = 1; i <= min; i++) {
   1049                     if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1050                         RRETURN_NO_MATCH;
   1051                     int c = *stack.currentFrame->args.subjectPtr++;
   1052                     if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
   1053                         RRETURN_NO_MATCH;
   1054                 }
   1055 
   1056                 /* If max == min we can continue with the main loop without the
   1057                  need to recurse. */
   1058 
   1059                 if (min == stack.currentFrame->locals.max)
   1060                     NEXT_OPCODE;
   1061 
   1062                 /* If minimizing, keep testing the rest of the expression and advancing
   1063                  the pointer while it matches the class. */
   1064 
   1065                 if (minimize) {
   1066                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1067                         RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1068                         if (isMatch)
   1069                             RRETURN;
   1070                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
   1071                             RRETURN;
   1072                         int c = *stack.currentFrame->args.subjectPtr++;
   1073                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
   1074                             RRETURN;
   1075                     }
   1076                     /* Control never reaches here */
   1077                 }
   1078 
   1079                 /* If maximizing, find the longest possible run, then work backwards. */
   1080 
   1081                 else {
   1082                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
   1083                     for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1084                         if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1085                             break;
   1086                         int c = *stack.currentFrame->args.subjectPtr;
   1087                         if (!jsc_pcre_xclass(c, stack.currentFrame->locals.data))
   1088                             break;
   1089                         ++stack.currentFrame->args.subjectPtr;
   1090                     }
   1091                     for(;;) {
   1092                         RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1093                         if (isMatch)
   1094                             RRETURN;
   1095                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
   1096                             break;        /* Stop if tried at original pos */
   1097                     }
   1098                     RRETURN;
   1099                 }
   1100 
   1101                 /* Control never reaches here */
   1102 
   1103             /* Match a single character, casefully */
   1104 
   1105             BEGIN_OPCODE(CHAR):
   1106                 stack.currentFrame->locals.length = 1;
   1107                 stack.currentFrame->args.instructionPtr++;
   1108                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
   1109                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
   1110                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1111                     RRETURN_NO_MATCH;
   1112                 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
   1113                     RRETURN_NO_MATCH;
   1114                 NEXT_OPCODE;
   1115 
   1116             /* Match a single character, caselessly */
   1117 
   1118             BEGIN_OPCODE(CHAR_IGNORING_CASE): {
   1119                 stack.currentFrame->locals.length = 1;
   1120                 stack.currentFrame->args.instructionPtr++;
   1121                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
   1122                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
   1123                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1124                     RRETURN_NO_MATCH;
   1125                 int dc = *stack.currentFrame->args.subjectPtr++;
   1126                 if (stack.currentFrame->locals.fc != dc && jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
   1127                     RRETURN_NO_MATCH;
   1128                 NEXT_OPCODE;
   1129             }
   1130 
   1131             /* Match a single ASCII character. */
   1132 
   1133             BEGIN_OPCODE(ASCII_CHAR):
   1134                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
   1135                     RRETURN_NO_MATCH;
   1136                 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
   1137                     RRETURN_NO_MATCH;
   1138                 ++stack.currentFrame->args.subjectPtr;
   1139                 stack.currentFrame->args.instructionPtr += 2;
   1140                 NEXT_OPCODE;
   1141 
   1142             /* Match one of two cases of an ASCII letter. */
   1143 
   1144             BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
   1145                 if (md.endSubject == stack.currentFrame->args.subjectPtr)
   1146                     RRETURN_NO_MATCH;
   1147                 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
   1148                     RRETURN_NO_MATCH;
   1149                 ++stack.currentFrame->args.subjectPtr;
   1150                 stack.currentFrame->args.instructionPtr += 2;
   1151                 NEXT_OPCODE;
   1152 
   1153             /* Match a single character repeatedly; different opcodes share code. */
   1154 
   1155             BEGIN_OPCODE(EXACT):
   1156                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1157                 minimize = false;
   1158                 stack.currentFrame->args.instructionPtr += 3;
   1159                 goto REPEATCHAR;
   1160 
   1161             BEGIN_OPCODE(UPTO):
   1162             BEGIN_OPCODE(MINUPTO):
   1163                 min = 0;
   1164                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1165                 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
   1166                 stack.currentFrame->args.instructionPtr += 3;
   1167                 goto REPEATCHAR;
   1168 
   1169             BEGIN_OPCODE(STAR):
   1170             BEGIN_OPCODE(MINSTAR):
   1171             BEGIN_OPCODE(PLUS):
   1172             BEGIN_OPCODE(MINPLUS):
   1173             BEGIN_OPCODE(QUERY):
   1174             BEGIN_OPCODE(MINQUERY):
   1175                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
   1176 
   1177                 /* Common code for all repeated single-character matches. We can give
   1178                  up quickly if there are fewer than the minimum number of characters left in
   1179                  the subject. */
   1180 
   1181             REPEATCHAR:
   1182 
   1183                 stack.currentFrame->locals.length = 1;
   1184                 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
   1185                 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
   1186                     RRETURN_NO_MATCH;
   1187                 stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
   1188 
   1189                 if (stack.currentFrame->locals.fc <= 0xFFFF) {
   1190                     othercase = md.ignoreCase ? jsc_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
   1191 
   1192                     for (int i = 1; i <= min; i++) {
   1193                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
   1194                             RRETURN_NO_MATCH;
   1195                         ++stack.currentFrame->args.subjectPtr;
   1196                     }
   1197 
   1198                     if (min == stack.currentFrame->locals.max)
   1199                         NEXT_OPCODE;
   1200 
   1201                     if (minimize) {
   1202                         stack.currentFrame->locals.repeatOthercase = othercase;
   1203                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1204                             RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1205                             if (isMatch)
   1206                                 RRETURN;
   1207                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
   1208                                 RRETURN;
   1209                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
   1210                                 RRETURN;
   1211                             ++stack.currentFrame->args.subjectPtr;
   1212                         }
   1213                         /* Control never reaches here */
   1214                     } else {
   1215                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
   1216                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1217                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1218                                 break;
   1219                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
   1220                                 break;
   1221                             ++stack.currentFrame->args.subjectPtr;
   1222                         }
   1223                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
   1224                             RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1225                             if (isMatch)
   1226                                 RRETURN;
   1227                             --stack.currentFrame->args.subjectPtr;
   1228                         }
   1229                         RRETURN_NO_MATCH;
   1230                     }
   1231                     /* Control never reaches here */
   1232                 } else {
   1233                     /* No case on surrogate pairs, so no need to bother with "othercase". */
   1234 
   1235                     for (int i = 1; i <= min; i++) {
   1236                         if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
   1237                             RRETURN_NO_MATCH;
   1238                         stack.currentFrame->args.subjectPtr += 2;
   1239                     }
   1240 
   1241                     if (min == stack.currentFrame->locals.max)
   1242                         NEXT_OPCODE;
   1243 
   1244                     if (minimize) {
   1245                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1246                             RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1247                             if (isMatch)
   1248                                 RRETURN;
   1249                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
   1250                                 RRETURN;
   1251                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
   1252                                 RRETURN;
   1253                             stack.currentFrame->args.subjectPtr += 2;
   1254                         }
   1255                         /* Control never reaches here */
   1256                     } else {
   1257                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
   1258                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1259                             if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
   1260                                 break;
   1261                             if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
   1262                                 break;
   1263                             stack.currentFrame->args.subjectPtr += 2;
   1264                         }
   1265                         while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
   1266                             RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1267                             if (isMatch)
   1268                                 RRETURN;
   1269                             stack.currentFrame->args.subjectPtr -= 2;
   1270                         }
   1271                         RRETURN_NO_MATCH;
   1272                     }
   1273                     /* Control never reaches here */
   1274                 }
   1275                 /* Control never reaches here */
   1276 
   1277             /* Match a negated single one-byte character. */
   1278 
   1279             BEGIN_OPCODE(NOT): {
   1280                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1281                     RRETURN_NO_MATCH;
   1282                 int b = stack.currentFrame->args.instructionPtr[1];
   1283                 int c = *stack.currentFrame->args.subjectPtr++;
   1284                 stack.currentFrame->args.instructionPtr += 2;
   1285                 if (md.ignoreCase) {
   1286                     if (c < 128)
   1287                         c = toLowerCase(c);
   1288                     if (toLowerCase(b) == c)
   1289                         RRETURN_NO_MATCH;
   1290                 } else {
   1291                     if (b == c)
   1292                         RRETURN_NO_MATCH;
   1293                 }
   1294                 NEXT_OPCODE;
   1295             }
   1296 
   1297             /* Match a negated single one-byte character repeatedly. This is almost a
   1298              repeat of the code for a repeated single character, but I haven't found a
   1299              nice way of commoning these up that doesn't require a test of the
   1300              positive/negative option for each character match. Maybe that wouldn't add
   1301              very much to the time taken, but character matching *is* what this is all
   1302              about... */
   1303 
   1304             BEGIN_OPCODE(NOTEXACT):
   1305                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1306                 minimize = false;
   1307                 stack.currentFrame->args.instructionPtr += 3;
   1308                 goto REPEATNOTCHAR;
   1309 
   1310             BEGIN_OPCODE(NOTUPTO):
   1311             BEGIN_OPCODE(NOTMINUPTO):
   1312                 min = 0;
   1313                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1314                 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
   1315                 stack.currentFrame->args.instructionPtr += 3;
   1316                 goto REPEATNOTCHAR;
   1317 
   1318             BEGIN_OPCODE(NOTSTAR):
   1319             BEGIN_OPCODE(NOTMINSTAR):
   1320             BEGIN_OPCODE(NOTPLUS):
   1321             BEGIN_OPCODE(NOTMINPLUS):
   1322             BEGIN_OPCODE(NOTQUERY):
   1323             BEGIN_OPCODE(NOTMINQUERY):
   1324                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
   1325 
   1326             /* Common code for all repeated single-byte matches. We can give up quickly
   1327              if there are fewer than the minimum number of bytes left in the
   1328              subject. */
   1329 
   1330             REPEATNOTCHAR:
   1331                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
   1332                     RRETURN_NO_MATCH;
   1333                 stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
   1334 
   1335                 /* The code is duplicated for the caseless and caseful cases, for speed,
   1336                  since matching characters is likely to be quite common. First, ensure the
   1337                  minimum number of matches are present. If min = max, continue at the same
   1338                  level without recursing. Otherwise, if minimizing, keep trying the rest of
   1339                  the expression and advancing one matching character if failing, up to the
   1340                  maximum. Alternatively, if maximizing, find the maximum number of
   1341                  characters and work backwards. */
   1342 
   1343                 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
   1344 
   1345                 if (md.ignoreCase) {
   1346                     if (stack.currentFrame->locals.fc < 128)
   1347                         stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
   1348 
   1349                     for (int i = 1; i <= min; i++) {
   1350                         int d = *stack.currentFrame->args.subjectPtr++;
   1351                         if (d < 128)
   1352                             d = toLowerCase(d);
   1353                         if (stack.currentFrame->locals.fc == d)
   1354                             RRETURN_NO_MATCH;
   1355                     }
   1356 
   1357                     if (min == stack.currentFrame->locals.max)
   1358                         NEXT_OPCODE;
   1359 
   1360                     if (minimize) {
   1361                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1362                             RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1363                             if (isMatch)
   1364                                 RRETURN;
   1365                             int d = *stack.currentFrame->args.subjectPtr++;
   1366                             if (d < 128)
   1367                                 d = toLowerCase(d);
   1368                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
   1369                                 RRETURN;
   1370                         }
   1371                         /* Control never reaches here */
   1372                     }
   1373 
   1374                     /* Maximize case */
   1375 
   1376                     else {
   1377                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
   1378 
   1379                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1380                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1381                                 break;
   1382                             int d = *stack.currentFrame->args.subjectPtr;
   1383                             if (d < 128)
   1384                                 d = toLowerCase(d);
   1385                             if (stack.currentFrame->locals.fc == d)
   1386                                 break;
   1387                             ++stack.currentFrame->args.subjectPtr;
   1388                         }
   1389                         for (;;) {
   1390                             RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1391                             if (isMatch)
   1392                                 RRETURN;
   1393                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
   1394                                 break;        /* Stop if tried at original pos */
   1395                         }
   1396 
   1397                         RRETURN;
   1398                     }
   1399                     /* Control never reaches here */
   1400                 }
   1401 
   1402                 /* Caseful comparisons */
   1403 
   1404                 else {
   1405                     for (int i = 1; i <= min; i++) {
   1406                         int d = *stack.currentFrame->args.subjectPtr++;
   1407                         if (stack.currentFrame->locals.fc == d)
   1408                             RRETURN_NO_MATCH;
   1409                     }
   1410 
   1411                     if (min == stack.currentFrame->locals.max)
   1412                         NEXT_OPCODE;
   1413 
   1414                     if (minimize) {
   1415                         for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1416                             RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1417                             if (isMatch)
   1418                                 RRETURN;
   1419                             int d = *stack.currentFrame->args.subjectPtr++;
   1420                             if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
   1421                                 RRETURN;
   1422                         }
   1423                         /* Control never reaches here */
   1424                     }
   1425 
   1426                     /* Maximize case */
   1427 
   1428                     else {
   1429                         stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
   1430 
   1431                         for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1432                             if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1433                                 break;
   1434                             int d = *stack.currentFrame->args.subjectPtr;
   1435                             if (stack.currentFrame->locals.fc == d)
   1436                                 break;
   1437                             ++stack.currentFrame->args.subjectPtr;
   1438                         }
   1439                         for (;;) {
   1440                             RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1441                             if (isMatch)
   1442                                 RRETURN;
   1443                             if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
   1444                                 break;        /* Stop if tried at original pos */
   1445                         }
   1446 
   1447                         RRETURN;
   1448                     }
   1449                 }
   1450                 /* Control never reaches here */
   1451 
   1452             /* Match a single character type repeatedly; several different opcodes
   1453              share code. This is very similar to the code for single characters, but we
   1454              repeat it in the interests of efficiency. */
   1455 
   1456             BEGIN_OPCODE(TYPEEXACT):
   1457                 min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1458                 minimize = true;
   1459                 stack.currentFrame->args.instructionPtr += 3;
   1460                 goto REPEATTYPE;
   1461 
   1462             BEGIN_OPCODE(TYPEUPTO):
   1463             BEGIN_OPCODE(TYPEMINUPTO):
   1464                 min = 0;
   1465                 stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
   1466                 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
   1467                 stack.currentFrame->args.instructionPtr += 3;
   1468                 goto REPEATTYPE;
   1469 
   1470             BEGIN_OPCODE(TYPESTAR):
   1471             BEGIN_OPCODE(TYPEMINSTAR):
   1472             BEGIN_OPCODE(TYPEPLUS):
   1473             BEGIN_OPCODE(TYPEMINPLUS):
   1474             BEGIN_OPCODE(TYPEQUERY):
   1475             BEGIN_OPCODE(TYPEMINQUERY):
   1476                 repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
   1477 
   1478                 /* Common code for all repeated single character type matches. Note that
   1479                  in UTF-8 mode, '.' matches a character of any length, but for the other
   1480                  character types, the valid characters are all one-byte long. */
   1481 
   1482             REPEATTYPE:
   1483                 stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++;      /* Code for the character type */
   1484 
   1485                 /* First, ensure the minimum number of matches are present. Use inline
   1486                  code for maximizing the speed, and do the type test once at the start
   1487                  (i.e. keep it out of the loop). Also we can test that there are at least
   1488                  the minimum number of characters before we start. */
   1489 
   1490                 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
   1491                     RRETURN_NO_MATCH;
   1492                 if (min > 0) {
   1493                     switch (stack.currentFrame->locals.ctype) {
   1494                         case OP_NOT_NEWLINE:
   1495                             for (int i = 1; i <= min; i++) {
   1496                                 if (isNewline(*stack.currentFrame->args.subjectPtr))
   1497                                     RRETURN_NO_MATCH;
   1498                                 ++stack.currentFrame->args.subjectPtr;
   1499                             }
   1500                             break;
   1501 
   1502                         case OP_NOT_DIGIT:
   1503                             for (int i = 1; i <= min; i++) {
   1504                                 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
   1505                                     RRETURN_NO_MATCH;
   1506                                 ++stack.currentFrame->args.subjectPtr;
   1507                             }
   1508                             break;
   1509 
   1510                         case OP_DIGIT:
   1511                             for (int i = 1; i <= min; i++) {
   1512                                 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
   1513                                     RRETURN_NO_MATCH;
   1514                                 ++stack.currentFrame->args.subjectPtr;
   1515                             }
   1516                             break;
   1517 
   1518                         case OP_NOT_WHITESPACE:
   1519                             for (int i = 1; i <= min; i++) {
   1520                                 if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
   1521                                     RRETURN_NO_MATCH;
   1522                                 ++stack.currentFrame->args.subjectPtr;
   1523                             }
   1524                             break;
   1525 
   1526                         case OP_WHITESPACE:
   1527                             for (int i = 1; i <= min; i++) {
   1528                                 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
   1529                                     RRETURN_NO_MATCH;
   1530                                 ++stack.currentFrame->args.subjectPtr;
   1531                             }
   1532                             break;
   1533 
   1534                         case OP_NOT_WORDCHAR:
   1535                             for (int i = 1; i <= min; i++) {
   1536                                 if (isWordChar(*stack.currentFrame->args.subjectPtr))
   1537                                     RRETURN_NO_MATCH;
   1538                                 ++stack.currentFrame->args.subjectPtr;
   1539                             }
   1540                             break;
   1541 
   1542                         case OP_WORDCHAR:
   1543                             for (int i = 1; i <= min; i++) {
   1544                                 if (!isWordChar(*stack.currentFrame->args.subjectPtr))
   1545                                     RRETURN_NO_MATCH;
   1546                                 ++stack.currentFrame->args.subjectPtr;
   1547                             }
   1548                             break;
   1549 
   1550                         default:
   1551                             ASSERT_NOT_REACHED();
   1552                             return matchError(JSRegExpErrorInternal, stack);
   1553                     }  /* End switch(stack.currentFrame->locals.ctype) */
   1554                 }
   1555 
   1556                 /* If min = max, continue at the same level without recursing */
   1557 
   1558                 if (min == stack.currentFrame->locals.max)
   1559                     NEXT_OPCODE;
   1560 
   1561                 /* If minimizing, we have to test the rest of the pattern before each
   1562                  subsequent match. */
   1563 
   1564                 if (minimize) {
   1565                     for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
   1566                         RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1567                         if (isMatch)
   1568                             RRETURN;
   1569                         if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
   1570                             RRETURN;
   1571 
   1572                         int c = *stack.currentFrame->args.subjectPtr++;
   1573                         switch (stack.currentFrame->locals.ctype) {
   1574                             case OP_NOT_NEWLINE:
   1575                                 if (isNewline(c))
   1576                                     RRETURN;
   1577                                 break;
   1578 
   1579                             case OP_NOT_DIGIT:
   1580                                 if (isASCIIDigit(c))
   1581                                     RRETURN;
   1582                                 break;
   1583 
   1584                             case OP_DIGIT:
   1585                                 if (!isASCIIDigit(c))
   1586                                     RRETURN;
   1587                                 break;
   1588 
   1589                             case OP_NOT_WHITESPACE:
   1590                                 if (isSpaceChar(c))
   1591                                     RRETURN;
   1592                                 break;
   1593 
   1594                             case OP_WHITESPACE:
   1595                                 if (!isSpaceChar(c))
   1596                                     RRETURN;
   1597                                 break;
   1598 
   1599                             case OP_NOT_WORDCHAR:
   1600                                 if (isWordChar(c))
   1601                                     RRETURN;
   1602                                 break;
   1603 
   1604                             case OP_WORDCHAR:
   1605                                 if (!isWordChar(c))
   1606                                     RRETURN;
   1607                                 break;
   1608 
   1609                             default:
   1610                                 ASSERT_NOT_REACHED();
   1611                                 return matchError(JSRegExpErrorInternal, stack);
   1612                         }
   1613                     }
   1614                     /* Control never reaches here */
   1615                 }
   1616 
   1617                 /* If maximizing it is worth using inline code for speed, doing the type
   1618                  test once at the start (i.e. keep it out of the loop). */
   1619 
   1620                 else {
   1621                     stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;  /* Remember where we started */
   1622 
   1623                     switch (stack.currentFrame->locals.ctype) {
   1624                         case OP_NOT_NEWLINE:
   1625                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1626                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
   1627                                     break;
   1628                                 stack.currentFrame->args.subjectPtr++;
   1629                             }
   1630                             break;
   1631 
   1632                         case OP_NOT_DIGIT:
   1633                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1634                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1635                                     break;
   1636                                 int c = *stack.currentFrame->args.subjectPtr;
   1637                                 if (isASCIIDigit(c))
   1638                                     break;
   1639                                 ++stack.currentFrame->args.subjectPtr;
   1640                             }
   1641                             break;
   1642 
   1643                         case OP_DIGIT:
   1644                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1645                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1646                                     break;
   1647                                 int c = *stack.currentFrame->args.subjectPtr;
   1648                                 if (!isASCIIDigit(c))
   1649                                     break;
   1650                                 ++stack.currentFrame->args.subjectPtr;
   1651                             }
   1652                             break;
   1653 
   1654                         case OP_NOT_WHITESPACE:
   1655                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1656                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1657                                     break;
   1658                                 int c = *stack.currentFrame->args.subjectPtr;
   1659                                 if (isSpaceChar(c))
   1660                                     break;
   1661                                 ++stack.currentFrame->args.subjectPtr;
   1662                             }
   1663                             break;
   1664 
   1665                         case OP_WHITESPACE:
   1666                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1667                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1668                                     break;
   1669                                 int c = *stack.currentFrame->args.subjectPtr;
   1670                                 if (!isSpaceChar(c))
   1671                                     break;
   1672                                 ++stack.currentFrame->args.subjectPtr;
   1673                             }
   1674                             break;
   1675 
   1676                         case OP_NOT_WORDCHAR:
   1677                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1678                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1679                                     break;
   1680                                 int c = *stack.currentFrame->args.subjectPtr;
   1681                                 if (isWordChar(c))
   1682                                     break;
   1683                                 ++stack.currentFrame->args.subjectPtr;
   1684                             }
   1685                             break;
   1686 
   1687                         case OP_WORDCHAR:
   1688                             for (int i = min; i < stack.currentFrame->locals.max; i++) {
   1689                                 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
   1690                                     break;
   1691                                 int c = *stack.currentFrame->args.subjectPtr;
   1692                                 if (!isWordChar(c))
   1693                                     break;
   1694                                 ++stack.currentFrame->args.subjectPtr;
   1695                             }
   1696                             break;
   1697 
   1698                         default:
   1699                             ASSERT_NOT_REACHED();
   1700                             return matchError(JSRegExpErrorInternal, stack);
   1701                     }
   1702 
   1703                     /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
   1704 
   1705                     for (;;) {
   1706                         RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
   1707                         if (isMatch)
   1708                             RRETURN;
   1709                         if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
   1710                             break;        /* Stop if tried at original pos */
   1711                     }
   1712 
   1713                     /* Get here if we can't make it match with any permitted repetitions */
   1714 
   1715                     RRETURN;
   1716                 }
   1717                 /* Control never reaches here */
   1718 
   1719             BEGIN_OPCODE(CRMINPLUS):
   1720             BEGIN_OPCODE(CRMINQUERY):
   1721             BEGIN_OPCODE(CRMINRANGE):
   1722             BEGIN_OPCODE(CRMINSTAR):
   1723             BEGIN_OPCODE(CRPLUS):
   1724             BEGIN_OPCODE(CRQUERY):
   1725             BEGIN_OPCODE(CRRANGE):
   1726             BEGIN_OPCODE(CRSTAR):
   1727                 ASSERT_NOT_REACHED();
   1728                 return matchError(JSRegExpErrorInternal, stack);
   1729 
   1730 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
   1731             CAPTURING_BRACKET:
   1732 #else
   1733             default:
   1734 #endif
   1735                 /* Opening capturing bracket. If there is space in the offset vector, save
   1736                  the current subject position in the working slot at the top of the vector. We
   1737                  mustn't change the current values of the data slot, because they may be set
   1738                  from a previous iteration of this group, and be referred to by a reference
   1739                  inside the group.
   1740 
   1741                  If the bracket fails to match, we need to restore this value and also the
   1742                  values of the final offsets, in case they were set by a previous iteration of
   1743                  the same bracket.
   1744 
   1745                  If there isn't enough space in the offset vector, treat this as if it were a
   1746                  non-capturing bracket. Don't worry about setting the flag for the error case
   1747                  here; that is handled in the code for KET. */
   1748 
   1749                 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
   1750 
   1751                 stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
   1752 
   1753                 /* For extended extraction brackets (large number), we have to fish out the
   1754                  number from a dummy opcode at the start. */
   1755 
   1756                 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
   1757                     stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
   1758                 stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
   1759 
   1760 #ifdef DEBUG
   1761                 printf("start bracket %d subject=", stack.currentFrame->locals.number);
   1762                 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
   1763                 printf("\n");
   1764 #endif
   1765 
   1766                 if (stack.currentFrame->locals.offset < md.offsetMax) {
   1767                     stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
   1768                     stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
   1769                     stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
   1770 
   1771                     DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
   1772                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
   1773 
   1774                     do {
   1775                         RECURSIVE_MATCH_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
   1776                         if (isMatch)
   1777                             RRETURN;
   1778                         stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
   1779                     } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
   1780 
   1781                     DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
   1782 
   1783                     md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
   1784                     md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
   1785                     md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
   1786 
   1787                     RRETURN;
   1788                 }
   1789 
   1790                 /* Insufficient room for saving captured contents */
   1791 
   1792                 goto NON_CAPTURING_BRACKET;
   1793         }
   1794 
   1795         /* Do not stick any code in here without much thought; it is assumed
   1796          that "continue" in the code above comes out to here to repeat the main
   1797          loop. */
   1798 
   1799     } /* End of main loop */
   1800 
   1801     ASSERT_NOT_REACHED();
   1802 
   1803 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
   1804 
   1805 RRETURN_SWITCH:
   1806     switch (stack.currentFrame->returnLocation) {
   1807         case 0: goto RETURN;
   1808         case 1: goto RRETURN_1;
   1809         case 2: goto RRETURN_2;
   1810         case 6: goto RRETURN_6;
   1811         case 7: goto RRETURN_7;
   1812         case 14: goto RRETURN_14;
   1813         case 15: goto RRETURN_15;
   1814         case 16: goto RRETURN_16;
   1815         case 17: goto RRETURN_17;
   1816         case 18: goto RRETURN_18;
   1817         case 19: goto RRETURN_19;
   1818         case 20: goto RRETURN_20;
   1819         case 21: goto RRETURN_21;
   1820         case 22: goto RRETURN_22;
   1821         case 24: goto RRETURN_24;
   1822         case 26: goto RRETURN_26;
   1823         case 27: goto RRETURN_27;
   1824         case 28: goto RRETURN_28;
   1825         case 29: goto RRETURN_29;
   1826         case 30: goto RRETURN_30;
   1827         case 31: goto RRETURN_31;
   1828         case 38: goto RRETURN_38;
   1829         case 40: goto RRETURN_40;
   1830         case 42: goto RRETURN_42;
   1831         case 44: goto RRETURN_44;
   1832         case 48: goto RRETURN_48;
   1833         case 52: goto RRETURN_52;
   1834     }
   1835 
   1836     ASSERT_NOT_REACHED();
   1837     return matchError(JSRegExpErrorInternal, stack);
   1838 
   1839 #endif
   1840 
   1841 RETURN:
   1842     return isMatch;
   1843 }
   1844 
   1845 
   1846 /*************************************************
   1847 *         Execute a Regular Expression           *
   1848 *************************************************/
   1849 
   1850 /* This function applies a compiled re to a subject string and picks out
   1851 portions of the string if it matches. Two elements in the vector are set for
   1852 each substring: the offsets to the start and end of the substring.
   1853 
   1854 Arguments:
   1855   re              points to the compiled expression
   1856   extra_data      points to extra data or is NULL
   1857   subject         points to the subject string
   1858   length          length of subject string (may contain binary zeros)
   1859   start_offset    where to start in the subject string
   1860   options         option bits
   1861   offsets         points to a vector of ints to be filled in with offsets
   1862   offsetCount     the number of elements in the vector
   1863 
   1864 Returns:          > 0 => success; value is the number of elements filled in
   1865                   = 0 => success, but offsets is not big enough
   1866                    -1 => failed to match
   1867                  < -1 => some kind of unexpected problem
   1868 */
   1869 
   1870 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int firstByte, bool firstByteIsCaseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
   1871 {
   1872     // If firstByte is set, try scanning to the first instance of that byte
   1873     // no need to try and match against any earlier part of the subject string.
   1874     if (firstByte >= 0) {
   1875         UChar firstChar = firstByte;
   1876         if (firstByteIsCaseless)
   1877             while (subjectPtr < endSubject) {
   1878                 int c = *subjectPtr;
   1879                 if (c > 127)
   1880                     break;
   1881                 if (toLowerCase(c) == firstChar)
   1882                     break;
   1883                 subjectPtr++;
   1884             }
   1885         else {
   1886             while (subjectPtr < endSubject && *subjectPtr != firstChar)
   1887                 subjectPtr++;
   1888         }
   1889     } else if (useMultiLineFirstCharOptimization) {
   1890         /* Or to just after \n for a multiline match if possible */
   1891         // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
   1892         if (subjectPtr > originalSubjectStart) {
   1893             while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
   1894                 subjectPtr++;
   1895         }
   1896     }
   1897 }
   1898 
   1899 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int reqByte, int reqByte2, bool reqByteIsCaseless, bool hasFirstByte, const UChar*& reqBytePtr)
   1900 {
   1901     /* If reqByte is set, we know that that character must appear in the subject
   1902      for the match to succeed. If the first character is set, reqByte must be
   1903      later in the subject; otherwise the test starts at the match point. This
   1904      optimization can save a huge amount of backtracking in patterns with nested
   1905      unlimited repeats that aren't going to match. Writing separate code for
   1906      cased/caseless versions makes it go faster, as does using an autoincrement
   1907      and backing off on a match.
   1908 
   1909      HOWEVER: when the subject string is very, very long, searching to its end can
   1910      take a long time, and give bad performance on quite ordinary patterns. This
   1911      showed up when somebody was matching /^C/ on a 32-megabyte string... so we
   1912      don't do this when the string is sufficiently long.
   1913     */
   1914 
   1915     if (reqByte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
   1916         const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
   1917 
   1918         /* We don't need to repeat the search if we haven't yet reached the
   1919          place we found it at last time. */
   1920 
   1921         if (p > reqBytePtr) {
   1922             if (reqByteIsCaseless) {
   1923                 while (p < endSubject) {
   1924                     int pp = *p++;
   1925                     if (pp == reqByte || pp == reqByte2) {
   1926                         p--;
   1927                         break;
   1928                     }
   1929                 }
   1930             } else {
   1931                 while (p < endSubject) {
   1932                     if (*p++ == reqByte) {
   1933                         p--;
   1934                         break;
   1935                     }
   1936                 }
   1937             }
   1938 
   1939             /* If we can't find the required character, break the matching loop */
   1940 
   1941             if (p >= endSubject)
   1942                 return true;
   1943 
   1944             /* If we have found the required character, save the point where we
   1945              found it, so that we don't search again next time round the loop if
   1946              the start hasn't passed this character yet. */
   1947 
   1948             reqBytePtr = p;
   1949         }
   1950     }
   1951     return false;
   1952 }
   1953 
   1954 int jsRegExpExecute(const JSRegExp* re,
   1955                     const UChar* subject, int length, int start_offset, int* offsets,
   1956                     int offsetCount)
   1957 {
   1958     ASSERT(re);
   1959     ASSERT(subject || !length);
   1960     ASSERT(offsetCount >= 0);
   1961     ASSERT(offsets || offsetCount == 0);
   1962 
   1963     HistogramTimeLogger logger(re);
   1964 
   1965     MatchData matchBlock;
   1966     matchBlock.startSubject = subject;
   1967     matchBlock.endSubject = matchBlock.startSubject + length;
   1968     const UChar* endSubject = matchBlock.endSubject;
   1969 
   1970     matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
   1971     matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
   1972 
   1973     /* If the expression has got more back references than the offsets supplied can
   1974      hold, we get a temporary chunk of working store to use during the matching.
   1975      Otherwise, we can use the vector supplied, rounding down its size to a multiple
   1976      of 3. */
   1977 
   1978     int ocount = offsetCount - (offsetCount % 3);
   1979 
   1980     // FIXME: This is lame that we have to second-guess our caller here.
   1981     // The API should change to either fail-hard when we don't have enough offset space
   1982     // or that we shouldn't ask our callers to pre-allocate in the first place.
   1983     bool usingTemporaryOffsets = false;
   1984     if (re->topBackref > 0 && re->topBackref >= ocount/3) {
   1985         ocount = re->topBackref * 3 + 3;
   1986         matchBlock.offsetVector = new int[ocount];
   1987         if (!matchBlock.offsetVector)
   1988             return JSRegExpErrorNoMemory;
   1989         usingTemporaryOffsets = true;
   1990     } else
   1991         matchBlock.offsetVector = offsets;
   1992 
   1993     matchBlock.offsetEnd = ocount;
   1994     matchBlock.offsetMax = (2*ocount)/3;
   1995     matchBlock.offsetOverflow = false;
   1996 
   1997     /* Compute the minimum number of offsets that we need to reset each time. Doing
   1998      this makes a huge difference to execution time when there aren't many brackets
   1999      in the pattern. */
   2000 
   2001     int resetCount = 2 + re->topBracket * 2;
   2002     if (resetCount > offsetCount)
   2003         resetCount = ocount;
   2004 
   2005     /* Reset the working variable associated with each extraction. These should
   2006      never be used unless previously set, but they get saved and restored, and so we
   2007      initialize them to avoid reading uninitialized locations. */
   2008 
   2009     if (matchBlock.offsetVector) {
   2010         int* iptr = matchBlock.offsetVector + ocount;
   2011         int* iend = iptr - resetCount/2 + 1;
   2012         while (--iptr >= iend)
   2013             *iptr = -1;
   2014     }
   2015 
   2016     /* Set up the first character to match, if available. The firstByte value is
   2017      never set for an anchored regular expression, but the anchoring may be forced
   2018      at run time, so we have to test for anchoring. The first char may be unset for
   2019      an unanchored pattern, of course. If there's no first char and the pattern was
   2020      studied, there may be a bitmap of possible first characters. */
   2021 
   2022     bool firstByteIsCaseless = false;
   2023     int firstByte = -1;
   2024     if (re->options & UseFirstByteOptimizationOption) {
   2025         firstByte = re->firstByte & 255;
   2026         if ((firstByteIsCaseless = (re->firstByte & REQ_IGNORE_CASE)))
   2027             firstByte = toLowerCase(firstByte);
   2028     }
   2029 
   2030     /* For anchored or unanchored matches, there may be a "last known required
   2031      character" set. */
   2032 
   2033     bool reqByteIsCaseless = false;
   2034     int reqByte = -1;
   2035     int reqByte2 = -1;
   2036     if (re->options & UseRequiredByteOptimizationOption) {
   2037         reqByte = re->reqByte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
   2038         reqByteIsCaseless = (re->reqByte & REQ_IGNORE_CASE);
   2039         reqByte2 = flipCase(reqByte);
   2040     }
   2041 
   2042     /* Loop for handling unanchored repeated matching attempts; for anchored regexs
   2043      the loop runs just once. */
   2044 
   2045     const UChar* startMatch = subject + start_offset;
   2046     const UChar* reqBytePtr = startMatch - 1;
   2047     bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
   2048 
   2049     do {
   2050         /* Reset the maximum number of extractions we might see. */
   2051         if (matchBlock.offsetVector) {
   2052             int* iptr = matchBlock.offsetVector;
   2053             int* iend = iptr + resetCount;
   2054             while (iptr < iend)
   2055                 *iptr++ = -1;
   2056         }
   2057 
   2058         tryFirstByteOptimization(startMatch, endSubject, firstByte, firstByteIsCaseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
   2059         if (tryRequiredByteOptimization(startMatch, endSubject, reqByte, reqByte2, reqByteIsCaseless, firstByte >= 0, reqBytePtr))
   2060             break;
   2061 
   2062         /* When a match occurs, substrings will be set for all internal extractions;
   2063          we just need to set up the whole thing as substring 0 before returning. If
   2064          there were too many extractions, set the return code to zero. In the case
   2065          where we had to get some local store to hold offsets for backreferences, copy
   2066          those back references that we can. In this case there need not be overflow
   2067          if certain parts of the pattern were not used. */
   2068 
   2069         /* The code starts after the JSRegExp block and the capture name table. */
   2070         const unsigned char* start_code = (const unsigned char*)(re + 1);
   2071 
   2072         int returnCode = match(startMatch, start_code, 2, matchBlock);
   2073 
   2074         /* When the result is no match, advance the pointer to the next character
   2075          and continue. */
   2076         if (returnCode == 0) {
   2077             startMatch++;
   2078             continue;
   2079         }
   2080 
   2081         if (returnCode != 1) {
   2082             ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExpErrorNoMemory);
   2083             DPRINTF((">>>> error: returning %d\n", returnCode));
   2084             return returnCode;
   2085         }
   2086 
   2087         /* We have a match! Copy the offset information from temporary store if
   2088          necessary */
   2089 
   2090         if (usingTemporaryOffsets) {
   2091             if (offsetCount >= 4) {
   2092                 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetCount - 2) * sizeof(int));
   2093                 DPRINTF(("Copied offsets from temporary memory\n"));
   2094             }
   2095             if (matchBlock.endOffsetTop > offsetCount)
   2096                 matchBlock.offsetOverflow = true;
   2097 
   2098             DPRINTF(("Freeing temporary memory\n"));
   2099             delete [] matchBlock.offsetVector;
   2100         }
   2101 
   2102         returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
   2103 
   2104         if (offsetCount < 2)
   2105             returnCode = 0;
   2106         else {
   2107             offsets[0] = startMatch - matchBlock.startSubject;
   2108             offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
   2109         }
   2110 
   2111         DPRINTF((">>>> returning %d\n", returnCode));
   2112         return returnCode;
   2113     } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
   2114 
   2115     if (usingTemporaryOffsets) {
   2116         DPRINTF(("Freeing temporary memory\n"));
   2117         delete [] matchBlock.offsetVector;
   2118     }
   2119 
   2120     DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
   2121     return JSRegExpErrorNoMatch;
   2122 }
   2123 
   2124 #if REGEXP_HISTOGRAM
   2125 
   2126 class CompareHistogramEntries {
   2127 public:
   2128     bool operator()(const pair<UString, double>& a, const pair<UString, double>& b)
   2129     {
   2130         if (a.second == b.second)
   2131             return a.first < b.first;
   2132         return a.second < b.second;
   2133     }
   2134 };
   2135 
   2136 Histogram::~Histogram()
   2137 {
   2138     Vector<pair<UString, double> > values;
   2139     Map::iterator end = times.end();
   2140     for (Map::iterator it = times.begin(); it != end; ++it)
   2141         values.append(*it);
   2142     sort(values.begin(), values.end(), CompareHistogramEntries());
   2143     size_t size = values.size();
   2144     printf("Regular Expressions, sorted by time spent evaluating them:\n");
   2145     for (size_t i = 0; i < size; ++i)
   2146         printf("    %f - %s\n", values[size - i - 1].second, values[size - i - 1].first.UTF8String().c_str());
   2147 }
   2148 
   2149 void Histogram::add(const JSRegExp* re, double elapsedTime)
   2150 {
   2151     UString string(reinterpret_cast<const UChar*>(reinterpret_cast<const char*>(re) + re->stringOffset), re->stringLength);
   2152     if (re->options & IgnoreCaseOption && re->options & MatchAcrossMultipleLinesOption)
   2153         string += " (multi-line, ignore case)";
   2154     else {
   2155         if (re->options & IgnoreCaseOption)
   2156             string += " (ignore case)";
   2157         if (re->options & MatchAcrossMultipleLinesOption)
   2158             string += " (multi-line)";
   2159     }
   2160     pair<Map::iterator, bool> result = times.add(string.rep(), elapsedTime);
   2161     if (!result.second)
   2162         result.first->second += elapsedTime;
   2163 }
   2164 
   2165 HistogramTimeLogger::HistogramTimeLogger(const JSRegExp* re)
   2166     : m_re(re)
   2167     , m_startTime(currentTimeMS())
   2168 {
   2169 }
   2170 
   2171 HistogramTimeLogger::~HistogramTimeLogger()
   2172 {
   2173     static Histogram histogram;
   2174     histogram.add(m_re, currentTimeMS() - m_startTime);
   2175 }
   2176 
   2177 #endif
   2178