Home | History | Annotate | Download | only in yarr
      1 /*
      2  * Copyright (C) 2009 Apple Inc. All rights reserved.
      3  * Copyright (C) 2010 Peter Varga (pvarga (at) inf.u-szeged.hu), University of Szeged
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "YarrInterpreter.h"
     29 
     30 #include "Yarr.h"
     31 #include <wtf/BumpPointerAllocator.h>
     32 
     33 #ifndef NDEBUG
     34 #include <stdio.h>
     35 #endif
     36 
     37 using namespace WTF;
     38 
     39 namespace JSC { namespace Yarr {
     40 
     41 class Interpreter {
     42 public:
     43     struct ParenthesesDisjunctionContext;
     44 
     45     struct BackTrackInfoPatternCharacter {
     46         uintptr_t matchAmount;
     47     };
     48     struct BackTrackInfoCharacterClass {
     49         uintptr_t matchAmount;
     50     };
     51     struct BackTrackInfoBackReference {
     52         uintptr_t begin; // Not really needed for greedy quantifiers.
     53         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
     54     };
     55     struct BackTrackInfoAlternative {
     56         uintptr_t offset;
     57     };
     58     struct BackTrackInfoParentheticalAssertion {
     59         uintptr_t begin;
     60     };
     61     struct BackTrackInfoParenthesesOnce {
     62         uintptr_t begin;
     63     };
     64     struct BackTrackInfoParenthesesTerminal {
     65         uintptr_t begin;
     66     };
     67     struct BackTrackInfoParentheses {
     68         uintptr_t matchAmount;
     69         ParenthesesDisjunctionContext* lastContext;
     70     };
     71 
     72     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
     73     {
     74         context->next = backTrack->lastContext;
     75         backTrack->lastContext = context;
     76         ++backTrack->matchAmount;
     77     }
     78 
     79     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
     80     {
     81         ASSERT(backTrack->matchAmount);
     82         ASSERT(backTrack->lastContext);
     83         backTrack->lastContext = backTrack->lastContext->next;
     84         --backTrack->matchAmount;
     85     }
     86 
     87     struct DisjunctionContext
     88     {
     89         DisjunctionContext()
     90             : term(0)
     91         {
     92         }
     93 
     94         void* operator new(size_t, void* where)
     95         {
     96             return where;
     97         }
     98 
     99         int term;
    100         unsigned matchBegin;
    101         unsigned matchEnd;
    102         uintptr_t frame[1];
    103     };
    104 
    105     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
    106     {
    107         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
    108         allocatorPool = allocatorPool->ensureCapacity(size);
    109         if (!allocatorPool)
    110             CRASH();
    111         return new(allocatorPool->alloc(size)) DisjunctionContext();
    112     }
    113 
    114     void freeDisjunctionContext(DisjunctionContext* context)
    115     {
    116         allocatorPool = allocatorPool->dealloc(context);
    117     }
    118 
    119     struct ParenthesesDisjunctionContext
    120     {
    121         ParenthesesDisjunctionContext(int* output, ByteTerm& term)
    122             : next(0)
    123         {
    124             unsigned firstSubpatternId = term.atom.subpatternId;
    125             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
    126 
    127             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
    128                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
    129                 output[(firstSubpatternId << 1) + i] = -1;
    130             }
    131 
    132             new(getDisjunctionContext(term)) DisjunctionContext();
    133         }
    134 
    135         void* operator new(size_t, void* where)
    136         {
    137             return where;
    138         }
    139 
    140         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
    141         {
    142             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
    143                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
    144         }
    145 
    146         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
    147         {
    148             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
    149         }
    150 
    151         ParenthesesDisjunctionContext* next;
    152         int subpatternBackup[1];
    153     };
    154 
    155     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
    156     {
    157         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
    158         allocatorPool = allocatorPool->ensureCapacity(size);
    159         if (!allocatorPool)
    160             CRASH();
    161         return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
    162     }
    163 
    164     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
    165     {
    166         allocatorPool = allocatorPool->dealloc(context);
    167     }
    168 
    169     class InputStream {
    170     public:
    171         InputStream(const UChar* input, unsigned start, unsigned length)
    172             : input(input)
    173             , pos(start)
    174             , length(length)
    175         {
    176         }
    177 
    178         void next()
    179         {
    180             ++pos;
    181         }
    182 
    183         void rewind(unsigned amount)
    184         {
    185             ASSERT(pos >= amount);
    186             pos -= amount;
    187         }
    188 
    189         int read()
    190         {
    191             ASSERT(pos < length);
    192             if (pos < length)
    193                 return input[pos];
    194             return -1;
    195         }
    196 
    197         int readPair()
    198         {
    199             ASSERT(pos + 1 < length);
    200             return input[pos] | input[pos + 1] << 16;
    201         }
    202 
    203         int readChecked(int position)
    204         {
    205             ASSERT(position < 0);
    206             ASSERT(static_cast<unsigned>(-position) <= pos);
    207             unsigned p = pos + position;
    208             ASSERT(p < length);
    209             return input[p];
    210         }
    211 
    212         int reread(unsigned from)
    213         {
    214             ASSERT(from < length);
    215             return input[from];
    216         }
    217 
    218         int prev()
    219         {
    220             ASSERT(!(pos > length));
    221             if (pos && length)
    222                 return input[pos - 1];
    223             return -1;
    224         }
    225 
    226         unsigned getPos()
    227         {
    228             return pos;
    229         }
    230 
    231         void setPos(unsigned p)
    232         {
    233             pos = p;
    234         }
    235 
    236         bool atStart()
    237         {
    238             return pos == 0;
    239         }
    240 
    241         bool atEnd()
    242         {
    243             return pos == length;
    244         }
    245 
    246         bool checkInput(int count)
    247         {
    248             if ((pos + count) <= length) {
    249                 pos += count;
    250                 return true;
    251             }
    252             return false;
    253         }
    254 
    255         void uncheckInput(int count)
    256         {
    257             pos -= count;
    258         }
    259 
    260         bool atStart(int position)
    261         {
    262             return (pos + position) == 0;
    263         }
    264 
    265         bool atEnd(int position)
    266         {
    267             return (pos + position) == length;
    268         }
    269 
    270         bool isNotAvailableInput(int position)
    271         {
    272             return (pos + position) > length;
    273         }
    274 
    275     private:
    276         const UChar* input;
    277         unsigned pos;
    278         unsigned length;
    279     };
    280 
    281     bool testCharacterClass(CharacterClass* characterClass, int ch)
    282     {
    283         if (ch & 0xFF80) {
    284             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
    285                 if (ch == characterClass->m_matchesUnicode[i])
    286                     return true;
    287             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
    288                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
    289                     return true;
    290         } else {
    291             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
    292                 if (ch == characterClass->m_matches[i])
    293                     return true;
    294             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
    295                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
    296                     return true;
    297         }
    298 
    299         return false;
    300     }
    301 
    302     bool checkCharacter(int testChar, int inputPosition)
    303     {
    304         return testChar == input.readChecked(inputPosition);
    305     }
    306 
    307     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
    308     {
    309         int ch = input.readChecked(inputPosition);
    310         return (loChar == ch) || (hiChar == ch);
    311     }
    312 
    313     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
    314     {
    315         bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
    316         return invert ? !match : match;
    317     }
    318 
    319     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
    320     {
    321         int matchSize = matchEnd - matchBegin;
    322 
    323         if (!input.checkInput(matchSize))
    324             return false;
    325 
    326         if (pattern->m_ignoreCase) {
    327             for (int i = 0; i < matchSize; ++i) {
    328                 int ch = input.reread(matchBegin + i);
    329 
    330                 int lo = Unicode::toLower(ch);
    331                 int hi = Unicode::toUpper(ch);
    332 
    333                 if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
    334                     input.uncheckInput(matchSize);
    335                     return false;
    336                 }
    337             }
    338         } else {
    339             for (int i = 0; i < matchSize; ++i) {
    340                 if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
    341                     input.uncheckInput(matchSize);
    342                     return false;
    343                 }
    344             }
    345         }
    346 
    347         return true;
    348     }
    349 
    350     bool matchAssertionBOL(ByteTerm& term)
    351     {
    352         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
    353     }
    354 
    355     bool matchAssertionEOL(ByteTerm& term)
    356     {
    357         if (term.inputPosition)
    358             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
    359 
    360         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
    361     }
    362 
    363     bool matchAssertionWordBoundary(ByteTerm& term)
    364     {
    365         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
    366         bool readIsWordchar;
    367         if (term.inputPosition)
    368             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
    369         else
    370             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
    371 
    372         bool wordBoundary = prevIsWordchar != readIsWordchar;
    373         return term.invert() ? !wordBoundary : wordBoundary;
    374     }
    375 
    376     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
    377     {
    378         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    379 
    380         switch (term.atom.quantityType) {
    381         case QuantifierFixedCount:
    382             break;
    383 
    384         case QuantifierGreedy:
    385             if (backTrack->matchAmount) {
    386                 --backTrack->matchAmount;
    387                 input.uncheckInput(1);
    388                 return true;
    389             }
    390             break;
    391 
    392         case QuantifierNonGreedy:
    393             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    394                 ++backTrack->matchAmount;
    395                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
    396                     return true;
    397             }
    398             input.uncheckInput(backTrack->matchAmount);
    399             break;
    400         }
    401 
    402         return false;
    403     }
    404 
    405     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
    406     {
    407         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    408 
    409         switch (term.atom.quantityType) {
    410         case QuantifierFixedCount:
    411             break;
    412 
    413         case QuantifierGreedy:
    414             if (backTrack->matchAmount) {
    415                 --backTrack->matchAmount;
    416                 input.uncheckInput(1);
    417                 return true;
    418             }
    419             break;
    420 
    421         case QuantifierNonGreedy:
    422             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    423                 ++backTrack->matchAmount;
    424                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
    425                     return true;
    426             }
    427             input.uncheckInput(backTrack->matchAmount);
    428             break;
    429         }
    430 
    431         return false;
    432     }
    433 
    434     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
    435     {
    436         ASSERT(term.type == ByteTerm::TypeCharacterClass);
    437         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    438 
    439         switch (term.atom.quantityType) {
    440         case QuantifierFixedCount: {
    441             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
    442                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
    443                     return false;
    444             }
    445             return true;
    446         }
    447 
    448         case QuantifierGreedy: {
    449             unsigned matchAmount = 0;
    450             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    451                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
    452                     input.uncheckInput(1);
    453                     break;
    454                 }
    455                 ++matchAmount;
    456             }
    457             backTrack->matchAmount = matchAmount;
    458 
    459             return true;
    460         }
    461 
    462         case QuantifierNonGreedy:
    463             backTrack->matchAmount = 0;
    464             return true;
    465         }
    466 
    467         ASSERT_NOT_REACHED();
    468         return false;
    469     }
    470 
    471     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
    472     {
    473         ASSERT(term.type == ByteTerm::TypeCharacterClass);
    474         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
    475 
    476         switch (term.atom.quantityType) {
    477         case QuantifierFixedCount:
    478             break;
    479 
    480         case QuantifierGreedy:
    481             if (backTrack->matchAmount) {
    482                 --backTrack->matchAmount;
    483                 input.uncheckInput(1);
    484                 return true;
    485             }
    486             break;
    487 
    488         case QuantifierNonGreedy:
    489             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
    490                 ++backTrack->matchAmount;
    491                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
    492                     return true;
    493             }
    494             input.uncheckInput(backTrack->matchAmount);
    495             break;
    496         }
    497 
    498         return false;
    499     }
    500 
    501     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
    502     {
    503         ASSERT(term.type == ByteTerm::TypeBackReference);
    504         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
    505 
    506         int matchBegin = output[(term.atom.subpatternId << 1)];
    507         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
    508 
    509         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
    510         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
    511         // Eg.: /(a\1)/
    512         if (matchEnd == -1)
    513             return true;
    514 
    515         ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
    516 
    517         if (matchBegin == matchEnd)
    518             return true;
    519 
    520         switch (term.atom.quantityType) {
    521         case QuantifierFixedCount: {
    522             backTrack->begin = input.getPos();
    523             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
    524                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
    525                     input.setPos(backTrack->begin);
    526                     return false;
    527                 }
    528             }
    529             return true;
    530         }
    531 
    532         case QuantifierGreedy: {
    533             unsigned matchAmount = 0;
    534             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
    535                 ++matchAmount;
    536             backTrack->matchAmount = matchAmount;
    537             return true;
    538         }
    539 
    540         case QuantifierNonGreedy:
    541             backTrack->begin = input.getPos();
    542             backTrack->matchAmount = 0;
    543             return true;
    544         }
    545 
    546         ASSERT_NOT_REACHED();
    547         return false;
    548     }
    549 
    550     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
    551     {
    552         ASSERT(term.type == ByteTerm::TypeBackReference);
    553         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
    554 
    555         int matchBegin = output[(term.atom.subpatternId << 1)];
    556         int matchEnd = output[(term.atom.subpatternId << 1) + 1];
    557         ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
    558 
    559         if (matchBegin == matchEnd)
    560             return false;
    561 
    562         switch (term.atom.quantityType) {
    563         case QuantifierFixedCount:
    564             // for quantityCount == 1, could rewind.
    565             input.setPos(backTrack->begin);
    566             break;
    567 
    568         case QuantifierGreedy:
    569             if (backTrack->matchAmount) {
    570                 --backTrack->matchAmount;
    571                 input.rewind(matchEnd - matchBegin);
    572                 return true;
    573             }
    574             break;
    575 
    576         case QuantifierNonGreedy:
    577             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
    578                 ++backTrack->matchAmount;
    579                 return true;
    580             }
    581             input.setPos(backTrack->begin);
    582             break;
    583         }
    584 
    585         return false;
    586     }
    587 
    588     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
    589     {
    590         if (term.capture()) {
    591             unsigned subpatternId = term.atom.subpatternId;
    592             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
    593             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
    594         }
    595     }
    596     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
    597     {
    598         unsigned firstSubpatternId = term.atom.subpatternId;
    599         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
    600         context->restoreOutput(output, firstSubpatternId, count);
    601     }
    602     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
    603     {
    604         while (backTrack->matchAmount) {
    605             ParenthesesDisjunctionContext* context = backTrack->lastContext;
    606 
    607             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
    608             if (result == JSRegExpMatch)
    609                 return JSRegExpMatch;
    610 
    611             resetMatches(term, context);
    612             popParenthesesDisjunctionContext(backTrack);
    613             freeParenthesesDisjunctionContext(context);
    614 
    615             if (result != JSRegExpNoMatch)
    616                 return result;
    617         }
    618 
    619         return JSRegExpNoMatch;
    620     }
    621 
    622     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
    623     {
    624         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
    625         ASSERT(term.atom.quantityCount == 1);
    626 
    627         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
    628 
    629         switch (term.atom.quantityType) {
    630         case QuantifierGreedy: {
    631             // set this speculatively; if we get to the parens end this will be true.
    632             backTrack->begin = input.getPos();
    633             break;
    634         }
    635         case QuantifierNonGreedy: {
    636             backTrack->begin = notFound;
    637             context->term += term.atom.parenthesesWidth;
    638             return true;
    639         }
    640         case QuantifierFixedCount:
    641             break;
    642         }
    643 
    644         if (term.capture()) {
    645             unsigned subpatternId = term.atom.subpatternId;
    646             output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
    647         }
    648 
    649         return true;
    650     }
    651 
    652     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
    653     {
    654         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
    655         ASSERT(term.atom.quantityCount == 1);
    656 
    657         if (term.capture()) {
    658             unsigned subpatternId = term.atom.subpatternId;
    659             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
    660         }
    661 
    662         if (term.atom.quantityType == QuantifierFixedCount)
    663             return true;
    664 
    665         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
    666         return backTrack->begin != input.getPos();
    667     }
    668 
    669     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
    670     {
    671         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
    672         ASSERT(term.atom.quantityCount == 1);
    673 
    674         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
    675 
    676         if (term.capture()) {
    677             unsigned subpatternId = term.atom.subpatternId;
    678             output[(subpatternId << 1)] = -1;
    679             output[(subpatternId << 1) + 1] = -1;
    680         }
    681 
    682         switch (term.atom.quantityType) {
    683         case QuantifierGreedy:
    684             // if we backtrack to this point, there is another chance - try matching nothing.
    685             ASSERT(backTrack->begin != notFound);
    686             backTrack->begin = notFound;
    687             context->term += term.atom.parenthesesWidth;
    688             return true;
    689         case QuantifierNonGreedy:
    690             ASSERT(backTrack->begin != notFound);
    691         case QuantifierFixedCount:
    692             break;
    693         }
    694 
    695         return false;
    696     }
    697 
    698     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
    699     {
    700         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
    701         ASSERT(term.atom.quantityCount == 1);
    702 
    703         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
    704 
    705         switch (term.atom.quantityType) {
    706         case QuantifierGreedy:
    707             if (backTrack->begin == notFound) {
    708                 context->term -= term.atom.parenthesesWidth;
    709                 return false;
    710             }
    711         case QuantifierNonGreedy:
    712             if (backTrack->begin == notFound) {
    713                 backTrack->begin = input.getPos();
    714                 if (term.capture()) {
    715                     // Technically this access to inputPosition should be accessing the begin term's
    716                     // inputPosition, but for repeats other than fixed these values should be
    717                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
    718                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
    719                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
    720                     unsigned subpatternId = term.atom.subpatternId;
    721                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
    722                 }
    723                 context->term -= term.atom.parenthesesWidth;
    724                 return true;
    725             }
    726         case QuantifierFixedCount:
    727             break;
    728         }
    729 
    730         return false;
    731     }
    732 
    733     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
    734     {
    735         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
    736         ASSERT(term.atom.quantityType == QuantifierGreedy);
    737         ASSERT(term.atom.quantityCount == quantifyInfinite);
    738         ASSERT(!term.capture());
    739 
    740         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
    741         backTrack->begin = input.getPos();
    742         return true;
    743     }
    744 
    745     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
    746     {
    747         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
    748 
    749         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
    750         // Empty match is a failed match.
    751         if (backTrack->begin == input.getPos())
    752             return false;
    753 
    754         // Successful match! Okay, what's next? - loop around and try to match moar!
    755         context->term -= (term.atom.parenthesesWidth + 1);
    756         return true;
    757     }
    758 
    759     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
    760     {
    761         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
    762         ASSERT(term.atom.quantityType == QuantifierGreedy);
    763         ASSERT(term.atom.quantityCount == quantifyInfinite);
    764         ASSERT(!term.capture());
    765 
    766         // If we backtrack to this point, we have failed to match this iteration of the parens.
    767         // Since this is greedy / zero minimum a failed is also accepted as a match!
    768         context->term += term.atom.parenthesesWidth;
    769         return true;
    770     }
    771 
    772     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
    773     {
    774         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
    775         // should always be returned as a successful match - we should never backtrack to here.
    776         ASSERT_NOT_REACHED();
    777         return false;
    778     }
    779 
    780     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
    781     {
    782         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    783         ASSERT(term.atom.quantityCount == 1);
    784 
    785         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    786 
    787         backTrack->begin = input.getPos();
    788         return true;
    789     }
    790 
    791     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
    792     {
    793         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    794         ASSERT(term.atom.quantityCount == 1);
    795 
    796         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    797 
    798         input.setPos(backTrack->begin);
    799 
    800         // We've reached the end of the parens; if they are inverted, this is failure.
    801         if (term.invert()) {
    802             context->term -= term.atom.parenthesesWidth;
    803             return false;
    804         }
    805 
    806         return true;
    807     }
    808 
    809     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
    810     {
    811         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
    812         ASSERT(term.atom.quantityCount == 1);
    813 
    814         // We've failed to match parens; if they are inverted, this is win!
    815         if (term.invert()) {
    816             context->term += term.atom.parenthesesWidth;
    817             return true;
    818         }
    819 
    820         return false;
    821     }
    822 
    823     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
    824     {
    825         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
    826         ASSERT(term.atom.quantityCount == 1);
    827 
    828         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
    829 
    830         input.setPos(backTrack->begin);
    831 
    832         context->term -= term.atom.parenthesesWidth;
    833         return false;
    834     }
    835 
    836     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
    837     {
    838         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
    839 
    840         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
    841         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
    842 
    843         backTrack->matchAmount = 0;
    844         backTrack->lastContext = 0;
    845 
    846         switch (term.atom.quantityType) {
    847         case QuantifierFixedCount: {
    848             // While we haven't yet reached our fixed limit,
    849             while (backTrack->matchAmount < term.atom.quantityCount) {
    850                 // Try to do a match, and it it succeeds, add it to the list.
    851                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
    852                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
    853                 if (result == JSRegExpMatch)
    854                     appendParenthesesDisjunctionContext(backTrack, context);
    855                 else {
    856                     // The match failed; try to find an alternate point to carry on from.
    857                     resetMatches(term, context);
    858                     freeParenthesesDisjunctionContext(context);
    859 
    860                     if (result == JSRegExpNoMatch) {
    861                         JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
    862                         if (backtrackResult != JSRegExpMatch)
    863                             return backtrackResult;
    864                     } else
    865                         return result;
    866                 }
    867             }
    868 
    869             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
    870             ParenthesesDisjunctionContext* context = backTrack->lastContext;
    871             recordParenthesesMatch(term, context);
    872             return JSRegExpMatch;
    873         }
    874 
    875         case QuantifierGreedy: {
    876             while (backTrack->matchAmount < term.atom.quantityCount) {
    877                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
    878                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
    879                 if (result == JSRegExpMatch)
    880                     appendParenthesesDisjunctionContext(backTrack, context);
    881                 else {
    882                     resetMatches(term, context);
    883                     freeParenthesesDisjunctionContext(context);
    884 
    885                     if (result != JSRegExpNoMatch)
    886                         return result;
    887 
    888                     break;
    889                 }
    890             }
    891 
    892             if (backTrack->matchAmount) {
    893                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
    894                 recordParenthesesMatch(term, context);
    895             }
    896             return JSRegExpMatch;
    897         }
    898 
    899         case QuantifierNonGreedy:
    900             return JSRegExpMatch;
    901         }
    902 
    903         ASSERT_NOT_REACHED();
    904         return JSRegExpErrorNoMatch;
    905     }
    906 
    907     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
    908     //
    909     // Greedy matches never should try just adding more - you should already have done
    910     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
    911     // you backtrack an item off the list needs checking, since we'll never have matched
    912     // the one less case.  Tracking forwards, still add as much as possible.
    913     //
    914     // Non-greedy, we've already done the one less case, so don't match on popping.
    915     // We haven't done the one more case, so always try to add that.
    916     //
    917     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
    918     {
    919         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
    920 
    921         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
    922         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
    923 
    924         switch (term.atom.quantityType) {
    925         case QuantifierFixedCount: {
    926             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
    927 
    928             ParenthesesDisjunctionContext* context = 0;
    929             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
    930 
    931             if (result != JSRegExpMatch)
    932                 return result;
    933 
    934             // While we haven't yet reached our fixed limit,
    935             while (backTrack->matchAmount < term.atom.quantityCount) {
    936                 // Try to do a match, and it it succeeds, add it to the list.
    937                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
    938                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
    939 
    940                 if (result == JSRegExpMatch)
    941                     appendParenthesesDisjunctionContext(backTrack, context);
    942                 else {
    943                     // The match failed; try to find an alternate point to carry on from.
    944                     resetMatches(term, context);
    945                     freeParenthesesDisjunctionContext(context);
    946 
    947                     if (result == JSRegExpNoMatch) {
    948                         JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
    949                         if (backtrackResult != JSRegExpMatch)
    950                             return backtrackResult;
    951                     } else
    952                         return result;
    953                 }
    954             }
    955 
    956             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
    957             context = backTrack->lastContext;
    958             recordParenthesesMatch(term, context);
    959             return JSRegExpMatch;
    960         }
    961 
    962         case QuantifierGreedy: {
    963             if (!backTrack->matchAmount)
    964                 return JSRegExpNoMatch;
    965 
    966             ParenthesesDisjunctionContext* context = backTrack->lastContext;
    967             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
    968             if (result == JSRegExpMatch) {
    969                 while (backTrack->matchAmount < term.atom.quantityCount) {
    970                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
    971                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
    972                     if (parenthesesResult == JSRegExpMatch)
    973                         appendParenthesesDisjunctionContext(backTrack, context);
    974                     else {
    975                         resetMatches(term, context);
    976                         freeParenthesesDisjunctionContext(context);
    977 
    978                         if (parenthesesResult != JSRegExpNoMatch)
    979                             return parenthesesResult;
    980 
    981                         break;
    982                     }
    983                 }
    984             } else {
    985                 resetMatches(term, context);
    986                 popParenthesesDisjunctionContext(backTrack);
    987                 freeParenthesesDisjunctionContext(context);
    988 
    989                 if (result != JSRegExpNoMatch)
    990                     return result;
    991             }
    992 
    993             if (backTrack->matchAmount) {
    994                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
    995                 recordParenthesesMatch(term, context);
    996             }
    997             return JSRegExpMatch;
    998         }
    999 
   1000         case QuantifierNonGreedy: {
   1001             // If we've not reached the limit, try to add one more match.
   1002             if (backTrack->matchAmount < term.atom.quantityCount) {
   1003                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   1004                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   1005                 if (result == JSRegExpMatch) {
   1006                     appendParenthesesDisjunctionContext(backTrack, context);
   1007                     recordParenthesesMatch(term, context);
   1008                     return JSRegExpMatch;
   1009                 }
   1010 
   1011                 resetMatches(term, context);
   1012                 freeParenthesesDisjunctionContext(context);
   1013 
   1014                 if (result != JSRegExpNoMatch)
   1015                     return result;
   1016             }
   1017 
   1018             // Nope - okay backtrack looking for an alternative.
   1019             while (backTrack->matchAmount) {
   1020                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
   1021                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
   1022                 if (result == JSRegExpMatch) {
   1023                     // successful backtrack! we're back in the game!
   1024                     if (backTrack->matchAmount) {
   1025                         context = backTrack->lastContext;
   1026                         recordParenthesesMatch(term, context);
   1027                     }
   1028                     return JSRegExpMatch;
   1029                 }
   1030 
   1031                 // pop a match off the stack
   1032                 resetMatches(term, context);
   1033                 popParenthesesDisjunctionContext(backTrack);
   1034                 freeParenthesesDisjunctionContext(context);
   1035 
   1036                 return result;
   1037             }
   1038 
   1039             return JSRegExpNoMatch;
   1040         }
   1041         }
   1042 
   1043         ASSERT_NOT_REACHED();
   1044         return JSRegExpErrorNoMatch;
   1045     }
   1046 
   1047     void lookupForBeginChars()
   1048     {
   1049         int character;
   1050         bool firstSingleCharFound;
   1051 
   1052         while (true) {
   1053             if (input.isNotAvailableInput(2))
   1054                 return;
   1055 
   1056             firstSingleCharFound = false;
   1057 
   1058             character = input.readPair();
   1059 
   1060             for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
   1061                 BeginChar bc = pattern->m_beginChars[i];
   1062 
   1063                 if (!firstSingleCharFound && bc.value <= 0xFFFF) {
   1064                     firstSingleCharFound = true;
   1065                     character &= 0xFFFF;
   1066                 }
   1067 
   1068                 if ((character | bc.mask) == bc.value)
   1069                     return;
   1070             }
   1071 
   1072             input.next();
   1073         }
   1074     }
   1075 
   1076 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
   1077 #define BACKTRACK() { --context->term; goto backtrack; }
   1078 #define currentTerm() (disjunction->terms[context->term])
   1079     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
   1080     {
   1081         if (!--remainingMatchCount)
   1082             return JSRegExpErrorHitLimit;
   1083 
   1084         if (btrack)
   1085             BACKTRACK();
   1086 
   1087         if (pattern->m_containsBeginChars && isBody)
   1088             lookupForBeginChars();
   1089 
   1090         context->matchBegin = input.getPos();
   1091         context->term = 0;
   1092 
   1093     matchAgain:
   1094         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
   1095 
   1096         switch (currentTerm().type) {
   1097         case ByteTerm::TypeSubpatternBegin:
   1098             MATCH_NEXT();
   1099         case ByteTerm::TypeSubpatternEnd:
   1100             context->matchEnd = input.getPos();
   1101             return JSRegExpMatch;
   1102 
   1103         case ByteTerm::TypeBodyAlternativeBegin:
   1104             MATCH_NEXT();
   1105         case ByteTerm::TypeBodyAlternativeDisjunction:
   1106         case ByteTerm::TypeBodyAlternativeEnd:
   1107             context->matchEnd = input.getPos();
   1108             return JSRegExpMatch;
   1109 
   1110         case ByteTerm::TypeAlternativeBegin:
   1111             MATCH_NEXT();
   1112         case ByteTerm::TypeAlternativeDisjunction:
   1113         case ByteTerm::TypeAlternativeEnd: {
   1114             int offset = currentTerm().alternative.end;
   1115             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
   1116             backTrack->offset = offset;
   1117             context->term += offset;
   1118             MATCH_NEXT();
   1119         }
   1120 
   1121         case ByteTerm::TypeAssertionBOL:
   1122             if (matchAssertionBOL(currentTerm()))
   1123                 MATCH_NEXT();
   1124             BACKTRACK();
   1125         case ByteTerm::TypeAssertionEOL:
   1126             if (matchAssertionEOL(currentTerm()))
   1127                 MATCH_NEXT();
   1128             BACKTRACK();
   1129         case ByteTerm::TypeAssertionWordBoundary:
   1130             if (matchAssertionWordBoundary(currentTerm()))
   1131                 MATCH_NEXT();
   1132             BACKTRACK();
   1133 
   1134         case ByteTerm::TypePatternCharacterOnce:
   1135         case ByteTerm::TypePatternCharacterFixed: {
   1136             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
   1137                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
   1138                     BACKTRACK();
   1139             }
   1140             MATCH_NEXT();
   1141         }
   1142         case ByteTerm::TypePatternCharacterGreedy: {
   1143             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
   1144             unsigned matchAmount = 0;
   1145             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
   1146                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
   1147                     input.uncheckInput(1);
   1148                     break;
   1149                 }
   1150                 ++matchAmount;
   1151             }
   1152             backTrack->matchAmount = matchAmount;
   1153 
   1154             MATCH_NEXT();
   1155         }
   1156         case ByteTerm::TypePatternCharacterNonGreedy: {
   1157             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
   1158             backTrack->matchAmount = 0;
   1159             MATCH_NEXT();
   1160         }
   1161 
   1162         case ByteTerm::TypePatternCasedCharacterOnce:
   1163         case ByteTerm::TypePatternCasedCharacterFixed: {
   1164             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
   1165                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
   1166                     BACKTRACK();
   1167             }
   1168             MATCH_NEXT();
   1169         }
   1170         case ByteTerm::TypePatternCasedCharacterGreedy: {
   1171             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
   1172             unsigned matchAmount = 0;
   1173             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
   1174                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
   1175                     input.uncheckInput(1);
   1176                     break;
   1177                 }
   1178                 ++matchAmount;
   1179             }
   1180             backTrack->matchAmount = matchAmount;
   1181 
   1182             MATCH_NEXT();
   1183         }
   1184         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
   1185             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
   1186             backTrack->matchAmount = 0;
   1187             MATCH_NEXT();
   1188         }
   1189 
   1190         case ByteTerm::TypeCharacterClass:
   1191             if (matchCharacterClass(currentTerm(), context))
   1192                 MATCH_NEXT();
   1193             BACKTRACK();
   1194         case ByteTerm::TypeBackReference:
   1195             if (matchBackReference(currentTerm(), context))
   1196                 MATCH_NEXT();
   1197             BACKTRACK();
   1198         case ByteTerm::TypeParenthesesSubpattern: {
   1199             JSRegExpResult result = matchParentheses(currentTerm(), context);
   1200 
   1201             if (result == JSRegExpMatch) {
   1202                 MATCH_NEXT();
   1203             }  else if (result != JSRegExpNoMatch)
   1204                 return result;
   1205 
   1206             BACKTRACK();
   1207         }
   1208         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
   1209             if (matchParenthesesOnceBegin(currentTerm(), context))
   1210                 MATCH_NEXT();
   1211             BACKTRACK();
   1212         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
   1213             if (matchParenthesesOnceEnd(currentTerm(), context))
   1214                 MATCH_NEXT();
   1215             BACKTRACK();
   1216         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
   1217             if (matchParenthesesTerminalBegin(currentTerm(), context))
   1218                 MATCH_NEXT();
   1219             BACKTRACK();
   1220         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
   1221             if (matchParenthesesTerminalEnd(currentTerm(), context))
   1222                 MATCH_NEXT();
   1223             BACKTRACK();
   1224         case ByteTerm::TypeParentheticalAssertionBegin:
   1225             if (matchParentheticalAssertionBegin(currentTerm(), context))
   1226                 MATCH_NEXT();
   1227             BACKTRACK();
   1228         case ByteTerm::TypeParentheticalAssertionEnd:
   1229             if (matchParentheticalAssertionEnd(currentTerm(), context))
   1230                 MATCH_NEXT();
   1231             BACKTRACK();
   1232 
   1233         case ByteTerm::TypeCheckInput:
   1234             if (input.checkInput(currentTerm().checkInputCount))
   1235                 MATCH_NEXT();
   1236             BACKTRACK();
   1237 
   1238             case ByteTerm::TypeUncheckInput:
   1239                 input.uncheckInput(currentTerm().checkInputCount);
   1240                 MATCH_NEXT();
   1241         }
   1242 
   1243         // We should never fall-through to here.
   1244         ASSERT_NOT_REACHED();
   1245 
   1246     backtrack:
   1247         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
   1248 
   1249         switch (currentTerm().type) {
   1250         case ByteTerm::TypeSubpatternBegin:
   1251             return JSRegExpNoMatch;
   1252         case ByteTerm::TypeSubpatternEnd:
   1253             ASSERT_NOT_REACHED();
   1254 
   1255         case ByteTerm::TypeBodyAlternativeBegin:
   1256         case ByteTerm::TypeBodyAlternativeDisjunction: {
   1257             int offset = currentTerm().alternative.next;
   1258             context->term += offset;
   1259             if (offset > 0)
   1260                 MATCH_NEXT();
   1261 
   1262             if (input.atEnd())
   1263                 return JSRegExpNoMatch;
   1264 
   1265             input.next();
   1266 
   1267             if (pattern->m_containsBeginChars && isBody)
   1268                 lookupForBeginChars();
   1269 
   1270             context->matchBegin = input.getPos();
   1271 
   1272             if (currentTerm().alternative.onceThrough)
   1273                 context->term += currentTerm().alternative.next;
   1274 
   1275             MATCH_NEXT();
   1276         }
   1277         case ByteTerm::TypeBodyAlternativeEnd:
   1278             ASSERT_NOT_REACHED();
   1279 
   1280             case ByteTerm::TypeAlternativeBegin:
   1281             case ByteTerm::TypeAlternativeDisjunction: {
   1282                 int offset = currentTerm().alternative.next;
   1283                 context->term += offset;
   1284                 if (offset > 0)
   1285                     MATCH_NEXT();
   1286                 BACKTRACK();
   1287             }
   1288             case ByteTerm::TypeAlternativeEnd: {
   1289                 // We should never backtrack back into an alternative of the main body of the regex.
   1290                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
   1291                 unsigned offset = backTrack->offset;
   1292                 context->term -= offset;
   1293                 BACKTRACK();
   1294             }
   1295 
   1296             case ByteTerm::TypeAssertionBOL:
   1297             case ByteTerm::TypeAssertionEOL:
   1298             case ByteTerm::TypeAssertionWordBoundary:
   1299                 BACKTRACK();
   1300 
   1301             case ByteTerm::TypePatternCharacterOnce:
   1302             case ByteTerm::TypePatternCharacterFixed:
   1303             case ByteTerm::TypePatternCharacterGreedy:
   1304             case ByteTerm::TypePatternCharacterNonGreedy:
   1305                 if (backtrackPatternCharacter(currentTerm(), context))
   1306                     MATCH_NEXT();
   1307                 BACKTRACK();
   1308             case ByteTerm::TypePatternCasedCharacterOnce:
   1309             case ByteTerm::TypePatternCasedCharacterFixed:
   1310             case ByteTerm::TypePatternCasedCharacterGreedy:
   1311             case ByteTerm::TypePatternCasedCharacterNonGreedy:
   1312                 if (backtrackPatternCasedCharacter(currentTerm(), context))
   1313                     MATCH_NEXT();
   1314                 BACKTRACK();
   1315             case ByteTerm::TypeCharacterClass:
   1316                 if (backtrackCharacterClass(currentTerm(), context))
   1317                     MATCH_NEXT();
   1318                 BACKTRACK();
   1319             case ByteTerm::TypeBackReference:
   1320                 if (backtrackBackReference(currentTerm(), context))
   1321                     MATCH_NEXT();
   1322                 BACKTRACK();
   1323             case ByteTerm::TypeParenthesesSubpattern: {
   1324                 JSRegExpResult result = backtrackParentheses(currentTerm(), context);
   1325 
   1326                 if (result == JSRegExpMatch) {
   1327                     MATCH_NEXT();
   1328                 } else if (result != JSRegExpNoMatch)
   1329                     return result;
   1330 
   1331                 BACKTRACK();
   1332             }
   1333             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
   1334                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
   1335                     MATCH_NEXT();
   1336                 BACKTRACK();
   1337             case ByteTerm::TypeParenthesesSubpatternOnceEnd:
   1338                 if (backtrackParenthesesOnceEnd(currentTerm(), context))
   1339                     MATCH_NEXT();
   1340                 BACKTRACK();
   1341             case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
   1342                 if (backtrackParenthesesTerminalBegin(currentTerm(), context))
   1343                     MATCH_NEXT();
   1344                 BACKTRACK();
   1345             case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
   1346                 if (backtrackParenthesesTerminalEnd(currentTerm(), context))
   1347                     MATCH_NEXT();
   1348                 BACKTRACK();
   1349             case ByteTerm::TypeParentheticalAssertionBegin:
   1350                 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
   1351                     MATCH_NEXT();
   1352                 BACKTRACK();
   1353             case ByteTerm::TypeParentheticalAssertionEnd:
   1354                 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
   1355                     MATCH_NEXT();
   1356                 BACKTRACK();
   1357 
   1358             case ByteTerm::TypeCheckInput:
   1359                 input.uncheckInput(currentTerm().checkInputCount);
   1360                 BACKTRACK();
   1361 
   1362             case ByteTerm::TypeUncheckInput:
   1363                 input.checkInput(currentTerm().checkInputCount);
   1364                 BACKTRACK();
   1365         }
   1366 
   1367         ASSERT_NOT_REACHED();
   1368         return JSRegExpErrorNoMatch;
   1369     }
   1370 
   1371     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
   1372     {
   1373         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
   1374 
   1375         if (result == JSRegExpMatch) {
   1376             while (context->matchBegin == context->matchEnd) {
   1377                 result = matchDisjunction(disjunction, context, true);
   1378                 if (result != JSRegExpMatch)
   1379                     return result;
   1380             }
   1381             return JSRegExpMatch;
   1382         }
   1383 
   1384         return result;
   1385     }
   1386 
   1387     int interpret()
   1388     {
   1389         allocatorPool = pattern->m_allocator->startAllocator();
   1390         if (!allocatorPool)
   1391             CRASH();
   1392 
   1393         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
   1394             output[i] = -1;
   1395 
   1396         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
   1397 
   1398         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
   1399         if (result == JSRegExpMatch) {
   1400             output[0] = context->matchBegin;
   1401             output[1] = context->matchEnd;
   1402         }
   1403 
   1404         freeDisjunctionContext(context);
   1405 
   1406         pattern->m_allocator->stopAllocator();
   1407 
   1408         // RegExp.cpp currently expects all error to be converted to -1.
   1409         ASSERT((result == JSRegExpMatch) == (output[0] != -1));
   1410         return output[0];
   1411     }
   1412 
   1413     Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
   1414         : pattern(pattern)
   1415         , output(output)
   1416         , input(inputChar, start, length)
   1417         , allocatorPool(0)
   1418         , remainingMatchCount(matchLimit)
   1419     {
   1420     }
   1421 
   1422 private:
   1423     BytecodePattern* pattern;
   1424     int* output;
   1425     InputStream input;
   1426     BumpPointerPool* allocatorPool;
   1427     unsigned remainingMatchCount;
   1428 };
   1429 
   1430 
   1431 
   1432 class ByteCompiler {
   1433     struct ParenthesesStackEntry {
   1434         unsigned beginTerm;
   1435         unsigned savedAlternativeIndex;
   1436         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
   1437             : beginTerm(beginTerm)
   1438             , savedAlternativeIndex(savedAlternativeIndex)
   1439         {
   1440         }
   1441     };
   1442 
   1443 public:
   1444     ByteCompiler(YarrPattern& pattern)
   1445         : m_pattern(pattern)
   1446     {
   1447         m_currentAlternativeIndex = 0;
   1448     }
   1449 
   1450     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
   1451     {
   1452         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
   1453         emitDisjunction(m_pattern.m_body);
   1454         regexEnd();
   1455 
   1456         return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
   1457     }
   1458 
   1459     void checkInput(unsigned count)
   1460     {
   1461         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
   1462     }
   1463 
   1464     void uncheckInput(unsigned count)
   1465     {
   1466         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
   1467     }
   1468 
   1469     void assertionBOL(int inputPosition)
   1470     {
   1471         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
   1472     }
   1473 
   1474     void assertionEOL(int inputPosition)
   1475     {
   1476         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
   1477     }
   1478 
   1479     void assertionWordBoundary(bool invert, int inputPosition)
   1480     {
   1481         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
   1482     }
   1483 
   1484     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1485     {
   1486         if (m_pattern.m_ignoreCase) {
   1487             UChar lo = Unicode::toLower(ch);
   1488             UChar hi = Unicode::toUpper(ch);
   1489 
   1490             if (lo != hi) {
   1491                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
   1492                 return;
   1493             }
   1494         }
   1495 
   1496         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
   1497     }
   1498 
   1499     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1500     {
   1501         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
   1502 
   1503         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
   1504         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
   1505         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1506     }
   1507 
   1508     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1509     {
   1510         ASSERT(subpatternId);
   1511 
   1512         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
   1513 
   1514         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
   1515         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
   1516         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1517     }
   1518 
   1519     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
   1520     {
   1521         int beginTerm = m_bodyDisjunction->terms.size();
   1522 
   1523         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
   1524         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1525         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
   1526         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
   1527 
   1528         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
   1529         m_currentAlternativeIndex = beginTerm + 1;
   1530     }
   1531 
   1532     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
   1533     {
   1534         int beginTerm = m_bodyDisjunction->terms.size();
   1535 
   1536         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
   1537         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1538         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
   1539         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
   1540 
   1541         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
   1542         m_currentAlternativeIndex = beginTerm + 1;
   1543     }
   1544 
   1545     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
   1546     {
   1547         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
   1548         // then fix this up at the end! - simplifying this should make it much clearer.
   1549         // https://bugs.webkit.org/show_bug.cgi?id=50136
   1550 
   1551         int beginTerm = m_bodyDisjunction->terms.size();
   1552 
   1553         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
   1554         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1555         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
   1556         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
   1557 
   1558         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
   1559         m_currentAlternativeIndex = beginTerm + 1;
   1560     }
   1561 
   1562     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
   1563     {
   1564         int beginTerm = m_bodyDisjunction->terms.size();
   1565 
   1566         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
   1567         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
   1568         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
   1569         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
   1570 
   1571         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
   1572         m_currentAlternativeIndex = beginTerm + 1;
   1573     }
   1574 
   1575     void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1576     {
   1577         unsigned beginTerm = popParenthesesStack();
   1578         closeAlternative(beginTerm + 1);
   1579         unsigned endTerm = m_bodyDisjunction->terms.size();
   1580 
   1581         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
   1582 
   1583         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
   1584         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
   1585 
   1586         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
   1587         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1588         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1589         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
   1590 
   1591         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
   1592         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
   1593         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
   1594         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
   1595     }
   1596 
   1597     unsigned popParenthesesStack()
   1598     {
   1599         ASSERT(m_parenthesesStack.size());
   1600         int stackEnd = m_parenthesesStack.size() - 1;
   1601         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
   1602         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
   1603         m_parenthesesStack.shrink(stackEnd);
   1604 
   1605         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
   1606         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
   1607 
   1608         return beginTerm;
   1609     }
   1610 
   1611 #ifndef NDEBUG
   1612     void dumpDisjunction(ByteDisjunction* disjunction)
   1613     {
   1614         printf("ByteDisjunction(%p):\n\t", disjunction);
   1615         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
   1616             printf("{ %d } ", disjunction->terms[i].type);
   1617         printf("\n");
   1618     }
   1619 #endif
   1620 
   1621     void closeAlternative(int beginTerm)
   1622     {
   1623         int origBeginTerm = beginTerm;
   1624         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
   1625         int endIndex = m_bodyDisjunction->terms.size();
   1626 
   1627         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
   1628 
   1629         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
   1630             m_bodyDisjunction->terms.remove(beginTerm);
   1631         else {
   1632             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
   1633                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
   1634                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
   1635                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
   1636                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
   1637             }
   1638 
   1639             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
   1640 
   1641             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
   1642             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
   1643         }
   1644     }
   1645 
   1646     void closeBodyAlternative()
   1647     {
   1648         int beginTerm = 0;
   1649         int origBeginTerm = 0;
   1650         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
   1651         int endIndex = m_bodyDisjunction->terms.size();
   1652 
   1653         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
   1654 
   1655         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
   1656             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
   1657             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
   1658             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
   1659             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
   1660         }
   1661 
   1662         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
   1663 
   1664         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
   1665         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
   1666     }
   1667 
   1668     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
   1669     {
   1670         unsigned beginTerm = popParenthesesStack();
   1671         closeAlternative(beginTerm + 1);
   1672         unsigned endTerm = m_bodyDisjunction->terms.size();
   1673 
   1674         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   1675 
   1676         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
   1677 
   1678         bool capture = parenthesesBegin.capture();
   1679         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
   1680 
   1681         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
   1682         ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
   1683 
   1684         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
   1685         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
   1686             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
   1687         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
   1688 
   1689         m_bodyDisjunction->terms.shrink(beginTerm);
   1690 
   1691         m_allParenthesesInfo.append(parenthesesDisjunction);
   1692         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
   1693 
   1694         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
   1695         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
   1696         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
   1697     }
   1698 
   1699     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1700     {
   1701         unsigned beginTerm = popParenthesesStack();
   1702         closeAlternative(beginTerm + 1);
   1703         unsigned endTerm = m_bodyDisjunction->terms.size();
   1704 
   1705         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   1706 
   1707         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
   1708         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
   1709 
   1710         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
   1711         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1712         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1713         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
   1714 
   1715         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
   1716         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
   1717         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
   1718         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
   1719     }
   1720 
   1721     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
   1722     {
   1723         unsigned beginTerm = popParenthesesStack();
   1724         closeAlternative(beginTerm + 1);
   1725         unsigned endTerm = m_bodyDisjunction->terms.size();
   1726 
   1727         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
   1728 
   1729         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
   1730         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
   1731 
   1732         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
   1733         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1734         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
   1735         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
   1736 
   1737         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
   1738         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
   1739         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
   1740         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
   1741     }
   1742 
   1743     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
   1744     {
   1745         m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
   1746         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
   1747         m_bodyDisjunction->terms[0].frameLocation = 0;
   1748         m_currentAlternativeIndex = 0;
   1749     }
   1750 
   1751     void regexEnd()
   1752     {
   1753         closeBodyAlternative();
   1754     }
   1755 
   1756     void alternativeBodyDisjunction(bool onceThrough)
   1757     {
   1758         int newAlternativeIndex = m_bodyDisjunction->terms.size();
   1759         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
   1760         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
   1761 
   1762         m_currentAlternativeIndex = newAlternativeIndex;
   1763     }
   1764 
   1765     void alternativeDisjunction()
   1766     {
   1767         int newAlternativeIndex = m_bodyDisjunction->terms.size();
   1768         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
   1769         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
   1770 
   1771         m_currentAlternativeIndex = newAlternativeIndex;
   1772     }
   1773 
   1774     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0, bool isParentheticalAssertion = false)
   1775     {
   1776         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
   1777             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
   1778 
   1779             PatternAlternative* alternative = disjunction->m_alternatives[alt];
   1780 
   1781             if (alt) {
   1782                 if (disjunction == m_pattern.m_body)
   1783                     alternativeBodyDisjunction(alternative->onceThrough());
   1784                 else
   1785                     alternativeDisjunction();
   1786             }
   1787 
   1788             unsigned minimumSize = alternative->m_minimumSize;
   1789             int countToCheck;
   1790 
   1791             if (isParentheticalAssertion && parenthesesInputCountAlreadyChecked > minimumSize)
   1792                 countToCheck = 0;
   1793             else
   1794                 countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
   1795 
   1796             ASSERT(countToCheck >= 0);
   1797             if (countToCheck) {
   1798                 checkInput(countToCheck);
   1799                 currentCountAlreadyChecked += countToCheck;
   1800             }
   1801 
   1802             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
   1803                 PatternTerm& term = alternative->m_terms[i];
   1804 
   1805                 switch (term.type) {
   1806                 case PatternTerm::TypeAssertionBOL:
   1807                     assertionBOL(term.inputPosition - currentCountAlreadyChecked);
   1808                     break;
   1809 
   1810                 case PatternTerm::TypeAssertionEOL:
   1811                     assertionEOL(term.inputPosition - currentCountAlreadyChecked);
   1812                     break;
   1813 
   1814                 case PatternTerm::TypeAssertionWordBoundary:
   1815                     assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
   1816                     break;
   1817 
   1818                 case PatternTerm::TypePatternCharacter:
   1819                     atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
   1820                     break;
   1821 
   1822                 case PatternTerm::TypeCharacterClass:
   1823                     atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
   1824                     break;
   1825 
   1826                 case PatternTerm::TypeBackReference:
   1827                     atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
   1828                         break;
   1829 
   1830                 case PatternTerm::TypeForwardReference:
   1831                     break;
   1832 
   1833                 case PatternTerm::TypeParenthesesSubpattern: {
   1834                     unsigned disjunctionAlreadyCheckedCount = 0;
   1835                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
   1836                         unsigned alternativeFrameLocation = term.frameLocation;
   1837                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
   1838                         if (term.quantityType == QuantifierFixedCount)
   1839                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
   1840                         else
   1841                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
   1842                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
   1843                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
   1844                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
   1845                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
   1846                     } else if (term.parentheses.isTerminal) {
   1847                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
   1848                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
   1849                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
   1850                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
   1851                     } else {
   1852                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
   1853                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
   1854                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
   1855                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
   1856                     }
   1857                     break;
   1858                 }
   1859 
   1860                 case PatternTerm::TypeParentheticalAssertion: {
   1861                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
   1862 
   1863                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
   1864                     int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
   1865                     int uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
   1866 
   1867                     if (uncheckAmount > 0) {
   1868                         uncheckInput(uncheckAmount);
   1869                         currentCountAlreadyChecked -= uncheckAmount;
   1870                     } else
   1871                         uncheckAmount = 0;
   1872 
   1873                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
   1874                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
   1875                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
   1876                     if (uncheckAmount) {
   1877                         checkInput(uncheckAmount);
   1878                         currentCountAlreadyChecked += uncheckAmount;
   1879                     }
   1880                     break;
   1881                 }
   1882                 }
   1883             }
   1884         }
   1885     }
   1886 
   1887 private:
   1888     YarrPattern& m_pattern;
   1889     OwnPtr<ByteDisjunction> m_bodyDisjunction;
   1890     unsigned m_currentAlternativeIndex;
   1891     Vector<ParenthesesStackEntry> m_parenthesesStack;
   1892     Vector<ByteDisjunction*> m_allParenthesesInfo;
   1893 };
   1894 
   1895 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
   1896 {
   1897     return ByteCompiler(pattern).compile(allocator);
   1898 }
   1899 
   1900 int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
   1901 {
   1902     return Interpreter(bytecode, output, input, start, length).interpret();
   1903 }
   1904 
   1905 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
   1906 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
   1907 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
   1908 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
   1909 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
   1910 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
   1911 COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
   1912 
   1913 
   1914 } }
   1915