Home | History | Annotate | Download | only in wrec
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef Parser_h
     27 #define Parser_h
     28 
     29 #include <wtf/Platform.h>
     30 
     31 #if ENABLE(WREC)
     32 
     33 #include "Escapes.h"
     34 #include "Quantifier.h"
     35 #include "UString.h"
     36 #include "WRECGenerator.h"
     37 #include <wtf/ASCIICType.h>
     38 
     39 namespace JSC { namespace WREC {
     40 
     41     struct CharacterClass;
     42 
     43     class Parser {
     44     typedef Generator::JumpList JumpList;
     45     typedef Generator::ParenthesesType ParenthesesType;
     46 
     47     friend class SavedState;
     48 
     49     public:
     50         Parser(const UString& pattern, bool ignoreCase, bool multiline)
     51             : m_generator(*this)
     52             , m_data(pattern.data())
     53             , m_size(pattern.size())
     54             , m_ignoreCase(ignoreCase)
     55             , m_multiline(multiline)
     56         {
     57             reset();
     58         }
     59 
     60         Generator& generator() { return m_generator; }
     61 
     62         bool ignoreCase() const { return m_ignoreCase; }
     63         bool multiline() const { return m_multiline; }
     64 
     65         void recordSubpattern() { ++m_numSubpatterns; }
     66         unsigned numSubpatterns() const { return m_numSubpatterns; }
     67 
     68         const char* error() const { return m_error; }
     69         const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }
     70 
     71         void parsePattern(JumpList& failures)
     72         {
     73             reset();
     74 
     75             parseDisjunction(failures);
     76 
     77             if (peek() != EndOfPattern)
     78                 setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it.
     79         }
     80 
     81         void parseDisjunction(JumpList& failures);
     82         void parseAlternative(JumpList& failures);
     83         bool parseTerm(JumpList& failures);
     84         bool parseNonCharacterEscape(JumpList& failures, const Escape&);
     85         bool parseParentheses(JumpList& failures);
     86         bool parseCharacterClass(JumpList& failures);
     87         bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
     88         bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);
     89 
     90     private:
     91         class SavedState {
     92         public:
     93             SavedState(Parser& parser)
     94                 : m_parser(parser)
     95                 , m_index(parser.m_index)
     96             {
     97             }
     98 
     99             void restore()
    100             {
    101                 m_parser.m_index = m_index;
    102             }
    103 
    104         private:
    105             Parser& m_parser;
    106             unsigned m_index;
    107         };
    108 
    109         void reset()
    110         {
    111             m_index = 0;
    112             m_numSubpatterns = 0;
    113             m_error = 0;
    114         }
    115 
    116         void setError(const char* error)
    117         {
    118             if (m_error)
    119                 return;
    120             m_error = error;
    121         }
    122 
    123         int peek()
    124         {
    125             if (m_index >= m_size)
    126                 return EndOfPattern;
    127             return m_data[m_index];
    128         }
    129 
    130         int consume()
    131         {
    132             if (m_index >= m_size)
    133                 return EndOfPattern;
    134             return m_data[m_index++];
    135         }
    136 
    137         bool peekIsDigit()
    138         {
    139             return WTF::isASCIIDigit(peek());
    140         }
    141 
    142         unsigned peekDigit()
    143         {
    144             ASSERT(peekIsDigit());
    145             return peek() - '0';
    146         }
    147 
    148         unsigned consumeDigit()
    149         {
    150             ASSERT(peekIsDigit());
    151             return consume() - '0';
    152         }
    153 
    154         unsigned consumeNumber()
    155         {
    156             int n = consumeDigit();
    157             while (peekIsDigit()) {
    158                 n *= 10;
    159                 n += consumeDigit();
    160             }
    161             return n;
    162         }
    163 
    164         int consumeHex(int count)
    165         {
    166             int n = 0;
    167             while (count--) {
    168                 if (!WTF::isASCIIHexDigit(peek()))
    169                     return -1;
    170                 n = (n << 4) | WTF::toASCIIHexValue(consume());
    171             }
    172             return n;
    173         }
    174 
    175         unsigned consumeOctal()
    176         {
    177             unsigned n = 0;
    178             while (n < 32 && WTF::isASCIIOctalDigit(peek()))
    179                 n = n * 8 + consumeDigit();
    180             return n;
    181         }
    182 
    183         ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
    184         Quantifier consumeQuantifier();
    185         Escape consumeEscape(bool inCharacterClass);
    186         ParenthesesType consumeParenthesesType();
    187 
    188         static const int EndOfPattern = -1;
    189 
    190         // Error messages.
    191         static const char* QuantifierOutOfOrder;
    192         static const char* QuantifierWithoutAtom;
    193         static const char* ParenthesesUnmatched;
    194         static const char* ParenthesesTypeInvalid;
    195         static const char* ParenthesesNotSupported;
    196         static const char* CharacterClassUnmatched;
    197         static const char* CharacterClassOutOfOrder;
    198         static const char* EscapeUnterminated;
    199 
    200         Generator m_generator;
    201         const UChar* m_data;
    202         unsigned m_size;
    203         unsigned m_index;
    204         bool m_ignoreCase;
    205         bool m_multiline;
    206         unsigned m_numSubpatterns;
    207         const char* m_error;
    208     };
    209 
    210 } } // namespace JSC::WREC
    211 
    212 #endif // ENABLE(WREC)
    213 
    214 #endif // Parser_h
    215