/*
 * Copyright (C) 2008 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */ 25 26 #ifndef Parser_h 27 #define Parser_h 28 29 #include <wtf/Platform.h> 30 31 #if ENABLE(WREC) 32 33 #include "Escapes.h" 34 #include "Quantifier.h" 35 #include "UString.h" 36 #include "WRECGenerator.h" 37 #include <wtf/ASCIICType.h> 38 39 namespace JSC { namespace WREC { 40 41 struct CharacterClass; 42 43 class Parser { 44 typedef Generator::JumpList JumpList; 45 typedef Generator::ParenthesesType ParenthesesType; 46 47 friend class SavedState; 48 49 public: 50 Parser(const UString& pattern, bool ignoreCase, bool multiline) 51 : m_generator(*this) 52 , m_data(pattern.data()) 53 , m_size(pattern.size()) 54 , m_ignoreCase(ignoreCase) 55 , m_multiline(multiline) 56 { 57 reset(); 58 } 59 60 Generator& generator() { return m_generator; } 61 62 bool ignoreCase() const { return m_ignoreCase; } 63 bool multiline() const { return m_multiline; } 64 65 void recordSubpattern() { ++m_numSubpatterns; } 66 unsigned numSubpatterns() const { return m_numSubpatterns; } 67 68 const char* error() const { return m_error; } 69 const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } 70 71 void parsePattern(JumpList& failures) 72 { 73 reset(); 74 75 parseDisjunction(failures); 76 77 if (peek() != EndOfPattern) 78 setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. 
79 } 80 81 void parseDisjunction(JumpList& failures); 82 void parseAlternative(JumpList& failures); 83 bool parseTerm(JumpList& failures); 84 bool parseNonCharacterEscape(JumpList& failures, const Escape&); 85 bool parseParentheses(JumpList& failures); 86 bool parseCharacterClass(JumpList& failures); 87 bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); 88 bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); 89 90 private: 91 class SavedState { 92 public: 93 SavedState(Parser& parser) 94 : m_parser(parser) 95 , m_index(parser.m_index) 96 { 97 } 98 99 void restore() 100 { 101 m_parser.m_index = m_index; 102 } 103 104 private: 105 Parser& m_parser; 106 unsigned m_index; 107 }; 108 109 void reset() 110 { 111 m_index = 0; 112 m_numSubpatterns = 0; 113 m_error = 0; 114 } 115 116 void setError(const char* error) 117 { 118 if (m_error) 119 return; 120 m_error = error; 121 } 122 123 int peek() 124 { 125 if (m_index >= m_size) 126 return EndOfPattern; 127 return m_data[m_index]; 128 } 129 130 int consume() 131 { 132 if (m_index >= m_size) 133 return EndOfPattern; 134 return m_data[m_index++]; 135 } 136 137 bool peekIsDigit() 138 { 139 return WTF::isASCIIDigit(peek()); 140 } 141 142 unsigned peekDigit() 143 { 144 ASSERT(peekIsDigit()); 145 return peek() - '0'; 146 } 147 148 unsigned consumeDigit() 149 { 150 ASSERT(peekIsDigit()); 151 return consume() - '0'; 152 } 153 154 unsigned consumeNumber() 155 { 156 int n = consumeDigit(); 157 while (peekIsDigit()) { 158 n *= 10; 159 n += consumeDigit(); 160 } 161 return n; 162 } 163 164 int consumeHex(int count) 165 { 166 int n = 0; 167 while (count--) { 168 if (!WTF::isASCIIHexDigit(peek())) 169 return -1; 170 n = (n << 4) | WTF::toASCIIHexValue(consume()); 171 } 172 return n; 173 } 174 175 unsigned consumeOctal() 176 { 177 unsigned n = 0; 178 while (n < 32 && WTF::isASCIIOctalDigit(peek())) 179 n = n * 8 + consumeDigit(); 180 return n; 181 } 182 183 
ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); 184 Quantifier consumeQuantifier(); 185 Escape consumeEscape(bool inCharacterClass); 186 ParenthesesType consumeParenthesesType(); 187 188 static const int EndOfPattern = -1; 189 190 // Error messages. 191 static const char* QuantifierOutOfOrder; 192 static const char* QuantifierWithoutAtom; 193 static const char* ParenthesesUnmatched; 194 static const char* ParenthesesTypeInvalid; 195 static const char* ParenthesesNotSupported; 196 static const char* CharacterClassUnmatched; 197 static const char* CharacterClassOutOfOrder; 198 static const char* EscapeUnterminated; 199 200 Generator m_generator; 201 const UChar* m_data; 202 unsigned m_size; 203 unsigned m_index; 204 bool m_ignoreCase; 205 bool m_multiline; 206 unsigned m_numSubpatterns; 207 const char* m_error; 208 }; 209 210 } } // namespace JSC::WREC 211 212 #endif // ENABLE(WREC) 213 214 #endif // Parser_h 215