1 /* 2 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef YarrInterpreter_h 27 #define YarrInterpreter_h 28 29 #include "YarrPattern.h" 30 #include <wtf/PassOwnPtr.h> 31 #include <wtf/unicode/Unicode.h> 32 33 namespace WTF { 34 class BumpPointerAllocator; 35 } 36 using WTF::BumpPointerAllocator; 37 38 namespace JSC { namespace Yarr { 39 40 class ByteDisjunction; 41 42 struct ByteTerm { 43 enum Type { 44 TypeBodyAlternativeBegin, 45 TypeBodyAlternativeDisjunction, 46 TypeBodyAlternativeEnd, 47 TypeAlternativeBegin, 48 TypeAlternativeDisjunction, 49 TypeAlternativeEnd, 50 TypeSubpatternBegin, 51 TypeSubpatternEnd, 52 TypeAssertionBOL, 53 TypeAssertionEOL, 54 TypeAssertionWordBoundary, 55 TypePatternCharacterOnce, 56 TypePatternCharacterFixed, 57 TypePatternCharacterGreedy, 58 TypePatternCharacterNonGreedy, 59 TypePatternCasedCharacterOnce, 60 TypePatternCasedCharacterFixed, 61 TypePatternCasedCharacterGreedy, 62 TypePatternCasedCharacterNonGreedy, 63 TypeCharacterClass, 64 TypeBackReference, 65 TypeParenthesesSubpattern, 66 TypeParenthesesSubpatternOnceBegin, 67 TypeParenthesesSubpatternOnceEnd, 68 TypeParenthesesSubpatternTerminalBegin, 69 TypeParenthesesSubpatternTerminalEnd, 70 TypeParentheticalAssertionBegin, 71 TypeParentheticalAssertionEnd, 72 TypeCheckInput, 73 TypeUncheckInput, 74 } type; 75 union { 76 struct { 77 union { 78 UChar patternCharacter; 79 struct { 80 UChar lo; 81 UChar hi; 82 } casedCharacter; 83 CharacterClass* characterClass; 84 unsigned subpatternId; 85 }; 86 union { 87 ByteDisjunction* parenthesesDisjunction; 88 unsigned parenthesesWidth; 89 }; 90 QuantifierType quantityType; 91 unsigned quantityCount; 92 } atom; 93 struct { 94 int next; 95 int end; 96 bool onceThrough; 97 } alternative; 98 unsigned checkInputCount; 99 }; 100 unsigned frameLocation; 101 bool m_capture : 1; 102 bool m_invert : 1; 103 int inputPosition; 104 105 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) 106 : frameLocation(frameLocation) 107 , m_capture(false) 108 , m_invert(false) 109 { 110 switch (quantityType) { 111 case QuantifierFixedCount: 112 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; 113 break; 114 case QuantifierGreedy: 115 type = ByteTerm::TypePatternCharacterGreedy; 116 break; 117 case QuantifierNonGreedy: 118 type = ByteTerm::TypePatternCharacterNonGreedy; 119 break; 120 } 121 122 atom.patternCharacter = ch; 123 atom.quantityType = quantityType; 124 atom.quantityCount = quantityCount; 125 inputPosition = inputPos; 126 } 127 128 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) 129 : frameLocation(frameLocation) 130 , m_capture(false) 131 , m_invert(false) 132 { 133 switch (quantityType) { 134 case QuantifierFixedCount: 135 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; 136 break; 137 case QuantifierGreedy: 138 type = ByteTerm::TypePatternCasedCharacterGreedy; 139 break; 140 case QuantifierNonGreedy: 141 type = ByteTerm::TypePatternCasedCharacterNonGreedy; 142 break; 143 } 144 145 atom.casedCharacter.lo = lo; 146 atom.casedCharacter.hi = hi; 147 atom.quantityType = quantityType; 148 atom.quantityCount = quantityCount; 149 inputPosition = inputPos; 150 } 151 152 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) 153 : type(ByteTerm::TypeCharacterClass) 154 , m_capture(false) 155 , m_invert(invert) 156 { 157 atom.characterClass = characterClass; 158 atom.quantityType = QuantifierFixedCount; 159 atom.quantityCount = 1; 160 inputPosition = inputPos; 161 } 162 163 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) 164 : type(type) 165 , m_capture(capture) 166 , m_invert(false) 167 { 168 atom.subpatternId = subpatternId; 169 atom.parenthesesDisjunction = parenthesesInfo; 170 atom.quantityType = QuantifierFixedCount; 171 atom.quantityCount = 1; 172 inputPosition = inputPos; 173 } 174 175 ByteTerm(Type type, bool invert = false) 176 : type(type) 177 , m_capture(false) 178 , m_invert(invert) 179 { 180 atom.quantityType = QuantifierFixedCount; 181 atom.quantityCount = 1; 182 } 183 184 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) 185 : type(type) 186 , m_capture(capture) 187 , m_invert(invert) 188 { 189 atom.subpatternId = subpatternId; 190 atom.quantityType = QuantifierFixedCount; 191 atom.quantityCount = 1; 192 inputPosition = inputPos; 193 } 194 195 static ByteTerm BOL(int inputPos) 196 { 197 ByteTerm term(TypeAssertionBOL); 198 term.inputPosition = inputPos; 199 return term; 200 } 201 202 static ByteTerm CheckInput(unsigned count) 203 { 204 ByteTerm term(TypeCheckInput); 205 term.checkInputCount = count; 206 return term; 207 } 208 209 static ByteTerm UncheckInput(unsigned count) 210 { 211 ByteTerm term(TypeUncheckInput); 212 term.checkInputCount = count; 213 return term; 214 } 215 216 static ByteTerm EOL(int inputPos) 217 { 218 ByteTerm term(TypeAssertionEOL); 219 term.inputPosition = inputPos; 220 return term; 221 } 222 223 static ByteTerm WordBoundary(bool invert, int inputPos) 224 { 225 ByteTerm term(TypeAssertionWordBoundary, invert); 226 term.inputPosition = inputPos; 227 return term; 228 } 229 230 static ByteTerm BackReference(unsigned subpatternId, int inputPos) 231 { 232 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); 233 } 234 235 static ByteTerm BodyAlternativeBegin(bool onceThrough) 236 { 237 ByteTerm term(TypeBodyAlternativeBegin); 238 term.alternative.next = 0; 239 term.alternative.end = 0; 240 term.alternative.onceThrough = onceThrough; 241 return term; 242 } 243 244 static ByteTerm BodyAlternativeDisjunction(bool onceThrough) 245 { 246 ByteTerm term(TypeBodyAlternativeDisjunction); 247 term.alternative.next = 0; 248 term.alternative.end = 0; 249 term.alternative.onceThrough = onceThrough; 250 return term; 251 } 252 253 static ByteTerm BodyAlternativeEnd() 254 { 255 ByteTerm term(TypeBodyAlternativeEnd); 256 term.alternative.next = 0; 257 term.alternative.end = 0; 258 term.alternative.onceThrough = false; 259 return term; 260 } 261 262 static ByteTerm AlternativeBegin() 263 { 264 ByteTerm term(TypeAlternativeBegin); 265 term.alternative.next = 0; 266 term.alternative.end = 0; 267 term.alternative.onceThrough = false; 268 return term; 269 } 270 271 static ByteTerm AlternativeDisjunction() 272 { 273 ByteTerm term(TypeAlternativeDisjunction); 274 term.alternative.next = 0; 275 term.alternative.end = 0; 276 term.alternative.onceThrough = false; 277 return term; 278 } 279 280 static ByteTerm AlternativeEnd() 281 { 282 ByteTerm term(TypeAlternativeEnd); 283 term.alternative.next = 0; 284 term.alternative.end = 0; 285 term.alternative.onceThrough = false; 286 return term; 287 } 288 289 static ByteTerm SubpatternBegin() 290 { 291 return ByteTerm(TypeSubpatternBegin); 292 } 293 294 static ByteTerm SubpatternEnd() 295 { 296 return ByteTerm(TypeSubpatternEnd); 297 } 298 299 bool invert() 300 { 301 return m_invert; 302 } 303 304 bool capture() 305 { 306 return m_capture; 307 } 308 }; 309 310 class ByteDisjunction { 311 WTF_MAKE_FAST_ALLOCATED; 312 public: 313 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) 314 : m_numSubpatterns(numSubpatterns) 315 , m_frameSize(frameSize) 316 { 317 } 318 319 Vector<ByteTerm> terms; 320 unsigned m_numSubpatterns; 321 unsigned m_frameSize; 322 }; 323 324 struct BytecodePattern { 325 WTF_MAKE_FAST_ALLOCATED; 326 public: 327 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator) 328 : m_body(body) 329 , m_ignoreCase(pattern.m_ignoreCase) 330 , m_multiline(pattern.m_multiline) 331 , m_containsBeginChars(pattern.m_containsBeginChars) 332 , m_allocator(allocator) 333 { 334 newlineCharacterClass = pattern.newlineCharacterClass(); 335 wordcharCharacterClass = pattern.wordcharCharacterClass(); 336 337 m_allParenthesesInfo.append(allParenthesesInfo); 338 m_userCharacterClasses.append(pattern.m_userCharacterClasses); 339 // 'Steal' the YarrPattern's CharacterClasses! We clear its 340 // array, so that it won't delete them on destruction. We'll 341 // take responsibility for that. 342 pattern.m_userCharacterClasses.clear(); 343 344 m_beginChars.append(pattern.m_beginChars); 345 } 346 347 ~BytecodePattern() 348 { 349 deleteAllValues(m_allParenthesesInfo); 350 deleteAllValues(m_userCharacterClasses); 351 } 352 353 OwnPtr<ByteDisjunction> m_body; 354 bool m_ignoreCase; 355 bool m_multiline; 356 bool m_containsBeginChars; 357 // Each BytecodePattern is associated with a RegExp, each RegExp is associated 358 // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator. 359 BumpPointerAllocator* m_allocator; 360 361 CharacterClass* newlineCharacterClass; 362 CharacterClass* wordcharCharacterClass; 363 364 Vector<BeginChar> m_beginChars; 365 366 private: 367 Vector<ByteDisjunction*> m_allParenthesesInfo; 368 Vector<CharacterClass*> m_userCharacterClasses; 369 }; 370 371 } } // namespace JSC::Yarr 372 373 #endif // YarrInterpreter_h 374