1 /* 2 * Copyright (C) 2008 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "WREC.h" 28 29 #if ENABLE(WREC) 30 31 #include "CharacterClassConstructor.h" 32 #include "Interpreter.h" 33 #include "WRECFunctors.h" 34 #include "WRECParser.h" 35 #include "pcre_internal.h" 36 37 using namespace WTF; 38 39 namespace JSC { namespace WREC { 40 41 void Generator::generateEnter() 42 { 43 #if CPU(X86) 44 // On x86 edi & esi are callee preserved registers. 45 push(X86Registers::edi); 46 push(X86Registers::esi); 47 48 #if COMPILER(MSVC) 49 // Move the arguments into registers. 50 peek(input, 3); 51 peek(index, 4); 52 peek(length, 5); 53 peek(output, 6); 54 #else 55 // On gcc the function is regparm(3), so the input, index, and length registers 56 // (eax, edx, and ecx respectively) already contain the appropriate values. 57 // Just load the fourth argument (output) into edi 58 peek(output, 3); 59 #endif 60 #endif 61 } 62 63 void Generator::generateReturnSuccess() 64 { 65 ASSERT(returnRegister != index); 66 ASSERT(returnRegister != output); 67 68 // Set return value. 69 pop(returnRegister); // match begin 70 store32(returnRegister, output); 71 store32(index, Address(output, 4)); // match end 72 73 // Restore callee save registers. 74 #if CPU(X86) 75 pop(X86Registers::esi); 76 pop(X86Registers::edi); 77 #endif 78 ret(); 79 } 80 81 void Generator::generateSaveIndex() 82 { 83 push(index); 84 } 85 86 void Generator::generateIncrementIndex(Jump* failure) 87 { 88 peek(index); 89 if (failure) 90 *failure = branch32(Equal, length, index); 91 add32(Imm32(1), index); 92 poke(index); 93 } 94 95 void Generator::generateLoadCharacter(JumpList& failures) 96 { 97 failures.append(branch32(Equal, length, index)); 98 load16(BaseIndex(input, index, TimesTwo), character); 99 } 100 101 // For the sake of end-of-line assertions, we treat one-past-the-end as if it 102 // were part of the input string. 103 void Generator::generateJumpIfNotEndOfInput(Label target) 104 { 105 branch32(LessThanOrEqual, index, length, target); 106 } 107 108 void Generator::generateReturnFailure() 109 { 110 pop(); 111 move(Imm32(-1), returnRegister); 112 113 #if CPU(X86) 114 pop(X86Registers::esi); 115 pop(X86Registers::edi); 116 #endif 117 ret(); 118 } 119 120 void Generator::generateBacktrack1() 121 { 122 sub32(Imm32(1), index); 123 } 124 125 void Generator::generateBacktrackBackreference(unsigned subpatternId) 126 { 127 sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index); 128 add32(Address(output, (2 * subpatternId) * sizeof(int)), index); 129 } 130 131 void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max) 132 { 133 GenerateBackreferenceFunctor functor(subpatternId); 134 135 load32(Address(output, (2 * subpatternId) * sizeof(int)), character); 136 Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character); 137 138 ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy); 139 if (quantifierType == Quantifier::Greedy) 140 generateGreedyQuantifier(failures, functor, min, max); 141 else 142 generateNonGreedyQuantifier(failures, functor, min, max); 143 144 skipIfEmpty.link(this); 145 } 146 147 void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) 148 { 149 JumpList atomFailedList; 150 JumpList alternativeFailedList; 151 152 // (0) Setup: Save, then init repeatCount. 153 push(repeatCount); 154 move(Imm32(0), repeatCount); 155 Jump start = jump(); 156 157 // (4) Quantifier failed: No more atom reading possible. 158 Label quantifierFailed(this); 159 pop(repeatCount); 160 failures.append(jump()); 161 162 // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again. 163 Label alternativeFailed(this); 164 pop(index); 165 if (max != Quantifier::Infinity) 166 branch32(Equal, repeatCount, Imm32(max), quantifierFailed); 167 168 // (1) Read an atom. 169 if (min) 170 start.link(this); 171 Label readAtom(this); 172 functor.generateAtom(this, atomFailedList); 173 atomFailedList.linkTo(quantifierFailed, this); 174 add32(Imm32(1), repeatCount); 175 176 // (2) Keep reading if we're under the minimum. 177 if (min > 1) 178 branch32(LessThan, repeatCount, Imm32(min), readAtom); 179 180 // (3) Test the rest of the alternative. 181 if (!min) 182 start.link(this); 183 push(index); 184 m_parser.parseAlternative(alternativeFailedList); 185 alternativeFailedList.linkTo(alternativeFailed, this); 186 187 pop(); 188 pop(repeatCount); 189 } 190 191 void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) 192 { 193 if (!max) 194 return; 195 196 JumpList doneReadingAtomsList; 197 JumpList alternativeFailedList; 198 199 // (0) Setup: Save, then init repeatCount. 200 push(repeatCount); 201 move(Imm32(0), repeatCount); 202 203 // (1) Greedily read as many copies of the atom as possible, then jump to (2). 204 Label readAtom(this); 205 functor.generateAtom(this, doneReadingAtomsList); 206 add32(Imm32(1), repeatCount); 207 if (max == Quantifier::Infinity) 208 jump(readAtom); 209 else if (max == 1) 210 doneReadingAtomsList.append(jump()); 211 else { 212 branch32(NotEqual, repeatCount, Imm32(max), readAtom); 213 doneReadingAtomsList.append(jump()); 214 } 215 216 // (5) Quantifier failed: No more backtracking possible. 217 Label quantifierFailed(this); 218 pop(repeatCount); 219 failures.append(jump()); 220 221 // (4) Alternative failed: Backtrack, then fall through to (2) to try again. 222 Label alternativeFailed(this); 223 pop(index); 224 functor.backtrack(this); 225 sub32(Imm32(1), repeatCount); 226 227 // (2) Verify that we have enough atoms. 228 doneReadingAtomsList.link(this); 229 branch32(LessThan, repeatCount, Imm32(min), quantifierFailed); 230 231 // (3) Test the rest of the alternative. 232 push(index); 233 m_parser.parseAlternative(alternativeFailedList); 234 alternativeFailedList.linkTo(alternativeFailed, this); 235 236 pop(); 237 pop(repeatCount); 238 } 239 240 void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count) 241 { 242 for (size_t i = 0; i < count;) { 243 if (i < count - 1) { 244 if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) { 245 i += 2; 246 continue; 247 } 248 } 249 250 generatePatternCharacter(failures, sequence[i]); 251 ++i; 252 } 253 } 254 255 bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2) 256 { 257 if (m_parser.ignoreCase()) { 258 // Non-trivial case folding requires more than one test, so we can't 259 // test as a pair with an adjacent character. 260 if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1)) 261 return false; 262 if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2)) 263 return false; 264 } 265 266 // Optimistically consume 2 characters. 267 add32(Imm32(2), index); 268 failures.append(branch32(GreaterThan, index, length)); 269 270 // Load the characters we just consumed, offset -2 characters from index. 271 load32(BaseIndex(input, index, TimesTwo, -2 * 2), character); 272 273 if (m_parser.ignoreCase()) { 274 // Convert ASCII alphabet characters to upper case before testing for 275 // equality. (ASCII non-alphabet characters don't require upper-casing 276 // because they have no uppercase equivalents. Unicode characters don't 277 // require upper-casing because we only handle Unicode characters whose 278 // upper and lower cases are equal.) 279 int ch1Mask = 0; 280 if (isASCIIAlpha(ch1)) { 281 ch1 |= 32; 282 ch1Mask = 32; 283 } 284 285 int ch2Mask = 0; 286 if (isASCIIAlpha(ch2)) { 287 ch2 |= 32; 288 ch2Mask = 32; 289 } 290 291 int mask = ch1Mask | (ch2Mask << 16); 292 if (mask) 293 or32(Imm32(mask), character); 294 } 295 int pair = ch1 | (ch2 << 16); 296 297 failures.append(branch32(NotEqual, character, Imm32(pair))); 298 return true; 299 } 300 301 void Generator::generatePatternCharacter(JumpList& failures, int ch) 302 { 303 generateLoadCharacter(failures); 304 305 // used for unicode case insensitive 306 bool hasUpper = false; 307 Jump isUpper; 308 309 // if case insensitive match 310 if (m_parser.ignoreCase()) { 311 UChar lower, upper; 312 313 // check for ascii case sensitive characters 314 if (isASCIIAlpha(ch)) { 315 or32(Imm32(32), character); 316 ch |= 32; 317 } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) { 318 // handle unicode case sentitive characters - branch to success on upper 319 isUpper = branch32(Equal, character, Imm32(upper)); 320 hasUpper = true; 321 ch = lower; 322 } 323 } 324 325 // checks for ch, or lower case version of ch, if insensitive 326 failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch))); 327 328 if (m_parser.ignoreCase() && hasUpper) { 329 // for unicode case insensitive matches, branch here if upper matches. 330 isUpper.link(this); 331 } 332 333 // on success consume the char 334 add32(Imm32(1), index); 335 } 336 337 void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) 338 { 339 do { 340 // pick which range we're going to generate 341 int which = count >> 1; 342 char lo = ranges[which].begin; 343 char hi = ranges[which].end; 344 345 // check if there are any ranges or matches below lo. If not, just jl to failure - 346 // if there is anything else to check, check that first, if it falls through jmp to failure. 347 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 348 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 349 350 // generate code for all ranges before this one 351 if (which) 352 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); 353 354 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 355 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); 356 ++*matchIndex; 357 } 358 failures.append(jump()); 359 360 loOrAbove.link(this); 361 } else if (which) { 362 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 363 364 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); 365 failures.append(jump()); 366 367 loOrAbove.link(this); 368 } else 369 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); 370 371 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) 372 ++*matchIndex; 373 374 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); 375 // fall through to here, the value is above hi. 376 377 // shuffle along & loop around if there are any more matches to handle. 378 unsigned next = which + 1; 379 ranges += next; 380 count -= next; 381 } while (count); 382 } 383 384 void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass) 385 { 386 Jump unicodeFail; 387 if (charClass.numMatchesUnicode || charClass.numRangesUnicode) { 388 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); 389 390 if (charClass.numMatchesUnicode) { 391 for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) { 392 UChar ch = charClass.matchesUnicode[i]; 393 matchDest.append(branch32(Equal, character, Imm32(ch))); 394 } 395 } 396 397 if (charClass.numRangesUnicode) { 398 for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) { 399 UChar lo = charClass.rangesUnicode[i].begin; 400 UChar hi = charClass.rangesUnicode[i].end; 401 402 Jump below = branch32(LessThan, character, Imm32(lo)); 403 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); 404 below.link(this); 405 } 406 } 407 408 unicodeFail = jump(); 409 isAscii.link(this); 410 } 411 412 if (charClass.numRanges) { 413 unsigned matchIndex = 0; 414 JumpList failures; 415 generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches); 416 while (matchIndex < charClass.numMatches) 417 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++]))); 418 419 failures.link(this); 420 } else if (charClass.numMatches) { 421 // optimization: gather 'a','A' etc back together, can mask & test once. 422 Vector<char> matchesAZaz; 423 424 for (unsigned i = 0; i < charClass.numMatches; ++i) { 425 char ch = charClass.matches[i]; 426 if (m_parser.ignoreCase()) { 427 if (isASCIILower(ch)) { 428 matchesAZaz.append(ch); 429 continue; 430 } 431 if (isASCIIUpper(ch)) 432 continue; 433 } 434 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); 435 } 436 437 if (unsigned countAZaz = matchesAZaz.size()) { 438 or32(Imm32(32), character); 439 for (unsigned i = 0; i < countAZaz; ++i) 440 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); 441 } 442 } 443 444 if (charClass.numMatchesUnicode || charClass.numRangesUnicode) 445 unicodeFail.link(this); 446 } 447 448 void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert) 449 { 450 generateLoadCharacter(failures); 451 452 if (invert) 453 generateCharacterClassInverted(failures, charClass); 454 else { 455 JumpList successes; 456 generateCharacterClassInverted(successes, charClass); 457 failures.append(jump()); 458 successes.link(this); 459 } 460 461 add32(Imm32(1), index); 462 } 463 464 void Generator::generateParenthesesAssertion(JumpList& failures) 465 { 466 JumpList disjunctionFailed; 467 468 push(index); 469 m_parser.parseDisjunction(disjunctionFailed); 470 Jump success = jump(); 471 472 disjunctionFailed.link(this); 473 pop(index); 474 failures.append(jump()); 475 476 success.link(this); 477 pop(index); 478 } 479 480 void Generator::generateParenthesesInvertedAssertion(JumpList& failures) 481 { 482 JumpList disjunctionFailed; 483 484 push(index); 485 m_parser.parseDisjunction(disjunctionFailed); 486 487 // If the disjunction succeeded, the inverted assertion failed. 488 pop(index); 489 failures.append(jump()); 490 491 // If the disjunction failed, the inverted assertion succeeded. 492 disjunctionFailed.link(this); 493 pop(index); 494 } 495 496 void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail) 497 { 498 jump(start); 499 success.link(this); 500 failures.append(fail); 501 } 502 503 Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter) 504 { 505 Jump skip = jump(); 506 newFailures.link(this); 507 for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) { 508 store32(Imm32(-1), Address(output, (2 * i) * sizeof(int))); 509 store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int))); 510 } 511 512 Jump newFailJump = jump(); 513 skip.link(this); 514 515 return newFailJump; 516 } 517 518 void Generator::generateAssertionBOL(JumpList& failures) 519 { 520 if (m_parser.multiline()) { 521 JumpList previousIsNewline; 522 523 // begin of input == success 524 previousIsNewline.append(branch32(Equal, index, Imm32(0))); 525 526 // now check prev char against newline characters. 527 load16(BaseIndex(input, index, TimesTwo, -2), character); 528 generateCharacterClassInverted(previousIsNewline, CharacterClass::newline()); 529 530 failures.append(jump()); 531 532 previousIsNewline.link(this); 533 } else 534 failures.append(branch32(NotEqual, index, Imm32(0))); 535 } 536 537 void Generator::generateAssertionEOL(JumpList& failures) 538 { 539 if (m_parser.multiline()) { 540 JumpList nextIsNewline; 541 542 generateLoadCharacter(nextIsNewline); // end of input == success 543 generateCharacterClassInverted(nextIsNewline, CharacterClass::newline()); 544 failures.append(jump()); 545 nextIsNewline.link(this); 546 } else { 547 failures.append(branch32(NotEqual, length, index)); 548 } 549 } 550 551 void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert) 552 { 553 JumpList wordBoundary; 554 JumpList notWordBoundary; 555 556 // (1) Check if the previous value was a word char 557 558 // (1.1) check for begin of input 559 Jump atBegin = branch32(Equal, index, Imm32(0)); 560 // (1.2) load the last char, and chck if is word character 561 load16(BaseIndex(input, index, TimesTwo, -2), character); 562 JumpList previousIsWord; 563 generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar()); 564 // (1.3) if we get here, previous is not a word char 565 atBegin.link(this); 566 567 // (2) Handle situation where previous was NOT a \w 568 569 generateLoadCharacter(notWordBoundary); 570 generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar()); 571 // (2.1) If we get here, neither chars are word chars 572 notWordBoundary.append(jump()); 573 574 // (3) Handle situation where previous was a \w 575 576 // (3.0) link success in first match to here 577 previousIsWord.link(this); 578 generateLoadCharacter(wordBoundary); 579 generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar()); 580 // (3.1) If we get here, this is an end of a word, within the input. 581 582 // (4) Link everything up 583 584 if (invert) { 585 // handle the fall through case 586 wordBoundary.append(jump()); 587 588 // looking for non word boundaries, so link boundary fails to here. 589 notWordBoundary.link(this); 590 591 failures.append(wordBoundary); 592 } else { 593 // looking for word boundaries, so link successes here. 594 wordBoundary.link(this); 595 596 failures.append(notWordBoundary); 597 } 598 } 599 600 void Generator::generateBackreference(JumpList& failures, unsigned subpatternId) 601 { 602 push(index); 603 push(repeatCount); 604 605 // get the start pos of the backref into repeatCount (multipurpose!) 606 load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount); 607 608 Jump skipIncrement = jump(); 609 Label topOfLoop(this); 610 611 add32(Imm32(1), index); 612 add32(Imm32(1), repeatCount); 613 skipIncrement.link(this); 614 615 // check if we're at the end of backref (if we are, success!) 616 Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount); 617 618 load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character); 619 620 // check if we've run out of input (this would be a can o'fail) 621 Jump endOfInput = branch32(Equal, length, index); 622 623 branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop); 624 625 endOfInput.link(this); 626 627 // Failure 628 pop(repeatCount); 629 pop(index); 630 failures.append(jump()); 631 632 // Success 633 endOfBackRef.link(this); 634 pop(repeatCount); 635 pop(); 636 } 637 638 void Generator::terminateAlternative(JumpList& successes, JumpList& failures) 639 { 640 successes.append(jump()); 641 642 failures.link(this); 643 peek(index); 644 } 645 646 void Generator::terminateDisjunction(JumpList& successes) 647 { 648 successes.link(this); 649 } 650 651 } } // namespace JSC::WREC 652 653 #endif // ENABLE(WREC) 654