1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "RegexJIT.h" 28 29 #include "ASCIICType.h" 30 #include "JSGlobalData.h" 31 #include "LinkBuffer.h" 32 #include "MacroAssembler.h" 33 #include "RegexCompiler.h" 34 35 #include "pcre.h" // temporary, remove when fallback is removed. 36 37 #if ENABLE(YARR_JIT) 38 39 using namespace WTF; 40 41 namespace JSC { namespace Yarr { 42 43 44 class RegexGenerator : private MacroAssembler { 45 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); 46 47 #if CPU(ARM) 48 static const RegisterID input = ARMRegisters::r0; 49 static const RegisterID index = ARMRegisters::r1; 50 static const RegisterID length = ARMRegisters::r2; 51 static const RegisterID output = ARMRegisters::r4; 52 53 static const RegisterID regT0 = ARMRegisters::r5; 54 static const RegisterID regT1 = ARMRegisters::r6; 55 56 static const RegisterID returnRegister = ARMRegisters::r0; 57 #elif CPU(X86) 58 static const RegisterID input = X86Registers::eax; 59 static const RegisterID index = X86Registers::edx; 60 static const RegisterID length = X86Registers::ecx; 61 static const RegisterID output = X86Registers::edi; 62 63 static const RegisterID regT0 = X86Registers::ebx; 64 static const RegisterID regT1 = X86Registers::esi; 65 66 static const RegisterID returnRegister = X86Registers::eax; 67 #elif CPU(X86_64) 68 static const RegisterID input = X86Registers::edi; 69 static const RegisterID index = X86Registers::esi; 70 static const RegisterID length = X86Registers::edx; 71 static const RegisterID output = X86Registers::ecx; 72 73 static const RegisterID regT0 = X86Registers::eax; 74 static const RegisterID regT1 = X86Registers::ebx; 75 76 static const RegisterID returnRegister = X86Registers::eax; 77 #endif 78 79 void optimizeAlternative(PatternAlternative* alternative) 80 { 81 if (!alternative->m_terms.size()) 82 return; 83 84 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { 85 PatternTerm& term = alternative->m_terms[i]; 86 PatternTerm& nextTerm = alternative->m_terms[i + 1]; 87 88 if ((term.type == PatternTerm::TypeCharacterClass) 89 && (term.quantityType == QuantifierFixedCount) 90 && (nextTerm.type == PatternTerm::TypePatternCharacter) 91 && (nextTerm.quantityType == QuantifierFixedCount)) { 92 PatternTerm termCopy = term; 93 alternative->m_terms[i] = nextTerm; 94 alternative->m_terms[i + 1] = termCopy; 95 } 96 } 97 } 98 99 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) 100 { 101 do { 102 // pick which range we're going to generate 103 int which = count >> 1; 104 char lo = ranges[which].begin; 105 char hi = ranges[which].end; 106 107 // check if there are any ranges or matches below lo. If not, just jl to failure - 108 // if there is anything else to check, check that first, if it falls through jmp to failure. 109 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 110 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 111 112 // generate code for all ranges before this one 113 if (which) 114 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); 115 116 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 117 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); 118 ++*matchIndex; 119 } 120 failures.append(jump()); 121 122 loOrAbove.link(this); 123 } else if (which) { 124 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 125 126 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); 127 failures.append(jump()); 128 129 loOrAbove.link(this); 130 } else 131 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); 132 133 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) 134 ++*matchIndex; 135 136 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); 137 // fall through to here, the value is above hi. 138 139 // shuffle along & loop around if there are any more matches to handle. 140 unsigned next = which + 1; 141 ranges += next; 142 count -= next; 143 } while (count); 144 } 145 146 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) 147 { 148 Jump unicodeFail; 149 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { 150 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); 151 152 if (charClass->m_matchesUnicode.size()) { 153 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { 154 UChar ch = charClass->m_matchesUnicode[i]; 155 matchDest.append(branch32(Equal, character, Imm32(ch))); 156 } 157 } 158 159 if (charClass->m_rangesUnicode.size()) { 160 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { 161 UChar lo = charClass->m_rangesUnicode[i].begin; 162 UChar hi = charClass->m_rangesUnicode[i].end; 163 164 Jump below = branch32(LessThan, character, Imm32(lo)); 165 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); 166 below.link(this); 167 } 168 } 169 170 unicodeFail = jump(); 171 isAscii.link(this); 172 } 173 174 if (charClass->m_ranges.size()) { 175 unsigned matchIndex = 0; 176 JumpList failures; 177 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); 178 while (matchIndex < charClass->m_matches.size()) 179 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); 180 181 failures.link(this); 182 } else if (charClass->m_matches.size()) { 183 // optimization: gather 'a','A' etc back together, can mask & test once. 184 Vector<char> matchesAZaz; 185 186 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { 187 char ch = charClass->m_matches[i]; 188 if (m_pattern.m_ignoreCase) { 189 if (isASCIILower(ch)) { 190 matchesAZaz.append(ch); 191 continue; 192 } 193 if (isASCIIUpper(ch)) 194 continue; 195 } 196 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); 197 } 198 199 if (unsigned countAZaz = matchesAZaz.size()) { 200 or32(Imm32(32), character); 201 for (unsigned i = 0; i < countAZaz; ++i) 202 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); 203 } 204 } 205 206 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) 207 unicodeFail.link(this); 208 } 209 210 // Jumps if input not available; will have (incorrectly) incremented already! 211 Jump jumpIfNoAvailableInput(unsigned countToCheck) 212 { 213 add32(Imm32(countToCheck), index); 214 return branch32(Above, index, length); 215 } 216 217 Jump jumpIfAvailableInput(unsigned countToCheck) 218 { 219 add32(Imm32(countToCheck), index); 220 return branch32(BelowOrEqual, index, length); 221 } 222 223 Jump checkInput() 224 { 225 return branch32(BelowOrEqual, index, length); 226 } 227 228 Jump atEndOfInput() 229 { 230 return branch32(Equal, index, length); 231 } 232 233 Jump notAtEndOfInput() 234 { 235 return branch32(NotEqual, index, length); 236 } 237 238 Jump jumpIfCharEquals(UChar ch, int inputPosition) 239 { 240 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); 241 } 242 243 Jump jumpIfCharNotEquals(UChar ch, int inputPosition) 244 { 245 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); 246 } 247 248 void readCharacter(int inputPosition, RegisterID reg) 249 { 250 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); 251 } 252 253 void storeToFrame(RegisterID reg, unsigned frameLocation) 254 { 255 poke(reg, frameLocation); 256 } 257 258 void storeToFrame(Imm32 imm, unsigned frameLocation) 259 { 260 poke(imm, frameLocation); 261 } 262 263 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) 264 { 265 return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); 266 } 267 268 void loadFromFrame(unsigned frameLocation, RegisterID reg) 269 { 270 peek(reg, frameLocation); 271 } 272 273 void loadFromFrameAndJump(unsigned frameLocation) 274 { 275 jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); 276 } 277 278 struct AlternativeBacktrackRecord { 279 DataLabelPtr dataLabel; 280 Label backtrackLocation; 281 282 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) 283 : dataLabel(dataLabel) 284 , backtrackLocation(backtrackLocation) 285 { 286 } 287 }; 288 289 struct TermGenerationState { 290 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) 291 : disjunction(disjunction) 292 , checkedTotal(checkedTotal) 293 { 294 } 295 296 void resetAlternative() 297 { 298 isBackTrackGenerated = false; 299 alt = 0; 300 } 301 bool alternativeValid() 302 { 303 return alt < disjunction->m_alternatives.size(); 304 } 305 void nextAlternative() 306 { 307 ++alt; 308 } 309 PatternAlternative* alternative() 310 { 311 return disjunction->m_alternatives[alt]; 312 } 313 314 void resetTerm() 315 { 316 ASSERT(alternativeValid()); 317 t = 0; 318 } 319 bool termValid() 320 { 321 ASSERT(alternativeValid()); 322 return t < alternative()->m_terms.size(); 323 } 324 void nextTerm() 325 { 326 ASSERT(alternativeValid()); 327 ++t; 328 } 329 PatternTerm& term() 330 { 331 ASSERT(alternativeValid()); 332 return alternative()->m_terms[t]; 333 } 334 335 PatternTerm& lookaheadTerm() 336 { 337 ASSERT(alternativeValid()); 338 ASSERT((t + 1) < alternative()->m_terms.size()); 339 return alternative()->m_terms[t + 1]; 340 } 341 bool isSinglePatternCharacterLookaheadTerm() 342 { 343 ASSERT(alternativeValid()); 344 return ((t + 1) < alternative()->m_terms.size()) 345 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) 346 && (lookaheadTerm().quantityType == QuantifierFixedCount) 347 && (lookaheadTerm().quantityCount == 1); 348 } 349 350 int inputOffset() 351 { 352 return term().inputPosition - checkedTotal; 353 } 354 355 void jumpToBacktrack(Jump jump, MacroAssembler* masm) 356 { 357 if (isBackTrackGenerated) 358 jump.linkTo(backtrackLabel, masm); 359 else 360 backTrackJumps.append(jump); 361 } 362 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm) 363 { 364 if (isBackTrackGenerated) 365 jumps.linkTo(backtrackLabel, masm); 366 else 367 backTrackJumps.append(jumps); 368 } 369 bool plantJumpToBacktrackIfExists(MacroAssembler* masm) 370 { 371 if (isBackTrackGenerated) { 372 masm->jump(backtrackLabel); 373 return true; 374 } 375 return false; 376 } 377 void addBacktrackJump(Jump jump) 378 { 379 backTrackJumps.append(jump); 380 } 381 void setBacktrackGenerated(Label label) 382 { 383 isBackTrackGenerated = true; 384 backtrackLabel = label; 385 } 386 void linkAlternativeBacktracks(MacroAssembler* masm) 387 { 388 isBackTrackGenerated = false; 389 backTrackJumps.link(masm); 390 } 391 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm) 392 { 393 isBackTrackGenerated = false; 394 backTrackJumps.linkTo(label, masm); 395 } 396 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm) 397 { 398 jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm); 399 if (nestedParenthesesState.isBackTrackGenerated) 400 setBacktrackGenerated(nestedParenthesesState.backtrackLabel); 401 } 402 403 PatternDisjunction* disjunction; 404 int checkedTotal; 405 private: 406 unsigned alt; 407 unsigned t; 408 JumpList backTrackJumps; 409 Label backtrackLabel; 410 bool isBackTrackGenerated; 411 }; 412 413 void generateAssertionBOL(TermGenerationState& state) 414 { 415 PatternTerm& term = state.term(); 416 417 if (m_pattern.m_multiline) { 418 const RegisterID character = regT0; 419 420 JumpList matchDest; 421 if (!term.inputPosition) 422 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); 423 424 readCharacter(state.inputOffset() - 1, character); 425 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 426 state.jumpToBacktrack(jump(), this); 427 428 matchDest.link(this); 429 } else { 430 // Erk, really should poison out these alternatives early. :-/ 431 if (term.inputPosition) 432 state.jumpToBacktrack(jump(), this); 433 else 434 state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this); 435 } 436 } 437 438 void generateAssertionEOL(TermGenerationState& state) 439 { 440 PatternTerm& term = state.term(); 441 442 if (m_pattern.m_multiline) { 443 const RegisterID character = regT0; 444 445 JumpList matchDest; 446 if (term.inputPosition == state.checkedTotal) 447 matchDest.append(atEndOfInput()); 448 449 readCharacter(state.inputOffset(), character); 450 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 451 state.jumpToBacktrack(jump(), this); 452 453 matchDest.link(this); 454 } else { 455 if (term.inputPosition == state.checkedTotal) 456 state.jumpToBacktrack(notAtEndOfInput(), this); 457 // Erk, really should poison out these alternatives early. :-/ 458 else 459 state.jumpToBacktrack(jump(), this); 460 } 461 } 462 463 // Also falls though on nextIsNotWordChar. 464 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) 465 { 466 const RegisterID character = regT0; 467 PatternTerm& term = state.term(); 468 469 if (term.inputPosition == state.checkedTotal) 470 nextIsNotWordChar.append(atEndOfInput()); 471 472 readCharacter(state.inputOffset(), character); 473 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); 474 } 475 476 void generateAssertionWordBoundary(TermGenerationState& state) 477 { 478 const RegisterID character = regT0; 479 PatternTerm& term = state.term(); 480 481 Jump atBegin; 482 JumpList matchDest; 483 if (!term.inputPosition) 484 atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); 485 readCharacter(state.inputOffset() - 1, character); 486 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); 487 if (!term.inputPosition) 488 atBegin.link(this); 489 490 // We fall through to here if the last character was not a wordchar. 491 JumpList nonWordCharThenWordChar; 492 JumpList nonWordCharThenNonWordChar; 493 if (term.invertOrCapture) { 494 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); 495 nonWordCharThenWordChar.append(jump()); 496 } else { 497 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); 498 nonWordCharThenNonWordChar.append(jump()); 499 } 500 state.jumpToBacktrack(nonWordCharThenNonWordChar, this); 501 502 // We jump here if the last character was a wordchar. 503 matchDest.link(this); 504 JumpList wordCharThenWordChar; 505 JumpList wordCharThenNonWordChar; 506 if (term.invertOrCapture) { 507 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); 508 wordCharThenWordChar.append(jump()); 509 } else { 510 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); 511 // This can fall-though! 512 } 513 514 state.jumpToBacktrack(wordCharThenWordChar, this); 515 516 nonWordCharThenWordChar.link(this); 517 wordCharThenNonWordChar.link(this); 518 } 519 520 void generatePatternCharacterSingle(TermGenerationState& state) 521 { 522 const RegisterID character = regT0; 523 UChar ch = state.term().patternCharacter; 524 525 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 526 readCharacter(state.inputOffset(), character); 527 or32(Imm32(32), character); 528 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); 529 } else { 530 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 531 state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this); 532 } 533 } 534 535 void generatePatternCharacterPair(TermGenerationState& state) 536 { 537 const RegisterID character = regT0; 538 UChar ch1 = state.term().patternCharacter; 539 UChar ch2 = state.lookaheadTerm().patternCharacter; 540 541 int mask = 0; 542 int chPair = ch1 | (ch2 << 16); 543 544 if (m_pattern.m_ignoreCase) { 545 if (isASCIIAlpha(ch1)) 546 mask |= 32; 547 if (isASCIIAlpha(ch2)) 548 mask |= 32 << 16; 549 } 550 551 if (mask) { 552 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); 553 or32(Imm32(mask), character); 554 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this); 555 } else 556 state.jumpToBacktrack(branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this); 557 } 558 559 void generatePatternCharacterFixed(TermGenerationState& state) 560 { 561 const RegisterID character = regT0; 562 const RegisterID countRegister = regT1; 563 PatternTerm& term = state.term(); 564 UChar ch = term.patternCharacter; 565 566 move(index, countRegister); 567 sub32(Imm32(term.quantityCount), countRegister); 568 569 Label loop(this); 570 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 571 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); 572 or32(Imm32(32), character); 573 state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this); 574 } else { 575 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 576 state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this); 577 } 578 add32(Imm32(1), countRegister); 579 branch32(NotEqual, countRegister, index).linkTo(loop, this); 580 } 581 582 void generatePatternCharacterGreedy(TermGenerationState& state) 583 { 584 const RegisterID character = regT0; 585 const RegisterID countRegister = regT1; 586 PatternTerm& term = state.term(); 587 UChar ch = term.patternCharacter; 588 589 move(Imm32(0), countRegister); 590 591 JumpList failures; 592 Label loop(this); 593 failures.append(atEndOfInput()); 594 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 595 readCharacter(state.inputOffset(), character); 596 or32(Imm32(32), character); 597 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 598 } else { 599 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 600 failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); 601 } 602 add32(Imm32(1), countRegister); 603 add32(Imm32(1), index); 604 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 605 failures.append(jump()); 606 607 Label backtrackBegin(this); 608 loadFromFrame(term.frameLocation, countRegister); 609 state.jumpToBacktrack(branchTest32(Zero, countRegister), this); 610 sub32(Imm32(1), countRegister); 611 sub32(Imm32(1), index); 612 613 failures.link(this); 614 615 storeToFrame(countRegister, term.frameLocation); 616 617 state.setBacktrackGenerated(backtrackBegin); 618 } 619 620 void generatePatternCharacterNonGreedy(TermGenerationState& state) 621 { 622 const RegisterID character = regT0; 623 const RegisterID countRegister = regT1; 624 PatternTerm& term = state.term(); 625 UChar ch = term.patternCharacter; 626 627 move(Imm32(0), countRegister); 628 629 Jump firstTimeDoNothing = jump(); 630 631 Label hardFail(this); 632 sub32(countRegister, index); 633 state.jumpToBacktrack(jump(), this); 634 635 Label backtrackBegin(this); 636 loadFromFrame(term.frameLocation, countRegister); 637 638 atEndOfInput().linkTo(hardFail, this); 639 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 640 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 641 readCharacter(state.inputOffset(), character); 642 or32(Imm32(32), character); 643 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); 644 } else { 645 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 646 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); 647 } 648 649 add32(Imm32(1), countRegister); 650 add32(Imm32(1), index); 651 652 firstTimeDoNothing.link(this); 653 storeToFrame(countRegister, term.frameLocation); 654 655 state.setBacktrackGenerated(backtrackBegin); 656 } 657 658 void generateCharacterClassSingle(TermGenerationState& state) 659 { 660 const RegisterID character = regT0; 661 PatternTerm& term = state.term(); 662 663 JumpList matchDest; 664 readCharacter(state.inputOffset(), character); 665 matchCharacterClass(character, matchDest, term.characterClass); 666 667 if (term.invertOrCapture) 668 state.jumpToBacktrack(matchDest, this); 669 else { 670 state.jumpToBacktrack(jump(), this); 671 matchDest.link(this); 672 } 673 } 674 675 void generateCharacterClassFixed(TermGenerationState& state) 676 { 677 const RegisterID character = regT0; 678 const RegisterID countRegister = regT1; 679 PatternTerm& term = state.term(); 680 681 move(index, countRegister); 682 sub32(Imm32(term.quantityCount), countRegister); 683 684 Label loop(this); 685 JumpList matchDest; 686 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); 687 matchCharacterClass(character, matchDest, term.characterClass); 688 689 if (term.invertOrCapture) 690 state.jumpToBacktrack(matchDest, this); 691 else { 692 state.jumpToBacktrack(jump(), this); 693 matchDest.link(this); 694 } 695 696 add32(Imm32(1), countRegister); 697 branch32(NotEqual, countRegister, index).linkTo(loop, this); 698 } 699 700 void generateCharacterClassGreedy(TermGenerationState& state) 701 { 702 const RegisterID character = regT0; 703 const RegisterID countRegister = regT1; 704 PatternTerm& term = state.term(); 705 706 move(Imm32(0), countRegister); 707 708 JumpList failures; 709 Label loop(this); 710 failures.append(atEndOfInput()); 711 712 if (term.invertOrCapture) { 713 readCharacter(state.inputOffset(), character); 714 matchCharacterClass(character, failures, term.characterClass); 715 } else { 716 JumpList matchDest; 717 readCharacter(state.inputOffset(), character); 718 matchCharacterClass(character, matchDest, term.characterClass); 719 failures.append(jump()); 720 matchDest.link(this); 721 } 722 723 add32(Imm32(1), countRegister); 724 add32(Imm32(1), index); 725 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 726 failures.append(jump()); 727 728 Label backtrackBegin(this); 729 loadFromFrame(term.frameLocation, countRegister); 730 state.jumpToBacktrack(branchTest32(Zero, countRegister), this); 731 sub32(Imm32(1), countRegister); 732 sub32(Imm32(1), index); 733 734 failures.link(this); 735 736 storeToFrame(countRegister, term.frameLocation); 737 738 state.setBacktrackGenerated(backtrackBegin); 739 } 740 741 void generateCharacterClassNonGreedy(TermGenerationState& state) 742 { 743 const RegisterID character = regT0; 744 const RegisterID countRegister = regT1; 745 PatternTerm& term = state.term(); 746 747 move(Imm32(0), countRegister); 748 749 Jump firstTimeDoNothing = jump(); 750 751 Label hardFail(this); 752 sub32(countRegister, index); 753 state.jumpToBacktrack(jump(), this); 754 755 Label backtrackBegin(this); 756 loadFromFrame(term.frameLocation, countRegister); 757 758 atEndOfInput().linkTo(hardFail, this); 759 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 760 761 JumpList matchDest; 762 readCharacter(state.inputOffset(), character); 763 matchCharacterClass(character, matchDest, term.characterClass); 764 765 if (term.invertOrCapture) 766 matchDest.linkTo(hardFail, this); 767 else { 768 jump(hardFail); 769 matchDest.link(this); 770 } 771 772 add32(Imm32(1), countRegister); 773 add32(Imm32(1), index); 774 775 firstTimeDoNothing.link(this); 776 storeToFrame(countRegister, term.frameLocation); 777 778 state.setBacktrackGenerated(backtrackBegin); 779 } 780 781 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) 782 { 783 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); 784 ASSERT(parenthesesTerm.quantityCount == 1); 785 786 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; 787 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; 788 789 if (disjunction->m_alternatives.size() == 1) { 790 state.resetAlternative(); 791 ASSERT(state.alternativeValid()); 792 PatternAlternative* alternative = state.alternative(); 793 optimizeAlternative(alternative); 794 795 int countToCheck = alternative->m_minimumSize - preCheckedCount; 796 if (countToCheck) { 797 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); 798 799 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' 800 // will be forced to always trampoline into here, just to decrement the index. 801 // Ick. 802 Jump skip = jump(); 803 804 Label backtrackBegin(this); 805 sub32(Imm32(countToCheck), index); 806 state.addBacktrackJump(jump()); 807 808 skip.link(this); 809 810 state.setBacktrackGenerated(backtrackBegin); 811 812 state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this); 813 state.checkedTotal += countToCheck; 814 } 815 816 for (state.resetTerm(); state.termValid(); state.nextTerm()) 817 generateTerm(state); 818 819 state.checkedTotal -= countToCheck; 820 } else { 821 JumpList successes; 822 823 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) { 824 825 PatternAlternative* alternative = state.alternative(); 826 optimizeAlternative(alternative); 827 828 ASSERT(alternative->m_minimumSize >= preCheckedCount); 829 int countToCheck = alternative->m_minimumSize - preCheckedCount; 830 if (countToCheck) { 831 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); 832 state.checkedTotal += countToCheck; 833 } 834 835 for (state.resetTerm(); state.termValid(); state.nextTerm()) 836 generateTerm(state); 837 838 // Matched an alternative. 839 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); 840 successes.append(jump()); 841 842 // Alternative did not match. 843 Label backtrackLocation(this); 844 845 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one. 846 state.plantJumpToBacktrackIfExists(this); 847 848 state.linkAlternativeBacktracks(this); 849 850 if (countToCheck) { 851 sub32(Imm32(countToCheck), index); 852 state.checkedTotal -= countToCheck; 853 } 854 855 m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation)); 856 } 857 // We fall through to here when the last alternative fails. 858 // Add a backtrack out of here for the parenthese handling code to link up. 859 state.addBacktrackJump(jump()); 860 861 // Generate a trampoline for the parens code to backtrack to, to retry the 862 // next alternative. 863 state.setBacktrackGenerated(label()); 864 loadFromFrameAndJump(alternativeFrameLocation); 865 866 // FIXME: both of the above hooks are a little inefficient, in that you 867 // may end up trampolining here, just to trampoline back out to the 868 // parentheses code, or vice versa. We can probably eliminate a jump 869 // by restructuring, but coding this way for now for simplicity during 870 // development. 871 872 successes.link(this); 873 } 874 } 875 876 void generateParenthesesSingle(TermGenerationState& state) 877 { 878 const RegisterID indexTemporary = regT0; 879 PatternTerm& term = state.term(); 880 PatternDisjunction* disjunction = term.parentheses.disjunction; 881 ASSERT(term.quantityCount == 1); 882 883 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0; 884 885 unsigned parenthesesFrameLocation = term.frameLocation; 886 unsigned alternativeFrameLocation = parenthesesFrameLocation; 887 if (term.quantityType != QuantifierFixedCount) 888 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce; 889 890 // optimized case - no capture & no quantifier can be handled in a light-weight manner. 891 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) { 892 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 893 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 894 // this expects that any backtracks back out of the parentheses will be in the 895 // parenthesesState's backTrackJumps vector, and that if they need backtracking 896 // they will have set an entry point on the parenthesesState's backtrackLabel. 897 state.propagateBacktrackingFrom(parenthesesState, this); 898 } else { 899 Jump nonGreedySkipParentheses; 900 Label nonGreedyTryParentheses; 901 if (term.quantityType == QuantifierGreedy) 902 storeToFrame(Imm32(1), parenthesesFrameLocation); 903 else if (term.quantityType == QuantifierNonGreedy) { 904 storeToFrame(Imm32(0), parenthesesFrameLocation); 905 nonGreedySkipParentheses = jump(); 906 nonGreedyTryParentheses = label(); 907 storeToFrame(Imm32(1), parenthesesFrameLocation); 908 } 909 910 // store the match start index 911 if (term.invertOrCapture) { 912 int inputOffset = state.inputOffset() - preCheckedCount; 913 if (inputOffset) { 914 move(index, indexTemporary); 915 add32(Imm32(inputOffset), indexTemporary); 916 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 917 } else 918 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 919 } 920 921 // generate the body of the parentheses 922 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 923 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 924 925 // store the match end index 926 if (term.invertOrCapture) { 927 int inputOffset = state.inputOffset(); 928 if (inputOffset) { 929 move(index, indexTemporary); 930 add32(Imm32(state.inputOffset()), indexTemporary); 931 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 932 } else 933 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 934 } 935 Jump success = jump(); 936 937 // A failure AFTER the parens jumps here 938 Label backtrackFromAfterParens(this); 939 940 if (term.quantityType == QuantifierGreedy) { 941 // If this is zero we have now tested with both with and without the parens. 942 loadFromFrame(parenthesesFrameLocation, indexTemporary); 943 state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this); 944 } else if (term.quantityType == QuantifierNonGreedy) { 945 // If this is zero we have now tested with both with and without the parens. 946 loadFromFrame(parenthesesFrameLocation, indexTemporary); 947 branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this); 948 } 949 950 parenthesesState.plantJumpToBacktrackIfExists(this); 951 // A failure WITHIN the parens jumps here 952 parenthesesState.linkAlternativeBacktracks(this); 953 if (term.invertOrCapture) { 954 store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 955 store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 956 } 957 958 if (term.quantityType == QuantifierGreedy) 959 storeToFrame(Imm32(0), parenthesesFrameLocation); 960 else 961 state.jumpToBacktrack(jump(), this); 962 963 state.setBacktrackGenerated(backtrackFromAfterParens); 964 if (term.quantityType == QuantifierNonGreedy) 965 nonGreedySkipParentheses.link(this); 966 success.link(this); 967 } 968 } 969 970 void generateParentheticalAssertion(TermGenerationState& state) 971 { 972 PatternTerm& term = state.term(); 973 PatternDisjunction* disjunction = term.parentheses.disjunction; 974 ASSERT(term.quantityCount == 1); 975 ASSERT(term.quantityType == QuantifierFixedCount); 976 977 unsigned parenthesesFrameLocation = term.frameLocation; 978 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion; 979 980 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; 981 982 if (term.invertOrCapture) { 983 // Inverted case 984 storeToFrame(index, parenthesesFrameLocation); 985 986 state.checkedTotal -= countCheckedAfterAssertion; 987 if (countCheckedAfterAssertion) 988 sub32(Imm32(countCheckedAfterAssertion), index); 989 990 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 991 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 992 // Success! - which means - Fail! 993 loadFromFrame(parenthesesFrameLocation, index); 994 state.jumpToBacktrack(jump(), this); 995 996 // And fail means success. 997 parenthesesState.linkAlternativeBacktracks(this); 998 loadFromFrame(parenthesesFrameLocation, index); 999 1000 state.checkedTotal += countCheckedAfterAssertion; 1001 } else { 1002 // Normal case 1003 storeToFrame(index, parenthesesFrameLocation); 1004 1005 state.checkedTotal -= countCheckedAfterAssertion; 1006 if (countCheckedAfterAssertion) 1007 sub32(Imm32(countCheckedAfterAssertion), index); 1008 1009 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1010 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1011 // Success! - which means - Success! 1012 loadFromFrame(parenthesesFrameLocation, index); 1013 Jump success = jump(); 1014 1015 parenthesesState.linkAlternativeBacktracks(this); 1016 loadFromFrame(parenthesesFrameLocation, index); 1017 state.jumpToBacktrack(jump(), this); 1018 1019 success.link(this); 1020 1021 state.checkedTotal += countCheckedAfterAssertion; 1022 } 1023 } 1024 1025 void generateTerm(TermGenerationState& state) 1026 { 1027 PatternTerm& term = state.term(); 1028 1029 switch (term.type) { 1030 case PatternTerm::TypeAssertionBOL: 1031 generateAssertionBOL(state); 1032 break; 1033 1034 case PatternTerm::TypeAssertionEOL: 1035 generateAssertionEOL(state); 1036 break; 1037 1038 case PatternTerm::TypeAssertionWordBoundary: 1039 generateAssertionWordBoundary(state); 1040 break; 1041 1042 case PatternTerm::TypePatternCharacter: 1043 switch (term.quantityType) { 1044 case QuantifierFixedCount: 1045 if (term.quantityCount == 1) { 1046 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { 1047 generatePatternCharacterPair(state); 1048 state.nextTerm(); 1049 } else 1050 generatePatternCharacterSingle(state); 1051 } else 1052 generatePatternCharacterFixed(state); 1053 break; 1054 case QuantifierGreedy: 1055 generatePatternCharacterGreedy(state); 1056 break; 1057 case QuantifierNonGreedy: 1058 generatePatternCharacterNonGreedy(state); 1059 break; 1060 } 1061 break; 1062 1063 case PatternTerm::TypeCharacterClass: 1064 switch (term.quantityType) { 1065 case QuantifierFixedCount: 1066 if (term.quantityCount == 1) 1067 generateCharacterClassSingle(state); 1068 else 1069 generateCharacterClassFixed(state); 1070 break; 1071 case QuantifierGreedy: 1072 generateCharacterClassGreedy(state); 1073 break; 1074 case QuantifierNonGreedy: 1075 generateCharacterClassNonGreedy(state); 1076 break; 1077 } 1078 break; 1079 1080 case PatternTerm::TypeBackReference: 1081 m_generationFailed = true; 1082 break; 1083 1084 case PatternTerm::TypeForwardReference: 1085 break; 1086 1087 case PatternTerm::TypeParenthesesSubpattern: 1088 if ((term.quantityCount == 1) && !term.parentheses.isCopy) 1089 generateParenthesesSingle(state); 1090 else 1091 m_generationFailed = true; 1092 break; 1093 1094 case PatternTerm::TypeParentheticalAssertion: 1095 generateParentheticalAssertion(state); 1096 break; 1097 } 1098 } 1099 1100 void generateDisjunction(PatternDisjunction* disjunction) 1101 { 1102 TermGenerationState state(disjunction, 0); 1103 state.resetAlternative(); 1104 1105 // Plant a check to see if there is sufficient input available to run the first alternative. 1106 // Jumping back to the label 'firstAlternative' will get to this check, jumping to 1107 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having 1108 // skipped this check. 1109 1110 Label firstAlternative(this); 1111 1112 // check availability for the next alternative 1113 int countCheckedForCurrentAlternative = 0; 1114 int countToCheckForFirstAlternative = 0; 1115 bool hasShorterAlternatives = false; 1116 JumpList notEnoughInputForPreviousAlternative; 1117 1118 if (state.alternativeValid()) { 1119 PatternAlternative* alternative = state.alternative(); 1120 countToCheckForFirstAlternative = alternative->m_minimumSize; 1121 state.checkedTotal += countToCheckForFirstAlternative; 1122 if (countToCheckForFirstAlternative) 1123 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); 1124 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; 1125 } 1126 1127 Label firstAlternativeInputChecked(this); 1128 1129 while (state.alternativeValid()) { 1130 // Track whether any alternatives are shorter than the first one. 1131 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); 1132 1133 PatternAlternative* alternative = state.alternative(); 1134 optimizeAlternative(alternative); 1135 1136 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1137 generateTerm(state); 1138 1139 // If we get here, the alternative matched. 1140 if (m_pattern.m_body->m_callFrameSize) 1141 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1142 1143 ASSERT(index != returnRegister); 1144 if (m_pattern.m_body->m_hasFixedSize) { 1145 move(index, returnRegister); 1146 if (alternative->m_minimumSize) 1147 sub32(Imm32(alternative->m_minimumSize), returnRegister); 1148 } else 1149 pop(returnRegister); 1150 store32(index, Address(output, 4)); 1151 store32(returnRegister, output); 1152 1153 generateReturn(); 1154 1155 state.nextAlternative(); 1156 1157 // if there are any more alternatives, plant the check for input before looping. 1158 if (state.alternativeValid()) { 1159 PatternAlternative* nextAlternative = state.alternative(); 1160 int countToCheckForNextAlternative = nextAlternative->m_minimumSize; 1161 1162 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. 1163 // If we get here, there the last input checked failed. 1164 notEnoughInputForPreviousAlternative.link(this); 1165 1166 // Check if sufficent input available to run the next alternative 1167 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 1168 // We are now in the correct state to enter the next alternative; this add is only required 1169 // to mirror and revert operation of the sub32, just below. 1170 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 1171 1172 // If we get here, there the last input checked passed. 1173 state.linkAlternativeBacktracks(this); 1174 // No need to check if we can run the next alternative, since it is shorter - 1175 // just update index. 1176 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 1177 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. 1178 // If we get here, there the last input checked failed. 1179 // If there is insufficient input to run the current alternative, and the next alternative is longer, 1180 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if 1181 // we had checked. 1182 notEnoughInputForPreviousAlternative.link(this); 1183 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); 1184 notEnoughInputForPreviousAlternative.append(jump()); 1185 1186 // The next alternative is longer than the current one; check the difference. 1187 state.linkAlternativeBacktracks(this); 1188 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 1189 } else { // CASE 3: Both alternatives are the same length. 1190 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); 1191 1192 // If the next alterative is the same length as this one, then no need to check the input - 1193 // if there was sufficent input to run the current alternative then there is sufficient 1194 // input to run the next one; if not, there isn't. 1195 state.linkAlternativeBacktracks(this); 1196 } 1197 1198 state.checkedTotal -= countCheckedForCurrentAlternative; 1199 countCheckedForCurrentAlternative = countToCheckForNextAlternative; 1200 state.checkedTotal += countCheckedForCurrentAlternative; 1201 } 1202 } 1203 1204 // If we get here, all Alternatives failed... 1205 1206 state.checkedTotal -= countCheckedForCurrentAlternative; 1207 1208 // How much more input need there be to be able to retry from the first alternative? 1209 // examples: 1210 // /yarr_jit/ or /wrec|pcre/ 1211 // In these examples we need check for one more input before looping. 1212 // /yarr_jit|pcre/ 1213 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative 1214 // being four longer than the last alternative checked, and another +1 to effectively move 1215 // the start position along by one). 1216 // /yarr|rules/ or /wrec|notsomuch/ 1217 // In these examples, provided that there was sufficient input to have just been matching for 1218 // the second alternative we can loop without checking for available input (since the second 1219 // alternative is longer than the first). In the latter example we need to decrement index 1220 // (by 4) so the start position is only progressed by 1 from the last iteration. 1221 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; 1222 1223 // First, deal with the cases where there was sufficient input to try the last alternative. 1224 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. 1225 state.linkAlternativeBacktracks(this); 1226 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! 1227 state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this); 1228 else { // no need to check the input, but we do have some bookkeeping to do first. 1229 state.linkAlternativeBacktracks(this); 1230 1231 // Where necessary update our preserved start position. 1232 if (!m_pattern.m_body->m_hasFixedSize) { 1233 move(index, regT0); 1234 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 1235 poke(regT0, m_pattern.m_body->m_callFrameSize); 1236 } 1237 1238 // Update index if necessary, and loop (without checking). 1239 if (incrementForNextIter) 1240 add32(Imm32(incrementForNextIter), index); 1241 jump().linkTo(firstAlternativeInputChecked, this); 1242 } 1243 1244 notEnoughInputForPreviousAlternative.link(this); 1245 // Update our idea of the start position, if we're tracking this. 1246 if (!m_pattern.m_body->m_hasFixedSize) { 1247 if (countCheckedForCurrentAlternative - 1) { 1248 move(index, regT0); 1249 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 1250 poke(regT0, m_pattern.m_body->m_callFrameSize); 1251 } else 1252 poke(index, m_pattern.m_body->m_callFrameSize); 1253 } 1254 // Check if there is sufficent input to run the first alternative again. 1255 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); 1256 // No - insufficent input to run the first alteranative, are there any other alternatives we 1257 // might need to check? If so, the last check will have left the index incremented by 1258 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative 1259 // LESS input is available, to have the effect of just progressing the start position by 1 1260 // from the last iteration. If this check passes we can just jump up to the check associated 1261 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the 1262 // first alternative again, and this check will fail (otherwise the check planted just above 1263 // here would have passed). This is a bit sad, however it saves trying to do something more 1264 // complex here in compilation, and in the common case we should end up coallescing the checks. 1265 // 1266 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least 1267 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, 1268 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there 1269 // is sufficient input to run either alternative (constantly failing). If there had been only 1270 // one alternative, or if the shorter alternative had come first, we would have terminated 1271 // immediately. :-/ 1272 if (hasShorterAlternatives) 1273 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); 1274 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, 1275 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 1276 // but since we're about to return a failure this doesn't really matter!) 1277 1278 unsigned frameSize = m_pattern.m_body->m_callFrameSize; 1279 if (!m_pattern.m_body->m_hasFixedSize) 1280 ++frameSize; 1281 if (frameSize) 1282 addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister); 1283 1284 move(Imm32(-1), returnRegister); 1285 1286 generateReturn(); 1287 } 1288 1289 void generateEnter() 1290 { 1291 #if CPU(X86_64) 1292 push(X86Registers::ebp); 1293 move(stackPointerRegister, X86Registers::ebp); 1294 push(X86Registers::ebx); 1295 #elif CPU(X86) 1296 push(X86Registers::ebp); 1297 move(stackPointerRegister, X86Registers::ebp); 1298 // TODO: do we need spill registers to fill the output pointer if there are no sub captures? 1299 push(X86Registers::ebx); 1300 push(X86Registers::edi); 1301 push(X86Registers::esi); 1302 // load output into edi (2 = saved ebp + return address). 1303 #if COMPILER(MSVC) 1304 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); 1305 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); 1306 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); 1307 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); 1308 #else 1309 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); 1310 #endif 1311 #elif CPU(ARM) 1312 push(ARMRegisters::r4); 1313 push(ARMRegisters::r5); 1314 push(ARMRegisters::r6); 1315 move(ARMRegisters::r3, output); 1316 #endif 1317 } 1318 1319 void generateReturn() 1320 { 1321 #if CPU(X86_64) 1322 pop(X86Registers::ebx); 1323 pop(X86Registers::ebp); 1324 #elif CPU(X86) 1325 pop(X86Registers::esi); 1326 pop(X86Registers::edi); 1327 pop(X86Registers::ebx); 1328 pop(X86Registers::ebp); 1329 #elif CPU(ARM) 1330 pop(ARMRegisters::r6); 1331 pop(ARMRegisters::r5); 1332 pop(ARMRegisters::r4); 1333 #endif 1334 ret(); 1335 } 1336 1337 public: 1338 RegexGenerator(RegexPattern& pattern) 1339 : m_pattern(pattern) 1340 , m_generationFailed(false) 1341 { 1342 } 1343 1344 void generate() 1345 { 1346 generateEnter(); 1347 1348 // TODO: do I really want this on the stack? 1349 if (!m_pattern.m_body->m_hasFixedSize) 1350 push(index); 1351 1352 if (m_pattern.m_body->m_callFrameSize) 1353 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1354 1355 generateDisjunction(m_pattern.m_body); 1356 } 1357 1358 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject) 1359 { 1360 generate(); 1361 1362 LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size())); 1363 1364 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) 1365 patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation)); 1366 1367 jitObject.set(patchBuffer.finalizeCode()); 1368 } 1369 1370 bool generationFailed() 1371 { 1372 return m_generationFailed; 1373 } 1374 1375 private: 1376 RegexPattern& m_pattern; 1377 Vector<AlternativeBacktrackRecord> m_backtrackRecords; 1378 bool m_generationFailed; 1379 }; 1380 1381 void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) 1382 { 1383 RegexPattern pattern(ignoreCase, multiline); 1384 1385 if ((error = compileRegex(patternString, pattern))) 1386 return; 1387 1388 numSubpatterns = pattern.m_numSubpatterns; 1389 1390 RegexGenerator generator(pattern); 1391 generator.compile(globalData, jitObject); 1392 1393 if (generator.generationFailed()) { 1394 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase; 1395 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine; 1396 jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error)); 1397 } 1398 } 1399 1400 }} 1401 1402 #endif 1403 1404 1405 1406 1407 1408