1 /* 2 * Copyright (C) 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 #include "YarrJIT.h" 28 29 #include "ASCIICType.h" 30 #include "LinkBuffer.h" 31 #include "Yarr.h" 32 33 #if ENABLE(YARR_JIT) 34 35 using namespace WTF; 36 37 namespace JSC { namespace Yarr { 38 39 class YarrGenerator : private MacroAssembler { 40 friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); 41 42 #if CPU(ARM) 43 static const RegisterID input = ARMRegisters::r0; 44 static const RegisterID index = ARMRegisters::r1; 45 static const RegisterID length = ARMRegisters::r2; 46 static const RegisterID output = ARMRegisters::r4; 47 48 static const RegisterID regT0 = ARMRegisters::r5; 49 static const RegisterID regT1 = ARMRegisters::r6; 50 51 static const RegisterID returnRegister = ARMRegisters::r0; 52 #elif CPU(MIPS) 53 static const RegisterID input = MIPSRegisters::a0; 54 static const RegisterID index = MIPSRegisters::a1; 55 static const RegisterID length = MIPSRegisters::a2; 56 static const RegisterID output = MIPSRegisters::a3; 57 58 static const RegisterID regT0 = MIPSRegisters::t4; 59 static const RegisterID regT1 = MIPSRegisters::t5; 60 61 static const RegisterID returnRegister = MIPSRegisters::v0; 62 #elif CPU(SH4) 63 static const RegisterID input = SH4Registers::r4; 64 static const RegisterID index = SH4Registers::r5; 65 static const RegisterID length = SH4Registers::r6; 66 static const RegisterID output = SH4Registers::r7; 67 68 static const RegisterID regT0 = SH4Registers::r0; 69 static const RegisterID regT1 = SH4Registers::r1; 70 71 static const RegisterID returnRegister = SH4Registers::r0; 72 #elif CPU(X86) 73 static const RegisterID input = X86Registers::eax; 74 static const RegisterID index = X86Registers::edx; 75 static const RegisterID length = X86Registers::ecx; 76 static const RegisterID output = X86Registers::edi; 77 78 static const RegisterID regT0 = X86Registers::ebx; 79 static const RegisterID regT1 = X86Registers::esi; 80 81 static const RegisterID returnRegister = X86Registers::eax; 82 #elif CPU(X86_64) 83 static const RegisterID input = X86Registers::edi; 84 static const RegisterID index = X86Registers::esi; 85 static const RegisterID length = X86Registers::edx; 86 static const RegisterID output = X86Registers::ecx; 87 88 static const RegisterID regT0 = X86Registers::eax; 89 static const RegisterID regT1 = X86Registers::ebx; 90 91 static const RegisterID returnRegister = X86Registers::eax; 92 #endif 93 94 void optimizeAlternative(PatternAlternative* alternative) 95 { 96 if (!alternative->m_terms.size()) 97 return; 98 99 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { 100 PatternTerm& term = alternative->m_terms[i]; 101 PatternTerm& nextTerm = alternative->m_terms[i + 1]; 102 103 if ((term.type == PatternTerm::TypeCharacterClass) 104 && (term.quantityType == QuantifierFixedCount) 105 && (nextTerm.type == PatternTerm::TypePatternCharacter) 106 && (nextTerm.quantityType == QuantifierFixedCount)) { 107 PatternTerm termCopy = term; 108 alternative->m_terms[i] = nextTerm; 109 alternative->m_terms[i + 1] = termCopy; 110 } 111 } 112 } 113 114 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) 115 { 116 do { 117 // pick which range we're going to generate 118 int which = count >> 1; 119 char lo = ranges[which].begin; 120 char hi = ranges[which].end; 121 122 // check if there are any ranges or matches below lo. If not, just jl to failure - 123 // if there is anything else to check, check that first, if it falls through jmp to failure. 124 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 125 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 126 127 // generate code for all ranges before this one 128 if (which) 129 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); 130 131 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { 132 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); 133 ++*matchIndex; 134 } 135 failures.append(jump()); 136 137 loOrAbove.link(this); 138 } else if (which) { 139 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); 140 141 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); 142 failures.append(jump()); 143 144 loOrAbove.link(this); 145 } else 146 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); 147 148 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) 149 ++*matchIndex; 150 151 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); 152 // fall through to here, the value is above hi. 153 154 // shuffle along & loop around if there are any more matches to handle. 155 unsigned next = which + 1; 156 ranges += next; 157 count -= next; 158 } while (count); 159 } 160 161 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) 162 { 163 if (charClass->m_table) { 164 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table)); 165 matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); 166 return; 167 } 168 Jump unicodeFail; 169 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { 170 Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); 171 172 if (charClass->m_matchesUnicode.size()) { 173 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { 174 UChar ch = charClass->m_matchesUnicode[i]; 175 matchDest.append(branch32(Equal, character, Imm32(ch))); 176 } 177 } 178 179 if (charClass->m_rangesUnicode.size()) { 180 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { 181 UChar lo = charClass->m_rangesUnicode[i].begin; 182 UChar hi = charClass->m_rangesUnicode[i].end; 183 184 Jump below = branch32(LessThan, character, Imm32(lo)); 185 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); 186 below.link(this); 187 } 188 } 189 190 unicodeFail = jump(); 191 isAscii.link(this); 192 } 193 194 if (charClass->m_ranges.size()) { 195 unsigned matchIndex = 0; 196 JumpList failures; 197 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); 198 while (matchIndex < charClass->m_matches.size()) 199 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); 200 201 failures.link(this); 202 } else if (charClass->m_matches.size()) { 203 // optimization: gather 'a','A' etc back together, can mask & test once. 204 Vector<char> matchesAZaz; 205 206 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { 207 char ch = charClass->m_matches[i]; 208 if (m_pattern.m_ignoreCase) { 209 if (isASCIILower(ch)) { 210 matchesAZaz.append(ch); 211 continue; 212 } 213 if (isASCIIUpper(ch)) 214 continue; 215 } 216 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); 217 } 218 219 if (unsigned countAZaz = matchesAZaz.size()) { 220 or32(TrustedImm32(32), character); 221 for (unsigned i = 0; i < countAZaz; ++i) 222 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); 223 } 224 } 225 226 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) 227 unicodeFail.link(this); 228 } 229 230 // Jumps if input not available; will have (incorrectly) incremented already! 231 Jump jumpIfNoAvailableInput(unsigned countToCheck) 232 { 233 add32(Imm32(countToCheck), index); 234 return branch32(Above, index, length); 235 } 236 237 Jump jumpIfAvailableInput(unsigned countToCheck) 238 { 239 add32(Imm32(countToCheck), index); 240 return branch32(BelowOrEqual, index, length); 241 } 242 243 Jump checkInput() 244 { 245 return branch32(BelowOrEqual, index, length); 246 } 247 248 Jump atEndOfInput() 249 { 250 return branch32(Equal, index, length); 251 } 252 253 Jump notAtEndOfInput() 254 { 255 return branch32(NotEqual, index, length); 256 } 257 258 Jump jumpIfCharEquals(UChar ch, int inputPosition) 259 { 260 return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); 261 } 262 263 Jump jumpIfCharNotEquals(UChar ch, int inputPosition) 264 { 265 return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); 266 } 267 268 void readCharacter(int inputPosition, RegisterID reg) 269 { 270 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); 271 } 272 273 void storeToFrame(RegisterID reg, unsigned frameLocation) 274 { 275 poke(reg, frameLocation); 276 } 277 278 void storeToFrame(TrustedImm32 imm, unsigned frameLocation) 279 { 280 poke(imm, frameLocation); 281 } 282 283 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) 284 { 285 return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); 286 } 287 288 void loadFromFrame(unsigned frameLocation, RegisterID reg) 289 { 290 peek(reg, frameLocation); 291 } 292 293 void loadFromFrameAndJump(unsigned frameLocation) 294 { 295 jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); 296 } 297 298 struct IndirectJumpEntry { 299 IndirectJumpEntry(int32_t stackOffset) 300 : m_stackOffset(stackOffset) 301 { 302 } 303 304 IndirectJumpEntry(int32_t stackOffset, Jump jump) 305 : m_stackOffset(stackOffset) 306 { 307 addJump(jump); 308 } 309 310 IndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) 311 : m_stackOffset(stackOffset) 312 { 313 addDataLabel(dataLabel); 314 } 315 316 void addJump(Jump jump) 317 { 318 m_relJumps.append(jump); 319 } 320 321 void addDataLabel(DataLabelPtr dataLabel) 322 { 323 m_dataLabelPtrVector.append(dataLabel); 324 } 325 326 int32_t m_stackOffset; 327 JumpList m_relJumps; 328 Vector<DataLabelPtr, 16> m_dataLabelPtrVector; 329 }; 330 331 struct AlternativeBacktrackRecord { 332 DataLabelPtr dataLabel; 333 Label backtrackLocation; 334 335 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation) 336 : dataLabel(dataLabel) 337 , backtrackLocation(backtrackLocation) 338 { 339 } 340 }; 341 342 struct ParenthesesTail; 343 struct TermGenerationState; 344 345 struct GenerationState { 346 typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap; 347 348 GenerationState() 349 : m_parenNestingLevel(0) 350 { 351 } 352 353 void addIndirectJumpEntry(int32_t stackOffset, Jump jump) 354 { 355 IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); 356 357 ASSERT(stackOffset >= 0); 358 359 uint32_t offset = static_cast<uint32_t>(stackOffset); 360 361 if (result == m_indirectJumpMap.end()) 362 m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump)); 363 else 364 result->second->addJump(jump); 365 } 366 367 void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps) 368 { 369 JumpList::JumpVector jumpVector = jumps.jumps(); 370 size_t size = jumpVector.size(); 371 for (size_t i = 0; i < size; ++i) 372 addIndirectJumpEntry(stackOffset, jumpVector[i]); 373 374 jumps.empty(); 375 } 376 377 void addIndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel) 378 { 379 IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset); 380 381 ASSERT(stackOffset >= 0); 382 383 uint32_t offset = static_cast<uint32_t>(stackOffset); 384 385 if (result == m_indirectJumpMap.end()) 386 m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, dataLabel)); 387 else 388 result->second->addDataLabel(dataLabel); 389 } 390 391 void emitIndirectJumpTable(MacroAssembler* masm) 392 { 393 for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) { 394 IndirectJumpEntry* indJumpEntry = iter->second; 395 size_t size = indJumpEntry->m_dataLabelPtrVector.size(); 396 if (size) { 397 // Link any associated DataLabelPtr's with indirect jump via label 398 Label hereLabel = masm->label(); 399 for (size_t i = 0; i < size; ++i) 400 m_backtrackRecords.append(AlternativeBacktrackRecord(indJumpEntry->m_dataLabelPtrVector[i], hereLabel)); 401 } 402 indJumpEntry->m_relJumps.link(masm); 403 masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset)); 404 delete indJumpEntry; 405 } 406 } 407 408 void incrementParenNestingLevel() 409 { 410 ++m_parenNestingLevel; 411 } 412 413 void decrementParenNestingLevel() 414 { 415 --m_parenNestingLevel; 416 } 417 418 ParenthesesTail* addParenthesesTail(PatternTerm& term, JumpList* jumpListToPriorParen) 419 { 420 ParenthesesTail* parenthesesTail = new ParenthesesTail(term, m_parenNestingLevel, jumpListToPriorParen); 421 m_parenTails.append(parenthesesTail); 422 m_parenTailsForIteration.append(parenthesesTail); 423 424 return parenthesesTail; 425 } 426 427 void emitParenthesesTail(YarrGenerator* generator) 428 { 429 unsigned vectorSize = m_parenTails.size(); 430 bool priorBacktrackFallThrough = false; 431 432 // Emit in reverse order so parentTail N can fall through to N-1 433 for (unsigned index = vectorSize; index > 0; --index) { 434 JumpList jumpsToNext; 435 priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1); 436 if (index > 1) 437 jumpsToNext.linkTo(generator->label(), generator); 438 else 439 addJumpsToNextInteration(jumpsToNext); 440 } 441 m_parenTails.clear(); 442 } 443 444 void addJumpToNextInteration(Jump jump) 445 { 446 m_jumpsToNextInteration.append(jump); 447 } 448 449 void addJumpsToNextInteration(JumpList jumps) 450 { 451 m_jumpsToNextInteration.append(jumps); 452 } 453 454 void addDataLabelToNextIteration(DataLabelPtr dataLabel) 455 { 456 m_dataPtrsToNextIteration.append(dataLabel); 457 } 458 459 void linkToNextIteration(Label label) 460 { 461 m_nextIteration = label; 462 463 for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i) 464 m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration)); 465 466 m_dataPtrsToNextIteration.clear(); 467 468 for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i) 469 m_parenTailsForIteration[i]->setNextIteration(m_nextIteration); 470 471 m_parenTailsForIteration.clear(); 472 } 473 474 void linkToNextIteration(YarrGenerator* generator) 475 { 476 m_jumpsToNextInteration.linkTo(m_nextIteration, generator); 477 } 478 479 int m_parenNestingLevel; 480 Vector<AlternativeBacktrackRecord> m_backtrackRecords; 481 IndirectJumpHashMap m_indirectJumpMap; 482 Label m_nextIteration; 483 Vector<OwnPtr<ParenthesesTail> > m_parenTails; 484 JumpList m_jumpsToNextInteration; 485 Vector<DataLabelPtr> m_dataPtrsToNextIteration; 486 Vector<ParenthesesTail*> m_parenTailsForIteration; 487 }; 488 489 struct BacktrackDestination { 490 typedef enum { 491 NoBacktrack, 492 BacktrackLabel, 493 BacktrackStackOffset, 494 BacktrackJumpList, 495 BacktrackLinked 496 } BacktrackType; 497 498 BacktrackDestination() 499 : m_backtrackType(NoBacktrack) 500 , m_backtrackToLabel(0) 501 , m_subDataLabelPtr(0) 502 , m_nextBacktrack(0) 503 , m_backtrackSourceLabel(0) 504 , m_backtrackSourceJumps(0) 505 { 506 } 507 508 BacktrackDestination(int32_t stackOffset) 509 : m_backtrackType(BacktrackStackOffset) 510 , m_backtrackStackOffset(stackOffset) 511 , m_backtrackToLabel(0) 512 , m_subDataLabelPtr(0) 513 , m_nextBacktrack(0) 514 , m_backtrackSourceLabel(0) 515 , m_backtrackSourceJumps(0) 516 { 517 } 518 519 BacktrackDestination(Label label) 520 : m_backtrackType(BacktrackLabel) 521 , m_backtrackLabel(label) 522 , m_backtrackToLabel(0) 523 , m_subDataLabelPtr(0) 524 , m_nextBacktrack(0) 525 , m_backtrackSourceLabel(0) 526 , m_backtrackSourceJumps(0) 527 { 528 } 529 530 void clear(bool doDataLabelClear = true) 531 { 532 m_backtrackType = NoBacktrack; 533 if (doDataLabelClear) 534 clearDataLabel(); 535 m_nextBacktrack = 0; 536 } 537 538 void clearDataLabel() 539 { 540 m_dataLabelPtr = DataLabelPtr(); 541 } 542 543 bool hasDestination() 544 { 545 return (m_backtrackType != NoBacktrack); 546 } 547 548 bool isStackOffset() 549 { 550 return (m_backtrackType == BacktrackStackOffset); 551 } 552 553 bool isLabel() 554 { 555 return (m_backtrackType == BacktrackLabel); 556 } 557 558 bool isJumpList() 559 { 560 return (m_backtrackType == BacktrackJumpList); 561 } 562 563 bool hasDataLabel() 564 { 565 return m_dataLabelPtr.isSet(); 566 } 567 568 void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true) 569 { 570 m_backtrackType = rhs.m_backtrackType; 571 if (m_backtrackType == BacktrackStackOffset) 572 m_backtrackStackOffset = rhs.m_backtrackStackOffset; 573 else if (m_backtrackType == BacktrackLabel) 574 m_backtrackLabel = rhs.m_backtrackLabel; 575 if (copyDataLabel) 576 m_dataLabelPtr = rhs.m_dataLabelPtr; 577 m_backtrackSourceJumps = rhs.m_backtrackSourceJumps; 578 m_backtrackSourceLabel = rhs.m_backtrackSourceLabel; 579 } 580 581 void copyTo(BacktrackDestination& lhs) 582 { 583 lhs.m_backtrackType = m_backtrackType; 584 if (m_backtrackType == BacktrackStackOffset) 585 lhs.m_backtrackStackOffset = m_backtrackStackOffset; 586 else if (m_backtrackType == BacktrackLabel) 587 lhs.m_backtrackLabel = m_backtrackLabel; 588 lhs.m_backtrackSourceJumps = m_backtrackSourceJumps; 589 lhs.m_backtrackSourceLabel = m_backtrackSourceLabel; 590 lhs.m_dataLabelPtr = m_dataLabelPtr; 591 lhs.m_backTrackJumps = m_backTrackJumps; 592 } 593 594 void addBacktrackJump(Jump jump) 595 { 596 m_backTrackJumps.append(jump); 597 } 598 599 void setStackOffset(int32_t stackOffset) 600 { 601 m_backtrackType = BacktrackStackOffset; 602 m_backtrackStackOffset = stackOffset; 603 } 604 605 void setLabel(Label label) 606 { 607 m_backtrackType = BacktrackLabel; 608 m_backtrackLabel = label; 609 } 610 611 void setNextBacktrackLabel(Label label) 612 { 613 if (m_nextBacktrack) 614 m_nextBacktrack->setLabel(label); 615 } 616 617 void propagateBacktrackToLabel(const BacktrackDestination& rhs) 618 { 619 if (!m_backtrackToLabel && rhs.m_backtrackToLabel) 620 m_backtrackToLabel = rhs.m_backtrackToLabel; 621 } 622 623 void setBacktrackToLabel(Label* backtrackToLabel) 624 { 625 if (!m_backtrackToLabel) 626 m_backtrackToLabel = backtrackToLabel; 627 } 628 629 bool hasBacktrackToLabel() 630 { 631 return m_backtrackToLabel; 632 } 633 634 void setBacktrackJumpList(JumpList* jumpList) 635 { 636 m_backtrackType = BacktrackJumpList; 637 m_backtrackSourceJumps = jumpList; 638 } 639 640 void setBacktrackSourceLabel(Label* backtrackSourceLabel) 641 { 642 m_backtrackSourceLabel = backtrackSourceLabel; 643 } 644 645 void setDataLabel(DataLabelPtr dp) 646 { 647 if (m_subDataLabelPtr) { 648 *m_subDataLabelPtr = dp; 649 m_subDataLabelPtr = 0; 650 } else { 651 ASSERT(!hasDataLabel()); 652 m_dataLabelPtr = dp; 653 } 654 } 655 656 void clearSubDataLabelPtr() 657 { 658 m_subDataLabelPtr = 0; 659 } 660 661 void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr) 662 { 663 m_subDataLabelPtr = subDataLabelPtr; 664 } 665 666 void linkToNextBacktrack(BacktrackDestination* nextBacktrack) 667 { 668 m_nextBacktrack = nextBacktrack; 669 } 670 671 int32_t getStackOffset() 672 { 673 ASSERT(m_backtrackType == BacktrackStackOffset); 674 return m_backtrackStackOffset; 675 } 676 677 Label getLabel() 678 { 679 ASSERT(m_backtrackType == BacktrackLabel); 680 return m_backtrackLabel; 681 } 682 683 JumpList& getBacktrackJumps() 684 { 685 return m_backTrackJumps; 686 } 687 688 DataLabelPtr& getDataLabel() 689 { 690 return m_dataLabelPtr; 691 } 692 693 void jumpToBacktrack(MacroAssembler* masm) 694 { 695 if (isJumpList()) { 696 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 697 masm->jump().linkTo(*m_backtrackSourceLabel, masm); 698 else 699 m_backtrackSourceJumps->append(masm->jump()); 700 } else if (isStackOffset()) 701 masm->jump(Address(stackPointerRegister, m_backtrackStackOffset)); 702 else if (isLabel()) 703 masm->jump().linkTo(m_backtrackLabel, masm); 704 else 705 m_backTrackJumps.append(masm->jump()); 706 } 707 708 void jumpToBacktrack(YarrGenerator* generator, Jump jump) 709 { 710 if (isJumpList()) { 711 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 712 jump.linkTo(*m_backtrackSourceLabel, generator); 713 else 714 m_backtrackSourceJumps->append(jump); 715 } else if (isStackOffset()) 716 generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump); 717 else if (isLabel()) 718 jump.linkTo(getLabel(), generator); 719 else 720 m_backTrackJumps.append(jump); 721 } 722 723 void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) 724 { 725 if (isJumpList()) { 726 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 727 jumps.linkTo(*m_backtrackSourceLabel, generator); 728 else 729 m_backtrackSourceJumps->append(jumps); 730 } else if (isStackOffset()) 731 generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps); 732 else if (isLabel()) 733 jumps.linkTo(getLabel(), generator); 734 else 735 m_backTrackJumps.append(jumps); 736 } 737 738 bool plantJumpToBacktrackIfExists(YarrGenerator* generator) 739 { 740 if (isJumpList()) { 741 if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet())) 742 generator->jump(*m_backtrackSourceLabel); 743 else 744 m_backtrackSourceJumps->append(generator->jump()); 745 746 return true; 747 } 748 749 if (isStackOffset()) { 750 generator->jump(Address(stackPointerRegister, getStackOffset())); 751 return true; 752 } 753 754 if (isLabel()) { 755 generator->jump(getLabel()); 756 if (hasDataLabel()) { 757 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel())); 758 clearDataLabel(); 759 } 760 return true; 761 } 762 763 return false; 764 } 765 766 void linkBacktrackToLabel(Label backtrackLabel) 767 { 768 if (m_backtrackToLabel) 769 *m_backtrackToLabel = backtrackLabel; 770 } 771 772 void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) 773 { 774 Label hereLabel = generator->label(); 775 776 if (m_backtrackToLabel) { 777 *m_backtrackToLabel = hereLabel; 778 m_backtrackToLabel = 0; 779 } 780 781 m_backTrackJumps.link(generator); 782 783 if (nextIteration) 784 generator->m_expressionState.linkToNextIteration(hereLabel); 785 786 if (hasDataLabel()) { 787 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel)); 788 // data label cleared as a result of the clear() below 789 } 790 791 clear(); 792 } 793 794 void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) 795 { 796 m_backTrackJumps.linkTo(label, generator); 797 798 if (nextIteration) 799 generator->m_expressionState.linkToNextIteration(label); 800 801 if (hasDataLabel()) { 802 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label)); 803 clearDataLabel(); 804 } 805 } 806 807 private: 808 BacktrackType m_backtrackType; 809 int32_t m_backtrackStackOffset; 810 Label m_backtrackLabel; 811 DataLabelPtr m_dataLabelPtr; 812 Label* m_backtrackToLabel; 813 DataLabelPtr* m_subDataLabelPtr; 814 BacktrackDestination* m_nextBacktrack; 815 Label* m_backtrackSourceLabel; 816 JumpList* m_backtrackSourceJumps; 817 JumpList m_backTrackJumps; 818 }; 819 820 struct TermGenerationState { 821 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal) 822 : disjunction(disjunction) 823 , checkedTotal(checkedTotal) 824 , m_subParenNum(0) 825 , m_linkedBacktrack(0) 826 , m_jumpList(0) 827 { 828 } 829 830 void resetAlternative() 831 { 832 m_backtrack.clear(); 833 alt = 0; 834 } 835 bool alternativeValid() 836 { 837 return alt < disjunction->m_alternatives.size(); 838 } 839 void nextAlternative() 840 { 841 ++alt; 842 } 843 PatternAlternative* alternative() 844 { 845 return disjunction->m_alternatives[alt]; 846 } 847 bool isLastAlternative() 848 { 849 return (alt + 1) == disjunction->m_alternatives.size(); 850 } 851 852 void resetTerm() 853 { 854 ASSERT(alternativeValid()); 855 t = 0; 856 m_subParenNum = 0; 857 } 858 bool termValid() 859 { 860 ASSERT(alternativeValid()); 861 return t < alternative()->m_terms.size(); 862 } 863 void nextTerm() 864 { 865 ASSERT(alternativeValid()); 866 ++t; 867 } 868 PatternTerm& term() 869 { 870 ASSERT(alternativeValid()); 871 return alternative()->m_terms[t]; 872 } 873 bool isLastTerm() 874 { 875 ASSERT(alternativeValid()); 876 return (t + 1) == alternative()->m_terms.size(); 877 } 878 unsigned getSubParenNum() 879 { 880 return m_subParenNum++; 881 } 882 bool isMainDisjunction() 883 { 884 return !disjunction->m_parent; 885 } 886 887 void setJumpListToPriorParen(JumpList* jumpList) 888 { 889 m_jumpList = jumpList; 890 } 891 892 JumpList* getJumpListToPriorParen() 893 { 894 return m_jumpList; 895 } 896 897 PatternTerm& lookaheadTerm() 898 { 899 ASSERT(alternativeValid()); 900 ASSERT((t + 1) < alternative()->m_terms.size()); 901 return alternative()->m_terms[t + 1]; 902 } 903 bool isSinglePatternCharacterLookaheadTerm() 904 { 905 ASSERT(alternativeValid()); 906 return ((t + 1) < alternative()->m_terms.size()) 907 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter) 908 && (lookaheadTerm().quantityType == QuantifierFixedCount) 909 && (lookaheadTerm().quantityCount == 1); 910 } 911 912 int inputOffset() 913 { 914 return term().inputPosition - checkedTotal; 915 } 916 917 void clearBacktrack() 918 { 919 m_backtrack.clear(false); 920 m_linkedBacktrack = 0; 921 } 922 923 void jumpToBacktrack(MacroAssembler* masm) 924 { 925 m_backtrack.jumpToBacktrack(masm); 926 } 927 928 void jumpToBacktrack(YarrGenerator* generator, Jump jump) 929 { 930 m_backtrack.jumpToBacktrack(generator, jump); 931 } 932 933 void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps) 934 { 935 m_backtrack.jumpToBacktrack(generator, jumps); 936 } 937 938 bool plantJumpToBacktrackIfExists(YarrGenerator* generator) 939 { 940 return m_backtrack.plantJumpToBacktrackIfExists(generator); 941 } 942 943 void linkDataLabelToBacktrackIfExists(YarrGenerator* generator, DataLabelPtr dataLabel) 944 { 945 // If we have a stack offset backtrack destination, use it directly 946 if (m_backtrack.isStackOffset()) { 947 generator->m_expressionState.addIndirectJumpEntry(m_backtrack.getStackOffset(), dataLabel); 948 m_backtrack.clearSubDataLabelPtr(); 949 } else { 950 // If we have a backtrack label, connect the datalabel to it directly. 951 if (m_backtrack.isLabel()) 952 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, m_backtrack.getLabel())); 953 else 954 setBacktrackDataLabel(dataLabel); 955 } 956 } 957 958 void addBacktrackJump(Jump jump) 959 { 960 m_backtrack.addBacktrackJump(jump); 961 } 962 963 void setBacktrackDataLabel(DataLabelPtr dp) 964 { 965 m_backtrack.setDataLabel(dp); 966 } 967 968 void setBackTrackStackOffset(int32_t stackOffset) 969 { 970 m_backtrack.setStackOffset(stackOffset); 971 } 972 973 void setBacktrackLabel(Label label) 974 { 975 m_backtrack.setLabel(label); 976 } 977 978 void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false) 979 { 980 m_backtrack.linkAlternativeBacktracks(generator, nextIteration); 981 m_linkedBacktrack = 0; 982 } 983 984 void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false) 985 { 986 m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration); 987 } 988 989 void setBacktrackLink(BacktrackDestination* linkedBacktrack) 990 { 991 m_linkedBacktrack = linkedBacktrack; 992 } 993 994 void chainBacktracks(BacktrackDestination* followonBacktrack) 995 { 996 if (m_linkedBacktrack) 997 m_linkedBacktrack->linkToNextBacktrack(followonBacktrack); 998 } 999 1000 BacktrackDestination& getBacktrackDestination() 1001 { 1002 return m_backtrack; 1003 } 1004 1005 void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true) 1006 { 1007 if (doJump) 1008 m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps()); 1009 1010 if (m_backtrack.isLabel() && backtrack.hasBacktrackToLabel()) 1011 backtrack.linkBacktrackToLabel(m_backtrack.getLabel()); 1012 1013 if (backtrack.hasDestination()) { 1014 if (m_backtrack.hasDataLabel()) 1015 generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel()); 1016 1017 m_backtrack.copyTarget(backtrack, doJump); 1018 } 1019 } 1020 1021 PatternDisjunction* disjunction; 1022 int checkedTotal; 1023 private: 1024 unsigned alt; 1025 unsigned t; 1026 unsigned m_subParenNum; 1027 BacktrackDestination m_backtrack; 1028 BacktrackDestination* m_linkedBacktrack; 1029 JumpList* m_jumpList; 1030 }; 1031 1032 struct ParenthesesTail { 1033 ParenthesesTail(PatternTerm& term, int nestingLevel, JumpList* jumpListToPriorParen) 1034 : m_term(term) 1035 , m_nestingLevel(nestingLevel) 1036 , m_subParenIndex(0) 1037 , m_jumpListToPriorParen(jumpListToPriorParen) 1038 { 1039 } 1040 1041 void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough) 1042 { 1043 m_nonGreedyTryParentheses = nonGreedyTryParentheses; 1044 m_fallThrough = fallThrough; 1045 1046 m_subParenIndex = state.getSubParenNum(); 1047 parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack); 1048 state.chainBacktracks(&m_backtrack); 1049 BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); 1050 stateBacktrack.copyTo(m_backtrack); 1051 stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel); 1052 state.setBacktrackLink(&m_backtrack); 1053 stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr); 1054 1055 m_doDirectBacktrack = m_parenBacktrack.hasDestination(); 1056 1057 if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy)) 1058 m_doDirectBacktrack = false; 1059 1060 if (m_doDirectBacktrack) 1061 state.propagateBacktrackingFrom(generator, m_parenBacktrack, false); 1062 else { 1063 stateBacktrack.setBacktrackJumpList(&m_afterBacktrackJumps); 1064 stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens); 1065 } 1066 } 1067 1068 void setNextIteration(Label nextIteration) 1069 { 1070 if (!m_nestingLevel && !m_backtrackToLabel.isSet()) 1071 m_backtrackToLabel = nextIteration; 1072 } 1073 1074 void addAfterParenJump(Jump jump) 1075 { 1076 m_afterBacktrackJumps.append(jump); 1077 } 1078 1079 bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough) 1080 { 1081 const RegisterID indexTemporary = regT0; 1082 unsigned parenthesesFrameLocation = m_term.frameLocation; 1083 Jump fromPriorBacktrack; 1084 bool needJumpForPriorParenTail = false; 1085 1086 if (priorBackTrackFallThrough 1087 && ((m_term.quantityType == QuantifierGreedy) 1088 || (m_term.quantityType == QuantifierNonGreedy) 1089 || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) { 1090 // If the prior paren tail code assumed that it could fall through, 1091 // but we need to generate after paren backtrack code, then provide 1092 // a jump around that code for the prior paren tail code. 1093 // A regular expressing like ((xxx)...)? needs this. 1094 fromPriorBacktrack = generator->jump(); 1095 needJumpForPriorParenTail = true; 1096 } 1097 1098 if (!m_backtrack.hasDestination()) { 1099 if (m_backtrackToLabel.isSet()) { 1100 m_backtrack.setLabel(m_backtrackToLabel); 1101 nextBacktrackFallThrough = false; 1102 } else if (m_jumpListToPriorParen) { 1103 // If we don't have a destination, go back to either the prior paren or the next outer paren. 1104 m_backtrack.setBacktrackJumpList(m_jumpListToPriorParen); 1105 nextBacktrackFallThrough = false; 1106 } else 1107 m_backtrack.setBacktrackJumpList(&jumpsToNext); 1108 } else 1109 nextBacktrackFallThrough = false; 1110 1111 // A failure AFTER the parens jumps here - Backtrack to this paren 1112 m_backtrackFromAfterParens = generator->label(); 1113 1114 if (m_dataAfterLabelPtr.isSet()) 1115 generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens)); 1116 1117 m_afterBacktrackJumps.link(generator); 1118 1119 if (m_term.quantityType == QuantifierGreedy) { 1120 // If this is -1 we have now tested with both with and without the parens. 1121 generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); 1122 m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, TrustedImm32(-1))); 1123 } else if (m_term.quantityType == QuantifierNonGreedy) { 1124 // If this is -1 we have now tested with both with and without the parens. 1125 generator->loadFromFrame(parenthesesFrameLocation, indexTemporary); 1126 generator->branch32(Equal, indexTemporary, TrustedImm32(-1)).linkTo(m_nonGreedyTryParentheses, generator); 1127 } 1128 1129 if (!m_doDirectBacktrack) 1130 m_parenBacktrack.plantJumpToBacktrackIfExists(generator); 1131 1132 // A failure WITHIN the parens jumps here 1133 if (needJumpForPriorParenTail) 1134 fromPriorBacktrack.link(generator); 1135 m_parenBacktrack.linkAlternativeBacktracks(generator); 1136 m_withinBacktrackJumps.link(generator); 1137 1138 if (m_term.capture()) 1139 generator->store32(TrustedImm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int))); 1140 1141 if (m_term.quantityType == QuantifierGreedy) { 1142 generator->storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1143 generator->jump().linkTo(m_fallThrough, generator); 1144 nextBacktrackFallThrough = false; 1145 } else if (!nextBacktrackFallThrough) 1146 m_backtrack.jumpToBacktrack(generator); 1147 1148 if (!m_doDirectBacktrack) 1149 m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens); 1150 1151 return nextBacktrackFallThrough; 1152 } 1153 1154 PatternTerm& m_term; 1155 int m_nestingLevel; 1156 unsigned m_subParenIndex; 1157 JumpList* m_jumpListToPriorParen; 1158 Label m_nonGreedyTryParentheses; 1159 Label m_fallThrough; 1160 Label m_backtrackToLabel; 1161 Label m_backtrackFromAfterParens; 1162 DataLabelPtr m_dataAfterLabelPtr; 1163 JumpList m_withinBacktrackJumps; 1164 JumpList m_afterBacktrackJumps; 1165 BacktrackDestination m_parenBacktrack; 1166 BacktrackDestination m_backtrack; 1167 bool m_doDirectBacktrack; 1168 }; 1169 1170 void generateAssertionBOL(TermGenerationState& state) 1171 { 1172 PatternTerm& term = state.term(); 1173 1174 if (m_pattern.m_multiline) { 1175 const RegisterID character = regT0; 1176 1177 JumpList matchDest; 1178 if (!term.inputPosition) 1179 matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal))); 1180 1181 readCharacter(state.inputOffset() - 1, character); 1182 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 1183 state.jumpToBacktrack(this); 1184 1185 matchDest.link(this); 1186 } else { 1187 // Erk, really should poison out these alternatives early. :-/ 1188 if (term.inputPosition) 1189 state.jumpToBacktrack(this); 1190 else 1191 state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal))); 1192 } 1193 } 1194 1195 void generateAssertionEOL(TermGenerationState& state) 1196 { 1197 PatternTerm& term = state.term(); 1198 1199 if (m_pattern.m_multiline) { 1200 const RegisterID character = regT0; 1201 1202 JumpList matchDest; 1203 if (term.inputPosition == state.checkedTotal) 1204 matchDest.append(atEndOfInput()); 1205 1206 readCharacter(state.inputOffset(), character); 1207 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); 1208 state.jumpToBacktrack(this); 1209 1210 matchDest.link(this); 1211 } else { 1212 if (term.inputPosition == state.checkedTotal) 1213 state.jumpToBacktrack(this, notAtEndOfInput()); 1214 // Erk, really should poison out these alternatives early. :-/ 1215 else 1216 state.jumpToBacktrack(this); 1217 } 1218 } 1219 1220 // Also falls though on nextIsNotWordChar. 1221 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) 1222 { 1223 const RegisterID character = regT0; 1224 PatternTerm& term = state.term(); 1225 1226 if (term.inputPosition == state.checkedTotal) 1227 nextIsNotWordChar.append(atEndOfInput()); 1228 1229 readCharacter(state.inputOffset(), character); 1230 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); 1231 } 1232 1233 void generateAssertionWordBoundary(TermGenerationState& state) 1234 { 1235 const RegisterID character = regT0; 1236 PatternTerm& term = state.term(); 1237 1238 Jump atBegin; 1239 JumpList matchDest; 1240 if (!term.inputPosition) 1241 atBegin = branch32(Equal, index, Imm32(state.checkedTotal)); 1242 readCharacter(state.inputOffset() - 1, character); 1243 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); 1244 if (!term.inputPosition) 1245 atBegin.link(this); 1246 1247 // We fall through to here if the last character was not a wordchar. 1248 JumpList nonWordCharThenWordChar; 1249 JumpList nonWordCharThenNonWordChar; 1250 if (term.invert()) { 1251 matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar); 1252 nonWordCharThenWordChar.append(jump()); 1253 } else { 1254 matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar); 1255 nonWordCharThenNonWordChar.append(jump()); 1256 } 1257 state.jumpToBacktrack(this, nonWordCharThenNonWordChar); 1258 1259 // We jump here if the last character was a wordchar. 1260 matchDest.link(this); 1261 JumpList wordCharThenWordChar; 1262 JumpList wordCharThenNonWordChar; 1263 if (term.invert()) { 1264 matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar); 1265 wordCharThenWordChar.append(jump()); 1266 } else { 1267 matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar); 1268 // This can fall-though! 1269 } 1270 1271 state.jumpToBacktrack(this, wordCharThenWordChar); 1272 1273 nonWordCharThenWordChar.link(this); 1274 wordCharThenNonWordChar.link(this); 1275 } 1276 1277 void generatePatternCharacterSingle(TermGenerationState& state) 1278 { 1279 const RegisterID character = regT0; 1280 UChar ch = state.term().patternCharacter; 1281 1282 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1283 readCharacter(state.inputOffset(), character); 1284 or32(TrustedImm32(32), character); 1285 state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1286 } else { 1287 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1288 state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset())); 1289 } 1290 } 1291 1292 void generatePatternCharacterPair(TermGenerationState& state) 1293 { 1294 const RegisterID character = regT0; 1295 UChar ch1 = state.term().patternCharacter; 1296 UChar ch2 = state.lookaheadTerm().patternCharacter; 1297 1298 int mask = 0; 1299 int chPair = ch1 | (ch2 << 16); 1300 1301 if (m_pattern.m_ignoreCase) { 1302 if (isASCIIAlpha(ch1)) 1303 mask |= 32; 1304 if (isASCIIAlpha(ch2)) 1305 mask |= 32 << 16; 1306 } 1307 1308 if (mask) { 1309 load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character); 1310 or32(Imm32(mask), character); 1311 state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask))); 1312 } else 1313 state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair))); 1314 } 1315 1316 void generatePatternCharacterFixed(TermGenerationState& state) 1317 { 1318 const RegisterID character = regT0; 1319 const RegisterID countRegister = regT1; 1320 PatternTerm& term = state.term(); 1321 UChar ch = term.patternCharacter; 1322 1323 move(index, countRegister); 1324 sub32(Imm32(term.quantityCount), countRegister); 1325 1326 Label loop(this); 1327 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1328 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); 1329 or32(TrustedImm32(32), character); 1330 state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1331 } else { 1332 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1333 state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch))); 1334 } 1335 add32(TrustedImm32(1), countRegister); 1336 branch32(NotEqual, countRegister, index).linkTo(loop, this); 1337 } 1338 1339 void generatePatternCharacterGreedy(TermGenerationState& state) 1340 { 1341 const RegisterID character = regT0; 1342 const RegisterID countRegister = regT1; 1343 PatternTerm& term = state.term(); 1344 UChar ch = term.patternCharacter; 1345 1346 move(TrustedImm32(0), countRegister); 1347 1348 JumpList failures; 1349 Label loop(this); 1350 failures.append(atEndOfInput()); 1351 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1352 readCharacter(state.inputOffset(), character); 1353 or32(TrustedImm32(32), character); 1354 failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch)))); 1355 } else { 1356 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1357 failures.append(jumpIfCharNotEquals(ch, state.inputOffset())); 1358 } 1359 1360 add32(TrustedImm32(1), countRegister); 1361 add32(TrustedImm32(1), index); 1362 if (term.quantityCount != quantifyInfinite) { 1363 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 1364 failures.append(jump()); 1365 } else 1366 jump(loop); 1367 1368 Label backtrackBegin(this); 1369 loadFromFrame(term.frameLocation, countRegister); 1370 state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); 1371 sub32(TrustedImm32(1), countRegister); 1372 sub32(TrustedImm32(1), index); 1373 1374 failures.link(this); 1375 1376 storeToFrame(countRegister, term.frameLocation); 1377 1378 state.setBacktrackLabel(backtrackBegin); 1379 } 1380 1381 void generatePatternCharacterNonGreedy(TermGenerationState& state) 1382 { 1383 const RegisterID character = regT0; 1384 const RegisterID countRegister = regT1; 1385 PatternTerm& term = state.term(); 1386 UChar ch = term.patternCharacter; 1387 1388 move(TrustedImm32(0), countRegister); 1389 1390 Jump firstTimeDoNothing = jump(); 1391 1392 Label hardFail(this); 1393 sub32(countRegister, index); 1394 state.jumpToBacktrack(this); 1395 1396 Label backtrackBegin(this); 1397 loadFromFrame(term.frameLocation, countRegister); 1398 1399 atEndOfInput().linkTo(hardFail, this); 1400 if (term.quantityCount != quantifyInfinite) 1401 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 1402 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { 1403 readCharacter(state.inputOffset(), character); 1404 or32(TrustedImm32(32), character); 1405 branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this); 1406 } else { 1407 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch))); 1408 jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this); 1409 } 1410 1411 add32(TrustedImm32(1), countRegister); 1412 add32(TrustedImm32(1), index); 1413 1414 firstTimeDoNothing.link(this); 1415 storeToFrame(countRegister, term.frameLocation); 1416 1417 state.setBacktrackLabel(backtrackBegin); 1418 } 1419 1420 void generateCharacterClassSingle(TermGenerationState& state) 1421 { 1422 const RegisterID character = regT0; 1423 PatternTerm& term = state.term(); 1424 1425 JumpList matchDest; 1426 readCharacter(state.inputOffset(), character); 1427 matchCharacterClass(character, matchDest, term.characterClass); 1428 1429 if (term.invert()) 1430 state.jumpToBacktrack(this, matchDest); 1431 else { 1432 state.jumpToBacktrack(this); 1433 matchDest.link(this); 1434 } 1435 } 1436 1437 void generateCharacterClassFixed(TermGenerationState& state) 1438 { 1439 const RegisterID character = regT0; 1440 const RegisterID countRegister = regT1; 1441 PatternTerm& term = state.term(); 1442 1443 move(index, countRegister); 1444 sub32(Imm32(term.quantityCount), countRegister); 1445 1446 Label loop(this); 1447 JumpList matchDest; 1448 load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character); 1449 matchCharacterClass(character, matchDest, term.characterClass); 1450 1451 if (term.invert()) 1452 state.jumpToBacktrack(this, matchDest); 1453 else { 1454 state.jumpToBacktrack(this); 1455 matchDest.link(this); 1456 } 1457 1458 add32(TrustedImm32(1), countRegister); 1459 branch32(NotEqual, countRegister, index).linkTo(loop, this); 1460 } 1461 1462 void generateCharacterClassGreedy(TermGenerationState& state) 1463 { 1464 const RegisterID character = regT0; 1465 const RegisterID countRegister = regT1; 1466 PatternTerm& term = state.term(); 1467 1468 move(TrustedImm32(0), countRegister); 1469 1470 JumpList failures; 1471 Label loop(this); 1472 failures.append(atEndOfInput()); 1473 1474 if (term.invert()) { 1475 readCharacter(state.inputOffset(), character); 1476 matchCharacterClass(character, failures, term.characterClass); 1477 } else { 1478 JumpList matchDest; 1479 readCharacter(state.inputOffset(), character); 1480 matchCharacterClass(character, matchDest, term.characterClass); 1481 failures.append(jump()); 1482 matchDest.link(this); 1483 } 1484 1485 add32(TrustedImm32(1), countRegister); 1486 add32(TrustedImm32(1), index); 1487 if (term.quantityCount != quantifyInfinite) { 1488 branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this); 1489 failures.append(jump()); 1490 } else 1491 jump(loop); 1492 1493 Label backtrackBegin(this); 1494 loadFromFrame(term.frameLocation, countRegister); 1495 state.jumpToBacktrack(this, branchTest32(Zero, countRegister)); 1496 sub32(TrustedImm32(1), countRegister); 1497 sub32(TrustedImm32(1), index); 1498 1499 failures.link(this); 1500 1501 storeToFrame(countRegister, term.frameLocation); 1502 1503 state.setBacktrackLabel(backtrackBegin); 1504 } 1505 1506 void generateCharacterClassNonGreedy(TermGenerationState& state) 1507 { 1508 const RegisterID character = regT0; 1509 const RegisterID countRegister = regT1; 1510 PatternTerm& term = state.term(); 1511 1512 move(TrustedImm32(0), countRegister); 1513 1514 Jump firstTimeDoNothing = jump(); 1515 1516 Label hardFail(this); 1517 sub32(countRegister, index); 1518 state.jumpToBacktrack(this); 1519 1520 Label backtrackBegin(this); 1521 loadFromFrame(term.frameLocation, countRegister); 1522 1523 atEndOfInput().linkTo(hardFail, this); 1524 branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail); 1525 1526 JumpList matchDest; 1527 readCharacter(state.inputOffset(), character); 1528 matchCharacterClass(character, matchDest, term.characterClass); 1529 1530 if (term.invert()) 1531 matchDest.linkTo(hardFail, this); 1532 else { 1533 jump(hardFail); 1534 matchDest.link(this); 1535 } 1536 1537 add32(TrustedImm32(1), countRegister); 1538 add32(TrustedImm32(1), index); 1539 1540 firstTimeDoNothing.link(this); 1541 storeToFrame(countRegister, term.frameLocation); 1542 1543 state.setBacktrackLabel(backtrackBegin); 1544 } 1545 1546 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation) 1547 { 1548 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion)); 1549 ASSERT(parenthesesTerm.quantityCount == 1); 1550 1551 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; 1552 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0; 1553 1554 if (disjunction->m_alternatives.size() == 1) { 1555 state.resetAlternative(); 1556 ASSERT(state.alternativeValid()); 1557 PatternAlternative* alternative = state.alternative(); 1558 optimizeAlternative(alternative); 1559 1560 int countToCheck = alternative->m_minimumSize - preCheckedCount; 1561 if (countToCheck) { 1562 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount)); 1563 1564 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists' 1565 // will be forced to always trampoline into here, just to decrement the index. 1566 // Ick. 1567 Jump skip = jump(); 1568 1569 Label backtrackBegin(this); 1570 sub32(Imm32(countToCheck), index); 1571 state.addBacktrackJump(jump()); 1572 1573 skip.link(this); 1574 1575 state.setBacktrackLabel(backtrackBegin); 1576 1577 state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck)); 1578 state.checkedTotal += countToCheck; 1579 } 1580 1581 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1582 generateTerm(state); 1583 1584 state.checkedTotal -= countToCheck; 1585 } else { 1586 JumpList successes; 1587 bool propogateBacktrack = false; 1588 1589 // Save current state's paren jump list for use with each alternative 1590 JumpList* outerJumpList = state.getJumpListToPriorParen(); 1591 1592 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative(), state.setJumpListToPriorParen(outerJumpList)) { 1593 PatternAlternative* alternative = state.alternative(); 1594 optimizeAlternative(alternative); 1595 1596 ASSERT(alternative->m_minimumSize >= preCheckedCount); 1597 int countToCheck = alternative->m_minimumSize - preCheckedCount; 1598 if (countToCheck) { 1599 state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); 1600 state.checkedTotal += countToCheck; 1601 } 1602 1603 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1604 generateTerm(state); 1605 1606 // Matched an alternative. 1607 DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation); 1608 1609 if (!state.isLastAlternative() || countToCheck) 1610 successes.append(jump()); 1611 1612 // Alternative did not match. 1613 1614 // Do we have a backtrack destination? 1615 // if so, link the data label to it. 1616 state.linkDataLabelToBacktrackIfExists(this, dataLabel); 1617 1618 if (!state.isLastAlternative() || countToCheck) 1619 state.linkAlternativeBacktracks(this); 1620 1621 if (countToCheck) { 1622 sub32(Imm32(countToCheck), index); 1623 state.checkedTotal -= countToCheck; 1624 } else if (state.isLastAlternative()) 1625 propogateBacktrack = true; 1626 } 1627 // We fall through to here when the last alternative fails. 1628 // Add a backtrack out of here for the parenthese handling code to link up. 1629 if (!propogateBacktrack) 1630 state.addBacktrackJump(jump()); 1631 1632 // Save address on stack for the parens code to backtrack to, to retry the 1633 // next alternative. 1634 state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*)); 1635 1636 successes.link(this); 1637 } 1638 } 1639 1640 void generateParenthesesSingle(TermGenerationState& state) 1641 { 1642 const RegisterID indexTemporary = regT0; 1643 PatternTerm& term = state.term(); 1644 PatternDisjunction* disjunction = term.parentheses.disjunction; 1645 ASSERT(term.quantityCount == 1); 1646 1647 unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0; 1648 1649 unsigned parenthesesFrameLocation = term.frameLocation; 1650 unsigned alternativeFrameLocation = parenthesesFrameLocation; 1651 if (term.quantityType != QuantifierFixedCount) 1652 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1653 1654 // optimized case - no capture & no quantifier can be handled in a light-weight manner. 1655 if (!term.capture() && (term.quantityType == QuantifierFixedCount)) { 1656 m_expressionState.incrementParenNestingLevel(); 1657 1658 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1659 1660 // Use the current state's jump list for the nested parentheses. 1661 parenthesesState.setJumpListToPriorParen(state.getJumpListToPriorParen()); 1662 1663 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1664 // this expects that any backtracks back out of the parentheses will be in the 1665 // parenthesesState's m_backTrackJumps vector, and that if they need backtracking 1666 // they will have set an entry point on the parenthesesState's m_backtrackLabel. 1667 BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination(); 1668 BacktrackDestination& stateBacktrack = state.getBacktrackDestination(); 1669 1670 state.propagateBacktrackingFrom(this, parenthesesBacktrack); 1671 stateBacktrack.propagateBacktrackToLabel(parenthesesBacktrack); 1672 1673 state.setJumpListToPriorParen(parenthesesState.getJumpListToPriorParen()); 1674 1675 m_expressionState.decrementParenNestingLevel(); 1676 } else { 1677 Jump nonGreedySkipParentheses; 1678 Label nonGreedyTryParentheses; 1679 if (term.quantityType == QuantifierGreedy) 1680 storeToFrame(index, parenthesesFrameLocation); 1681 else if (term.quantityType == QuantifierNonGreedy) { 1682 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); 1683 nonGreedySkipParentheses = jump(); 1684 nonGreedyTryParentheses = label(); 1685 storeToFrame(index, parenthesesFrameLocation); 1686 } 1687 1688 // store the match start index 1689 if (term.capture()) { 1690 int inputOffset = state.inputOffset() - preCheckedCount; 1691 if (inputOffset) { 1692 move(index, indexTemporary); 1693 add32(Imm32(inputOffset), indexTemporary); 1694 store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 1695 } else 1696 store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int))); 1697 } 1698 1699 ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getJumpListToPriorParen()); 1700 1701 m_expressionState.incrementParenNestingLevel(); 1702 1703 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1704 1705 // Save the parenthesesTail for backtracking from nested parens to this one. 1706 parenthesesState.setJumpListToPriorParen(&parenthesesTail->m_withinBacktrackJumps); 1707 1708 // generate the body of the parentheses 1709 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1710 1711 // For non-fixed counts, backtrack if we didn't match anything. 1712 if (term.quantityType != QuantifierFixedCount) 1713 parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*))))); 1714 1715 // store the match end index 1716 if (term.capture()) { 1717 int inputOffset = state.inputOffset(); 1718 if (inputOffset) { 1719 move(index, indexTemporary); 1720 add32(Imm32(state.inputOffset()), indexTemporary); 1721 store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 1722 } else 1723 store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int))); 1724 } 1725 1726 m_expressionState.decrementParenNestingLevel(); 1727 1728 parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label()); 1729 1730 state.setJumpListToPriorParen(&parenthesesTail->m_afterBacktrackJumps); 1731 1732 parenthesesState.getBacktrackDestination().clear(); 1733 1734 if (term.quantityType == QuantifierNonGreedy) 1735 nonGreedySkipParentheses.link(this); 1736 } 1737 } 1738 1739 void generateParenthesesGreedyNoBacktrack(TermGenerationState& state) 1740 { 1741 PatternTerm& parenthesesTerm = state.term(); 1742 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction; 1743 ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern); 1744 ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle. 1745 1746 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1747 1748 Label matchAgain(this); 1749 1750 storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later. 1751 1752 for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) { 1753 1754 PatternAlternative* alternative = parenthesesState.alternative(); 1755 optimizeAlternative(alternative); 1756 1757 int countToCheck = alternative->m_minimumSize; 1758 if (countToCheck) { 1759 parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck)); 1760 parenthesesState.checkedTotal += countToCheck; 1761 } 1762 1763 for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm()) 1764 generateTerm(parenthesesState); 1765 1766 // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet. 1767 branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain); 1768 1769 // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack, 1770 // or fall through to try the next alternative if no backtrack is available. 1771 parenthesesState.plantJumpToBacktrackIfExists(this); 1772 1773 parenthesesState.linkAlternativeBacktracks(this); 1774 1775 // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop. 1776 1777 if (countToCheck) { 1778 sub32(Imm32(countToCheck), index); 1779 parenthesesState.checkedTotal -= countToCheck; 1780 } 1781 } 1782 1783 // If the last alternative falls through to here, we have a failed match... 1784 // Which means that we match whatever we have matched up to this point (even if nothing). 1785 } 1786 1787 void generateParentheticalAssertion(TermGenerationState& state) 1788 { 1789 PatternTerm& term = state.term(); 1790 PatternDisjunction* disjunction = term.parentheses.disjunction; 1791 ASSERT(term.quantityCount == 1); 1792 ASSERT(term.quantityType == QuantifierFixedCount); 1793 1794 unsigned parenthesesFrameLocation = term.frameLocation; 1795 unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; 1796 1797 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition; 1798 1799 if (term.invert()) { 1800 // Inverted case 1801 storeToFrame(index, parenthesesFrameLocation); 1802 1803 state.checkedTotal -= countCheckedAfterAssertion; 1804 if (countCheckedAfterAssertion) 1805 sub32(Imm32(countCheckedAfterAssertion), index); 1806 1807 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1808 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1809 // Success! - which means - Fail! 1810 loadFromFrame(parenthesesFrameLocation, index); 1811 state.jumpToBacktrack(this); 1812 1813 // And fail means success. 1814 parenthesesState.linkAlternativeBacktracks(this); 1815 1816 loadFromFrame(parenthesesFrameLocation, index); 1817 1818 state.checkedTotal += countCheckedAfterAssertion; 1819 } else { 1820 // Normal case 1821 storeToFrame(index, parenthesesFrameLocation); 1822 1823 state.checkedTotal -= countCheckedAfterAssertion; 1824 if (countCheckedAfterAssertion) 1825 sub32(Imm32(countCheckedAfterAssertion), index); 1826 1827 TermGenerationState parenthesesState(disjunction, state.checkedTotal); 1828 generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation); 1829 // Success! - which means - Success! 1830 loadFromFrame(parenthesesFrameLocation, index); 1831 Jump success = jump(); 1832 1833 parenthesesState.linkAlternativeBacktracks(this); 1834 1835 loadFromFrame(parenthesesFrameLocation, index); 1836 state.jumpToBacktrack(this); 1837 1838 success.link(this); 1839 1840 state.checkedTotal += countCheckedAfterAssertion; 1841 } 1842 } 1843 1844 void generateTerm(TermGenerationState& state) 1845 { 1846 PatternTerm& term = state.term(); 1847 1848 switch (term.type) { 1849 case PatternTerm::TypeAssertionBOL: 1850 generateAssertionBOL(state); 1851 break; 1852 1853 case PatternTerm::TypeAssertionEOL: 1854 generateAssertionEOL(state); 1855 break; 1856 1857 case PatternTerm::TypeAssertionWordBoundary: 1858 generateAssertionWordBoundary(state); 1859 break; 1860 1861 case PatternTerm::TypePatternCharacter: 1862 switch (term.quantityType) { 1863 case QuantifierFixedCount: 1864 if (term.quantityCount == 1) { 1865 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) { 1866 generatePatternCharacterPair(state); 1867 state.nextTerm(); 1868 } else 1869 generatePatternCharacterSingle(state); 1870 } else 1871 generatePatternCharacterFixed(state); 1872 break; 1873 case QuantifierGreedy: 1874 generatePatternCharacterGreedy(state); 1875 break; 1876 case QuantifierNonGreedy: 1877 generatePatternCharacterNonGreedy(state); 1878 break; 1879 } 1880 break; 1881 1882 case PatternTerm::TypeCharacterClass: 1883 switch (term.quantityType) { 1884 case QuantifierFixedCount: 1885 if (term.quantityCount == 1) 1886 generateCharacterClassSingle(state); 1887 else 1888 generateCharacterClassFixed(state); 1889 break; 1890 case QuantifierGreedy: 1891 generateCharacterClassGreedy(state); 1892 break; 1893 case QuantifierNonGreedy: 1894 generateCharacterClassNonGreedy(state); 1895 break; 1896 } 1897 break; 1898 1899 case PatternTerm::TypeBackReference: 1900 m_shouldFallBack = true; 1901 break; 1902 1903 case PatternTerm::TypeForwardReference: 1904 break; 1905 1906 case PatternTerm::TypeParenthesesSubpattern: 1907 if (term.quantityCount == 1 && !term.parentheses.isCopy) 1908 generateParenthesesSingle(state); 1909 else if (term.parentheses.isTerminal) 1910 generateParenthesesGreedyNoBacktrack(state); 1911 else 1912 m_shouldFallBack = true; 1913 break; 1914 1915 case PatternTerm::TypeParentheticalAssertion: 1916 generateParentheticalAssertion(state); 1917 break; 1918 } 1919 } 1920 1921 void generateDisjunction(PatternDisjunction* disjunction) 1922 { 1923 TermGenerationState state(disjunction, 0); 1924 state.resetAlternative(); 1925 1926 // check availability for the next alternative 1927 int countCheckedForCurrentAlternative = 0; 1928 int countToCheckForFirstAlternative = 0; 1929 bool hasShorterAlternatives = false; 1930 bool setRepeatAlternativeLabels = false; 1931 JumpList notEnoughInputForPreviousAlternative; 1932 Label firstAlternative; 1933 Label firstAlternativeInputChecked; 1934 1935 // The label 'firstAlternative' is used to plant a check to see if there is 1936 // sufficient input available to run the first repeating alternative. 1937 // The label 'firstAlternativeInputChecked' will jump directly to matching 1938 // the first repeating alternative having skipped this check. 1939 1940 if (state.alternativeValid()) { 1941 PatternAlternative* alternative = state.alternative(); 1942 if (!alternative->onceThrough()) { 1943 firstAlternative = Label(this); 1944 setRepeatAlternativeLabels = true; 1945 } 1946 countToCheckForFirstAlternative = alternative->m_minimumSize; 1947 state.checkedTotal += countToCheckForFirstAlternative; 1948 if (countToCheckForFirstAlternative) 1949 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); 1950 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; 1951 } 1952 1953 if (setRepeatAlternativeLabels) 1954 firstAlternativeInputChecked = Label(this); 1955 1956 while (state.alternativeValid()) { 1957 PatternAlternative* alternative = state.alternative(); 1958 optimizeAlternative(alternative); 1959 1960 // Track whether any alternatives are shorter than the first one. 1961 if (!alternative->onceThrough()) 1962 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative); 1963 1964 for (state.resetTerm(); state.termValid(); state.nextTerm()) 1965 generateTerm(state); 1966 1967 // If we get here, the alternative matched. 1968 if (m_pattern.m_body->m_callFrameSize) 1969 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 1970 1971 ASSERT(index != returnRegister); 1972 if (m_pattern.m_body->m_hasFixedSize) { 1973 move(index, returnRegister); 1974 if (alternative->m_minimumSize) 1975 sub32(Imm32(alternative->m_minimumSize), returnRegister); 1976 1977 store32(returnRegister, output); 1978 } else 1979 load32(Address(output), returnRegister); 1980 1981 store32(index, Address(output, 4)); 1982 1983 generateReturn(); 1984 1985 state.nextAlternative(); 1986 if (alternative->onceThrough() && state.alternativeValid()) 1987 state.clearBacktrack(); 1988 1989 // if there are any more alternatives, plant the check for input before looping. 1990 if (state.alternativeValid()) { 1991 state.setJumpListToPriorParen(0); 1992 PatternAlternative* nextAlternative = state.alternative(); 1993 if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) { 1994 // We have handled non-repeating alternatives, jump to next iteration 1995 // and loop over repeating alternatives. 1996 state.jumpToBacktrack(this); 1997 1998 countToCheckForFirstAlternative = nextAlternative->m_minimumSize; 1999 2000 // If we get here, there the last input checked failed. 2001 notEnoughInputForPreviousAlternative.link(this); 2002 2003 state.linkAlternativeBacktracks(this); 2004 2005 // Back up to start the looping alternatives. 2006 if (countCheckedForCurrentAlternative) 2007 sub32(Imm32(countCheckedForCurrentAlternative), index); 2008 2009 firstAlternative = Label(this); 2010 2011 state.checkedTotal = countToCheckForFirstAlternative; 2012 if (countToCheckForFirstAlternative) 2013 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative)); 2014 2015 countCheckedForCurrentAlternative = countToCheckForFirstAlternative; 2016 2017 firstAlternativeInputChecked = Label(this); 2018 2019 setRepeatAlternativeLabels = true; 2020 } else { 2021 int countToCheckForNextAlternative = nextAlternative->m_minimumSize; 2022 2023 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one. 2024 // If we get here, then the last input checked failed. 2025 notEnoughInputForPreviousAlternative.link(this); 2026 2027 // Check if sufficent input available to run the next alternative 2028 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 2029 // We are now in the correct state to enter the next alternative; this add is only required 2030 // to mirror and revert operation of the sub32, just below. 2031 add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 2032 2033 // If we get here, then the last input checked passed. 2034 state.linkAlternativeBacktracks(this); 2035 2036 // No need to check if we can run the next alternative, since it is shorter - 2037 // just update index. 2038 sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index); 2039 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one. 2040 // If we get here, then the last input checked failed. 2041 // If there is insufficient input to run the current alternative, and the next alternative is longer, 2042 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if 2043 // we had checked. 2044 notEnoughInputForPreviousAlternative.link(this); 2045 add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index); 2046 notEnoughInputForPreviousAlternative.append(jump()); 2047 2048 // The next alternative is longer than the current one; check the difference. 2049 state.linkAlternativeBacktracks(this); 2050 2051 notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative)); 2052 } else { // CASE 3: Both alternatives are the same length. 2053 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative); 2054 2055 // If the next alterative is the same length as this one, then no need to check the input - 2056 // if there was sufficent input to run the current alternative then there is sufficient 2057 // input to run the next one; if not, there isn't. 2058 state.linkAlternativeBacktracks(this); 2059 } 2060 state.checkedTotal -= countCheckedForCurrentAlternative; 2061 countCheckedForCurrentAlternative = countToCheckForNextAlternative; 2062 state.checkedTotal += countCheckedForCurrentAlternative; 2063 } 2064 } 2065 } 2066 2067 // If we get here, all Alternatives failed... 2068 2069 state.checkedTotal -= countCheckedForCurrentAlternative; 2070 2071 if (!setRepeatAlternativeLabels) { 2072 // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link 2073 // the match failures to this point, and fall through to the return below. 2074 state.linkAlternativeBacktracks(this, true); 2075 2076 notEnoughInputForPreviousAlternative.link(this); 2077 } else { 2078 // How much more input need there be to be able to retry from the first alternative? 2079 // examples: 2080 // /yarr_jit/ or /wrec|pcre/ 2081 // In these examples we need check for one more input before looping. 2082 // /yarr_jit|pcre/ 2083 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative 2084 // being four longer than the last alternative checked, and another +1 to effectively move 2085 // the start position along by one). 2086 // /yarr|rules/ or /wrec|notsomuch/ 2087 // In these examples, provided that there was sufficient input to have just been matching for 2088 // the second alternative we can loop without checking for available input (since the second 2089 // alternative is longer than the first). In the latter example we need to decrement index 2090 // (by 4) so the start position is only progressed by 1 from the last iteration. 2091 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1; 2092 2093 // First, deal with the cases where there was sufficient input to try the last alternative. 2094 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below. 2095 state.linkAlternativeBacktracks(this, true); 2096 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop! 2097 state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true); 2098 else { // no need to check the input, but we do have some bookkeeping to do first. 2099 state.linkAlternativeBacktracks(this, true); 2100 2101 // Where necessary update our preserved start position. 2102 if (!m_pattern.m_body->m_hasFixedSize) { 2103 move(index, regT0); 2104 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 2105 store32(regT0, Address(output)); 2106 } 2107 2108 // Update index if necessary, and loop (without checking). 2109 if (incrementForNextIter) 2110 add32(Imm32(incrementForNextIter), index); 2111 jump().linkTo(firstAlternativeInputChecked, this); 2112 } 2113 2114 notEnoughInputForPreviousAlternative.link(this); 2115 // Update our idea of the start position, if we're tracking this. 2116 if (!m_pattern.m_body->m_hasFixedSize) { 2117 if (countCheckedForCurrentAlternative - 1) { 2118 move(index, regT0); 2119 sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0); 2120 store32(regT0, Address(output)); 2121 } else 2122 store32(index, Address(output)); 2123 } 2124 2125 // Check if there is sufficent input to run the first alternative again. 2126 jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this); 2127 // No - insufficent input to run the first alteranative, are there any other alternatives we 2128 // might need to check? If so, the last check will have left the index incremented by 2129 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative 2130 // LESS input is available, to have the effect of just progressing the start position by 1 2131 // from the last iteration. If this check passes we can just jump up to the check associated 2132 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the 2133 // first alternative again, and this check will fail (otherwise the check planted just above 2134 // here would have passed). This is a bit sad, however it saves trying to do something more 2135 // complex here in compilation, and in the common case we should end up coallescing the checks. 2136 // 2137 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least 2138 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150, 2139 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there 2140 // is sufficient input to run either alternative (constantly failing). If there had been only 2141 // one alternative, or if the shorter alternative had come first, we would have terminated 2142 // immediately. :-/ 2143 if (hasShorterAlternatives) 2144 jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this); 2145 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true, 2146 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ... 2147 // but since we're about to return a failure this doesn't really matter!) 2148 } 2149 2150 if (m_pattern.m_body->m_callFrameSize) 2151 addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 2152 2153 move(TrustedImm32(-1), returnRegister); 2154 2155 generateReturn(); 2156 2157 m_expressionState.emitParenthesesTail(this); 2158 m_expressionState.emitIndirectJumpTable(this); 2159 m_expressionState.linkToNextIteration(this); 2160 } 2161 2162 void generateEnter() 2163 { 2164 #if CPU(X86_64) 2165 push(X86Registers::ebp); 2166 move(stackPointerRegister, X86Registers::ebp); 2167 push(X86Registers::ebx); 2168 #elif CPU(X86) 2169 push(X86Registers::ebp); 2170 move(stackPointerRegister, X86Registers::ebp); 2171 // TODO: do we need spill registers to fill the output pointer if there are no sub captures? 2172 push(X86Registers::ebx); 2173 push(X86Registers::edi); 2174 push(X86Registers::esi); 2175 // load output into edi (2 = saved ebp + return address). 2176 #if COMPILER(MSVC) 2177 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); 2178 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); 2179 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); 2180 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); 2181 #else 2182 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); 2183 #endif 2184 #elif CPU(ARM) 2185 push(ARMRegisters::r4); 2186 push(ARMRegisters::r5); 2187 push(ARMRegisters::r6); 2188 #if CPU(ARM_TRADITIONAL) 2189 push(ARMRegisters::r8); // scratch register 2190 #endif 2191 move(ARMRegisters::r3, output); 2192 #elif CPU(SH4) 2193 push(SH4Registers::r11); 2194 push(SH4Registers::r13); 2195 #elif CPU(MIPS) 2196 // Do nothing. 2197 #endif 2198 } 2199 2200 void generateReturn() 2201 { 2202 #if CPU(X86_64) 2203 pop(X86Registers::ebx); 2204 pop(X86Registers::ebp); 2205 #elif CPU(X86) 2206 pop(X86Registers::esi); 2207 pop(X86Registers::edi); 2208 pop(X86Registers::ebx); 2209 pop(X86Registers::ebp); 2210 #elif CPU(ARM) 2211 #if CPU(ARM_TRADITIONAL) 2212 pop(ARMRegisters::r8); // scratch register 2213 #endif 2214 pop(ARMRegisters::r6); 2215 pop(ARMRegisters::r5); 2216 pop(ARMRegisters::r4); 2217 #elif CPU(SH4) 2218 pop(SH4Registers::r13); 2219 pop(SH4Registers::r11); 2220 #elif CPU(MIPS) 2221 // Do nothing 2222 #endif 2223 ret(); 2224 } 2225 2226 public: 2227 YarrGenerator(YarrPattern& pattern) 2228 : m_pattern(pattern) 2229 , m_shouldFallBack(false) 2230 { 2231 } 2232 2233 void generate() 2234 { 2235 generateEnter(); 2236 2237 if (!m_pattern.m_body->m_hasFixedSize) 2238 store32(index, Address(output)); 2239 2240 if (m_pattern.m_body->m_callFrameSize) 2241 subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); 2242 2243 generateDisjunction(m_pattern.m_body); 2244 } 2245 2246 void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) 2247 { 2248 generate(); 2249 2250 LinkBuffer patchBuffer(this, globalData->regexAllocator.poolForSize(size()), 0); 2251 2252 for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i) 2253 patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation)); 2254 2255 jitObject.set(patchBuffer.finalizeCode()); 2256 jitObject.setFallBack(m_shouldFallBack); 2257 } 2258 2259 private: 2260 YarrPattern& m_pattern; 2261 bool m_shouldFallBack; 2262 GenerationState m_expressionState; 2263 }; 2264 2265 void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject) 2266 { 2267 YarrGenerator(pattern).compile(globalData, jitObject); 2268 } 2269 2270 int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) 2271 { 2272 return jitObject.execute(input, start, length, output); 2273 } 2274 2275 }} 2276 2277 #endif 2278