1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/TextIterator.h" 29 30 #include "HTMLNames.h" 31 #include "bindings/v8/ExceptionStatePlaceholder.h" 32 #include "core/dom/Document.h" 33 #include "core/dom/NodeTraversal.h" 34 #include "core/dom/Range.h" 35 #include "core/dom/shadow/ShadowRoot.h" 36 #include "core/editing/VisiblePosition.h" 37 #include "core/editing/VisibleUnits.h" 38 #include "core/editing/htmlediting.h" 39 #include "core/html/HTMLElement.h" 40 #include "core/html/HTMLTextFormControlElement.h" 41 #include "core/platform/graphics/Font.h" 42 #include "core/platform/text/TextBoundaries.h" 43 #include "core/platform/text/TextBreakIteratorInternalICU.h" 44 #include "core/rendering/InlineTextBox.h" 45 #include "core/rendering/RenderImage.h" 46 #include "core/rendering/RenderTableCell.h" 47 #include "core/rendering/RenderTableRow.h" 48 #include "core/rendering/RenderTextControl.h" 49 #include "core/rendering/RenderTextFragment.h" 50 #include "wtf/text/CString.h" 51 #include "wtf/text/StringBuilder.h" 52 #include "wtf/unicode/CharacterNames.h" 53 #include <unicode/usearch.h> 54 55 using namespace WTF::Unicode; 56 using namespace std; 57 58 namespace WebCore { 59 60 using namespace HTMLNames; 61 62 // Buffer that knows how to compare with a search target. 63 // Keeps enough of the previous text to be able to search in the future, but no more. 64 // Non-breaking spaces are always equal to normal spaces. 65 // Case folding is also done if the CaseInsensitive option is specified. 66 // Matches are further filtered if the AtWordStarts option is specified, although some 67 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 68 class SearchBuffer { 69 WTF_MAKE_NONCOPYABLE(SearchBuffer); 70 public: 71 SearchBuffer(const String& target, FindOptions); 72 ~SearchBuffer(); 73 74 // Returns number of characters appended; guaranteed to be in the range [1, length]. 75 template<typename CharType> 76 void append(const CharType*, size_t length); 77 size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; } 78 79 bool needsMoreContext() const; 80 void prependContext(const UChar*, size_t length); 81 void reachedBreak(); 82 83 // Result is the size in characters of what was found. 84 // And <startOffset> is the number of characters back to the start of what was found. 85 size_t search(size_t& startOffset); 86 bool atBreak() const; 87 88 private: 89 bool isBadMatch(const UChar*, size_t length) const; 90 bool isWordStartMatch(size_t start, size_t length) const; 91 92 Vector<UChar> m_target; 93 FindOptions m_options; 94 95 Vector<UChar> m_buffer; 96 size_t m_overlap; 97 size_t m_prefixLength; 98 size_t m_numberOfCharactersJustAppended; 99 bool m_atBreak; 100 bool m_needsMoreContext; 101 102 bool m_targetRequiresKanaWorkaround; 103 Vector<UChar> m_normalizedTarget; 104 mutable Vector<UChar> m_normalizedMatch; 105 }; 106 107 // -------- 108 109 static const unsigned bitsInWord = sizeof(unsigned) * 8; 110 static const unsigned bitInWordMask = bitsInWord - 1; 111 112 BitStack::BitStack() 113 : m_size(0) 114 { 115 } 116 117 BitStack::~BitStack() 118 { 119 } 120 121 void BitStack::push(bool bit) 122 { 123 unsigned index = m_size / bitsInWord; 124 unsigned shift = m_size & bitInWordMask; 125 if (!shift && index == m_words.size()) { 126 m_words.grow(index + 1); 127 m_words[index] = 0; 128 } 129 unsigned& word = m_words[index]; 130 unsigned mask = 1U << shift; 131 if (bit) 132 word |= mask; 133 else 134 word &= ~mask; 135 ++m_size; 136 } 137 138 void BitStack::pop() 139 { 140 if (m_size) 141 --m_size; 142 } 143 144 bool BitStack::top() const 145 { 146 if (!m_size) 147 return false; 148 unsigned shift = (m_size - 1) & bitInWordMask; 149 return m_words.last() & (1U << shift); 150 } 151 152 unsigned BitStack::size() const 153 { 154 return m_size; 155 } 156 157 // -------- 158 159 #if !ASSERT_DISABLED 160 161 static unsigned depthCrossingShadowBoundaries(Node* node) 162 { 163 unsigned depth = 0; 164 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 165 ++depth; 166 return depth; 167 } 168 169 #endif 170 171 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 172 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 173 { 174 if (!rangeEndContainer) 175 return 0; 176 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 177 if (Node* next = rangeEndContainer->childNode(rangeEndOffset)) 178 return next; 179 } 180 for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) { 181 if (Node* next = node->nextSibling()) 182 return next; 183 } 184 return 0; 185 } 186 187 // -------- 188 189 static inline bool fullyClipsContents(Node* node) 190 { 191 RenderObject* renderer = node->renderer(); 192 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 193 return false; 194 return toRenderBox(renderer)->size().isEmpty(); 195 } 196 197 static inline bool ignoresContainerClip(Node* node) 198 { 199 RenderObject* renderer = node->renderer(); 200 if (!renderer || renderer->isText()) 201 return false; 202 return renderer->style()->hasOutOfFlowPosition(); 203 } 204 205 static void pushFullyClippedState(BitStack& stack, Node* node) 206 { 207 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 208 209 // Push true if this node full clips its contents, or if a parent already has fully 210 // clipped and this is not a node that ignores its container's clip. 211 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 212 } 213 214 static void setUpFullyClippedStack(BitStack& stack, Node* node) 215 { 216 // Put the nodes in a vector so we can iterate in reverse order. 217 Vector<Node*, 100> ancestry; 218 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 219 ancestry.append(parent); 220 221 // Call pushFullyClippedState on each node starting with the earliest ancestor. 222 size_t size = ancestry.size(); 223 for (size_t i = 0; i < size; ++i) 224 pushFullyClippedState(stack, ancestry[size - i - 1]); 225 pushFullyClippedState(stack, node); 226 227 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 228 } 229 230 // -------- 231 232 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior) 233 : m_startContainer(0) 234 , m_startOffset(0) 235 , m_endContainer(0) 236 , m_endOffset(0) 237 , m_positionNode(0) 238 , m_textLength(0) 239 , m_remainingTextBox(0) 240 , m_firstLetterText(0) 241 , m_sortedTextBoxesPosition(0) 242 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 243 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 244 , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding) 245 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 246 , m_handledFirstLetter(false) 247 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 248 , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters) 249 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 250 , m_shouldStop(false) 251 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 252 { 253 if (!r) 254 return; 255 256 // get and validate the range endpoints 257 Node* startContainer = r->startContainer(); 258 if (!startContainer) 259 return; 260 int startOffset = r->startOffset(); 261 Node* endContainer = r->endContainer(); 262 int endOffset = r->endOffset(); 263 264 // Callers should be handing us well-formed ranges. If we discover that this isn't 265 // the case, we could consider changing this assertion to an early return. 266 ASSERT(r->boundaryPointsValid()); 267 268 // remember range - this does not change 269 m_startContainer = startContainer; 270 m_startOffset = startOffset; 271 m_endContainer = endContainer; 272 m_endOffset = endOffset; 273 274 // set up the current node for processing 275 m_node = r->firstNode(); 276 if (!m_node) 277 return; 278 setUpFullyClippedStack(m_fullyClippedStack, m_node); 279 m_offset = m_node == m_startContainer ? m_startOffset : 0; 280 m_handledNode = false; 281 m_handledChildren = false; 282 283 // calculate first out of bounds node 284 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 285 286 // initialize node processing state 287 m_needsAnotherNewline = false; 288 m_textBox = 0; 289 290 // initialize record of previous node processing 291 m_hasEmitted = false; 292 m_lastTextNode = 0; 293 m_lastTextNodeEndedWithCollapsedSpace = false; 294 m_lastCharacter = 0; 295 296 #ifndef NDEBUG 297 // need this just because of the assert in advance() 298 m_positionNode = m_node; 299 #endif 300 301 // identify the first run 302 advance(); 303 } 304 305 TextIterator::~TextIterator() 306 { 307 } 308 309 void TextIterator::advance() 310 { 311 if (m_shouldStop) 312 return; 313 314 // reset the run information 315 m_positionNode = 0; 316 m_textLength = 0; 317 318 // handle remembered node that needed a newline after the text node's newline 319 if (m_needsAnotherNewline) { 320 // Emit the extra newline, and position it *inside* m_node, after m_node's 321 // contents, in case it's a block, in the same way that we position the first 322 // newline. The range for the emitted newline should start where the line 323 // break begins. 324 // FIXME: It would be cleaner if we emitted two newlines during the last 325 // iteration, instead of using m_needsAnotherNewline. 326 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 327 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 328 m_needsAnotherNewline = false; 329 return; 330 } 331 332 if (!m_textBox && m_remainingTextBox) { 333 m_textBox = m_remainingTextBox; 334 m_remainingTextBox = 0; 335 m_firstLetterText = 0; 336 m_offset = 0; 337 } 338 // handle remembered text box 339 if (m_textBox) { 340 handleTextBox(); 341 if (m_positionNode) 342 return; 343 } 344 345 while (m_node && m_node != m_pastEndNode) { 346 if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) 347 m_shouldStop = true; 348 349 // if the range ends at offset 0 of an element, represent the 350 // position, but not the content, of that element e.g. if the 351 // node is a blockflow element, emit a newline that 352 // precedes the element 353 if (m_node == m_endContainer && m_endOffset == 0) { 354 representNodeOffsetZero(); 355 m_node = 0; 356 return; 357 } 358 359 RenderObject* renderer = m_node->renderer(); 360 if (!renderer) { 361 m_handledNode = true; 362 m_handledChildren = true; 363 } else { 364 // handle current node according to its type 365 if (!m_handledNode) { 366 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE? 367 m_handledNode = handleTextNode(); 368 else if (renderer && (renderer->isImage() || renderer->isWidget() || 369 (renderer->node() && renderer->node()->isElementNode() && 370 (toElement(renderer->node())->isFormControlElement() 371 || toElement(renderer->node())->hasTagName(legendTag) 372 || toElement(renderer->node())->hasTagName(meterTag) 373 || toElement(renderer->node())->hasTagName(progressTag))))) 374 m_handledNode = handleReplacedElement(); 375 else 376 m_handledNode = handleNonTextNode(); 377 if (m_positionNode) 378 return; 379 } 380 } 381 382 // find a new current node to handle in depth-first manner, 383 // calling exitNode() as we come back thru a parent node 384 Node* next = m_handledChildren ? 0 : m_node->firstChild(); 385 m_offset = 0; 386 if (!next) { 387 next = m_node->nextSibling(); 388 if (!next) { 389 bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode; 390 Node* parentNode = m_node->parentOrShadowHostNode(); 391 while (!next && parentNode) { 392 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 393 return; 394 bool haveRenderer = m_node->renderer(); 395 m_node = parentNode; 396 m_fullyClippedStack.pop(); 397 parentNode = m_node->parentOrShadowHostNode(); 398 if (haveRenderer) 399 exitNode(); 400 if (m_positionNode) { 401 m_handledNode = true; 402 m_handledChildren = true; 403 return; 404 } 405 next = m_node->nextSibling(); 406 } 407 } 408 m_fullyClippedStack.pop(); 409 } 410 411 // set the new current node 412 m_node = next; 413 if (m_node) 414 pushFullyClippedState(m_fullyClippedStack, m_node); 415 m_handledNode = false; 416 m_handledChildren = false; 417 m_handledFirstLetter = false; 418 m_firstLetterText = 0; 419 420 // how would this ever be? 421 if (m_positionNode) 422 return; 423 } 424 } 425 426 UChar TextIterator::characterAt(unsigned index) const 427 { 428 ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length())); 429 if (!(index < static_cast<unsigned>(length()))) 430 return 0; 431 432 if (m_singleCharacterBuffer) { 433 ASSERT(!index); 434 ASSERT(length() == 1); 435 return m_singleCharacterBuffer; 436 } 437 438 return string()[startOffset() + index]; 439 } 440 441 String TextIterator::substring(unsigned position, unsigned length) const 442 { 443 ASSERT_WITH_SECURITY_IMPLICATION(position < static_cast<unsigned>(this->length())); 444 ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length())); 445 if (!length) 446 return emptyString(); 447 if (m_singleCharacterBuffer) { 448 ASSERT(!position); 449 ASSERT(length == 1); 450 return String(&m_singleCharacterBuffer, 1); 451 } 452 return string().substring(startOffset() + position, length); 453 } 454 455 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const 456 { 457 unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength); 458 if (!lengthToAppend) 459 return; 460 if (m_singleCharacterBuffer) { 461 ASSERT(!position); 462 builder.append(m_singleCharacterBuffer); 463 } else { 464 builder.append(string(), startOffset() + position, lengthToAppend); 465 } 466 } 467 468 bool TextIterator::handleTextNode() 469 { 470 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 471 return false; 472 473 RenderText* renderer = toRenderText(m_node->renderer()); 474 475 m_lastTextNode = m_node; 476 String str = renderer->text(); 477 478 // handle pre-formatted text 479 if (!renderer->style()->collapseWhiteSpace()) { 480 int runStart = m_offset; 481 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 482 emitCharacter(' ', m_node, 0, runStart, runStart); 483 return false; 484 } 485 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) { 486 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 487 if (m_firstLetterText) { 488 String firstLetter = m_firstLetterText->text(); 489 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 490 m_firstLetterText = 0; 491 m_textBox = 0; 492 return false; 493 } 494 } 495 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 496 return false; 497 int strLength = str.length(); 498 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; 499 int runEnd = min(strLength, end); 500 501 if (runStart >= runEnd) 502 return true; 503 504 emitText(m_node, runStart, runEnd); 505 return true; 506 } 507 508 if (renderer->firstTextBox()) 509 m_textBox = renderer->firstTextBox(); 510 511 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset; 512 if (shouldHandleFirstLetter) 513 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 514 515 if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) { 516 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 517 return false; 518 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 519 return true; 520 } 521 522 if (m_firstLetterText) 523 renderer = m_firstLetterText; 524 525 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 526 if (renderer->containsReversedText()) { 527 m_sortedTextBoxes.clear(); 528 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 529 m_sortedTextBoxes.append(textBox); 530 } 531 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 532 m_sortedTextBoxesPosition = 0; 533 m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]; 534 } 535 536 handleTextBox(); 537 return true; 538 } 539 540 void TextIterator::handleTextBox() 541 { 542 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); 543 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 544 m_textBox = 0; 545 return; 546 } 547 String str = renderer->text(); 548 unsigned start = m_offset; 549 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX; 550 while (m_textBox) { 551 unsigned textBoxStart = m_textBox->start(); 552 unsigned runStart = max(textBoxStart, start); 553 554 // Check for collapsed space at the start of this run. 555 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 556 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 557 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 558 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { 559 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 560 unsigned spaceRunStart = runStart - 1; 561 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 562 --spaceRunStart; 563 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 564 } else 565 emitCharacter(' ', m_node, 0, runStart, runStart); 566 return; 567 } 568 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 569 unsigned runEnd = min(textBoxEnd, end); 570 571 // Determine what the next text box will be, but don't advance yet 572 InlineTextBox* nextTextBox = 0; 573 if (renderer->containsReversedText()) { 574 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 575 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 576 } else 577 nextTextBox = m_textBox->nextTextBox(); 578 ASSERT(!nextTextBox || nextTextBox->renderer() == renderer); 579 580 if (runStart < runEnd) { 581 // Handle either a single newline character (which becomes a space), 582 // or a run of characters that does not include a newline. 583 // This effectively translates newlines to spaces without copying the text. 584 if (str[runStart] == '\n') { 585 emitCharacter(' ', m_node, 0, runStart, runStart + 1); 586 m_offset = runStart + 1; 587 } else { 588 size_t subrunEnd = str.find('\n', runStart); 589 if (subrunEnd == notFound || subrunEnd > runEnd) 590 subrunEnd = runEnd; 591 592 m_offset = subrunEnd; 593 emitText(m_node, renderer, runStart, subrunEnd); 594 } 595 596 // If we are doing a subrun that doesn't go to the end of the text box, 597 // come back again to finish handling this text box; don't advance to the next one. 598 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 599 return; 600 601 // Advance and return 602 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 603 if (nextRunStart > runEnd) 604 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 605 m_textBox = nextTextBox; 606 if (renderer->containsReversedText()) 607 ++m_sortedTextBoxesPosition; 608 return; 609 } 610 // Advance and continue 611 m_textBox = nextTextBox; 612 if (renderer->containsReversedText()) 613 ++m_sortedTextBoxesPosition; 614 } 615 if (!m_textBox && m_remainingTextBox) { 616 m_textBox = m_remainingTextBox; 617 m_remainingTextBox = 0; 618 m_firstLetterText = 0; 619 m_offset = 0; 620 handleTextBox(); 621 } 622 } 623 624 static inline RenderText* firstRenderTextInFirstLetter(RenderObject* firstLetter) 625 { 626 if (!firstLetter) 627 return 0; 628 629 // FIXME: Should this check descendent objects? 630 for (RenderObject* current = firstLetter->firstChild(); current; current = current->nextSibling()) { 631 if (current->isText()) 632 return toRenderText(current); 633 } 634 return 0; 635 } 636 637 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 638 { 639 if (renderer->firstLetter()) { 640 RenderObject* r = renderer->firstLetter(); 641 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 642 return; 643 if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) { 644 m_handledFirstLetter = true; 645 m_remainingTextBox = m_textBox; 646 m_textBox = firstLetter->firstTextBox(); 647 m_sortedTextBoxes.clear(); 648 m_firstLetterText = firstLetter; 649 } 650 } 651 m_handledFirstLetter = true; 652 } 653 654 bool TextIterator::handleReplacedElement() 655 { 656 if (m_fullyClippedStack.top()) 657 return false; 658 659 RenderObject* renderer = m_node->renderer(); 660 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 661 return false; 662 663 if (m_lastTextNodeEndedWithCollapsedSpace) { 664 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 665 return false; 666 } 667 668 if (m_entersTextControls && renderer->isTextControl()) { 669 if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->textFormControlElement()->innerTextElement()) { 670 m_node = innerTextElement->containingShadowRoot(); 671 pushFullyClippedState(m_fullyClippedStack, m_node); 672 m_offset = 0; 673 return false; 674 } 675 } 676 677 m_hasEmitted = true; 678 679 if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) { 680 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); 681 return true; 682 } 683 684 if (m_emitsCharactersBetweenAllVisiblePositions) { 685 // We want replaced elements to behave like punctuation for boundary 686 // finding, and to simply take up space for the selection preservation 687 // code in moveParagraphs, so we use a comma. 688 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 689 return true; 690 } 691 692 m_positionNode = m_node->parentNode(); 693 m_positionOffsetBaseNode = m_node; 694 m_positionStartOffset = 0; 695 m_positionEndOffset = 1; 696 m_singleCharacterBuffer = 0; 697 698 if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) { 699 m_text = toRenderImage(renderer)->altText(); 700 if (!m_text.isEmpty()) { 701 m_textLength = m_text.length(); 702 m_lastCharacter = m_text[m_textLength - 1]; 703 return true; 704 } 705 } 706 707 m_textLength = 0; 708 m_lastCharacter = 0; 709 710 return true; 711 } 712 713 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 714 { 715 if (renderer->style()->visibility() == VISIBLE) 716 return true; 717 if (renderer->isTextFragment()) { 718 RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer); 719 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 720 return true; 721 } 722 return false; 723 } 724 725 static bool shouldEmitTabBeforeNode(Node* node) 726 { 727 RenderObject* r = node->renderer(); 728 729 // Table cells are delimited by tabs. 730 if (!r || !isTableCell(node)) 731 return false; 732 733 // Want a tab before every cell other than the first one 734 RenderTableCell* rc = toRenderTableCell(r); 735 RenderTable* t = rc->table(); 736 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 737 } 738 739 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) 740 { 741 RenderObject* renderer = node->renderer(); 742 743 if (renderer ? !renderer->isBR() : !node->hasTagName(brTag)) 744 return false; 745 return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->hasTagName(inputTag)); 746 } 747 748 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node) 749 { 750 // Block flow (versus inline flow) is represented by having 751 // a newline both before and after the element. 752 RenderObject* r = node->renderer(); 753 if (!r) { 754 return (node->hasTagName(blockquoteTag) 755 || node->hasTagName(ddTag) 756 || node->hasTagName(divTag) 757 || node->hasTagName(dlTag) 758 || node->hasTagName(dtTag) 759 || node->hasTagName(h1Tag) 760 || node->hasTagName(h2Tag) 761 || node->hasTagName(h3Tag) 762 || node->hasTagName(h4Tag) 763 || node->hasTagName(h5Tag) 764 || node->hasTagName(h6Tag) 765 || node->hasTagName(hrTag) 766 || node->hasTagName(liTag) 767 || node->hasTagName(listingTag) 768 || node->hasTagName(olTag) 769 || node->hasTagName(pTag) 770 || node->hasTagName(preTag) 771 || node->hasTagName(trTag) 772 || node->hasTagName(ulTag)); 773 } 774 775 // Need to make an exception for table cells, because they are blocks, but we 776 // want them tab-delimited rather than having newlines before and after. 777 if (isTableCell(node)) 778 return false; 779 780 // Need to make an exception for table row elements, because they are neither 781 // "inline" or "RenderBlock", but we want newlines for them. 782 if (r->isTableRow()) { 783 RenderTable* t = toRenderTableRow(r)->table(); 784 if (t && !t->isInline()) 785 return true; 786 } 787 788 return !r->isInline() && r->isRenderBlock() 789 && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText(); 790 } 791 792 static bool shouldEmitNewlineAfterNode(Node* node) 793 { 794 // FIXME: It should be better but slower to create a VisiblePosition here. 795 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 796 return false; 797 // Check if this is the very last renderer in the document. 798 // If so, then we should not emit a newline. 799 while ((node = NodeTraversal::nextSkippingChildren(node))) 800 if (node->renderer()) 801 return true; 802 return false; 803 } 804 805 static bool shouldEmitNewlineBeforeNode(Node* node) 806 { 807 return shouldEmitNewlinesBeforeAndAfterNode(node); 808 } 809 810 static bool shouldEmitExtraNewlineForNode(Node* node) 811 { 812 // When there is a significant collapsed bottom margin, emit an extra 813 // newline for a more realistic result. We end up getting the right 814 // result even without margin collapsing. For example: <div><p>text</p></div> 815 // will work right even if both the <div> and the <p> have bottom margins. 816 RenderObject* r = node->renderer(); 817 if (!r || !r->isBox()) 818 return false; 819 820 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 821 // not to do this at all 822 if (node->hasTagName(h1Tag) 823 || node->hasTagName(h2Tag) 824 || node->hasTagName(h3Tag) 825 || node->hasTagName(h4Tag) 826 || node->hasTagName(h5Tag) 827 || node->hasTagName(h6Tag) 828 || node->hasTagName(pTag)) { 829 RenderStyle* style = r->style(); 830 if (style) { 831 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 832 int fontSize = style->fontDescription().computedPixelSize(); 833 if (bottomMargin * 2 >= fontSize) 834 return true; 835 } 836 } 837 838 return false; 839 } 840 841 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 842 { 843 const String& text = renderer->text(); 844 int length = text.length(); 845 for (int i = textEnd; i < length; ++i) { 846 if (!renderer->style()->isCollapsibleWhiteSpace(text[i])) 847 return i - textEnd; 848 } 849 850 return length - textEnd; 851 } 852 853 static int maxOffsetIncludingCollapsedSpaces(Node* node) 854 { 855 int offset = caretMaxOffset(node); 856 857 if (node->renderer() && node->renderer()->isText()) 858 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 859 860 return offset; 861 } 862 863 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 864 bool TextIterator::shouldRepresentNodeOffsetZero() 865 { 866 if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable()) 867 return true; 868 869 // Leave element positioned flush with start of a paragraph 870 // (e.g. do not insert tab before a table cell at the start of a paragraph) 871 if (m_lastCharacter == '\n') 872 return false; 873 874 // Otherwise, show the position if we have emitted any characters 875 if (m_hasEmitted) 876 return true; 877 878 // We've not emitted anything yet. Generally, there is no need for any positioning then. 879 // The only exception is when the element is visually not in the same line as 880 // the start of the range (e.g. the range starts at the end of the previous paragraph). 881 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 882 // make quicker checks to possibly avoid that. Another check that we could make is 883 // is whether the inline vs block flow changed since the previous visible element. 884 // I think we're already in a special enough case that that won't be needed, tho. 885 886 // No character needed if this is the first node in the range. 887 if (m_node == m_startContainer) 888 return false; 889 890 // If we are outside the start container's subtree, assume we need to emit. 891 // FIXME: m_startContainer could be an inline block 892 if (!m_node->isDescendantOf(m_startContainer)) 893 return true; 894 895 // If we started as m_startContainer offset 0 and the current node is a descendant of 896 // the start container, we already had enough context to correctly decide whether to 897 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 898 // so don't second guess that now. 899 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 900 // immaterial since we likely would have already emitted something by now. 901 if (m_startOffset == 0) 902 return false; 903 904 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 905 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 906 // we would create VisiblePositions on every call to this function without this check. 907 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE 908 || (m_node->renderer()->isBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag))) 909 return false; 910 911 // The startPos.isNotNull() check is needed because the start could be before the body, 912 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 913 // The currPos.isNotNull() check is needed because positions in non-HTML content 914 // (like SVG) do not have visible positions, and we don't want to emit for them either. 915 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 916 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 917 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 918 } 919 920 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 921 { 922 return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 923 } 924 925 void TextIterator::representNodeOffsetZero() 926 { 927 // Emit a character to show the positioning of m_node. 928 929 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 930 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 931 // on m_node to see if it necessitates emitting a character first and will early return 932 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 933 if (shouldEmitTabBeforeNode(m_node)) { 934 if (shouldRepresentNodeOffsetZero()) 935 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 936 } else if (shouldEmitNewlineBeforeNode(m_node)) { 937 if (shouldRepresentNodeOffsetZero()) 938 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 939 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 940 if (shouldRepresentNodeOffsetZero()) 941 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); 942 } 943 } 944 945 bool TextIterator::handleNonTextNode() 946 { 947 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText)) 948 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 949 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 950 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); 951 else 952 representNodeOffsetZero(); 953 954 return true; 955 } 956 957 void TextIterator::exitNode() 958 { 959 // prevent emitting a newline when exiting a collapsed block at beginning of the range 960 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 961 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 962 // therefore look like a blank line. 963 if (!m_hasEmitted) 964 return; 965 966 // Emit with a position *inside* m_node, after m_node's contents, in 967 // case it is a block, because the run should start where the 968 // emitted character is positioned visually. 969 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 970 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 971 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 972 // TextIterator in _web_attributedStringFromRange. 973 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 974 if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) { 975 // use extra newline to represent margin bottom, as needed 976 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 977 978 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 979 // contain a VisiblePosition when doing selection preservation. 980 if (m_lastCharacter != '\n') { 981 // insert a newline with a position following this block's contents. 982 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 983 // remember whether to later add a newline for the current node 984 ASSERT(!m_needsAnotherNewline); 985 m_needsAnotherNewline = addNewline; 986 } else if (addNewline) 987 // insert a newline with a position following this block's contents. 988 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 989 } 990 991 // If nothing was emitted, see if we need to emit a space. 992 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 993 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); 994 } 995 996 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 997 { 998 m_hasEmitted = true; 999 1000 // remember information with which to construct the TextIterator::range() 1001 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 1002 m_positionNode = textNode; 1003 m_positionOffsetBaseNode = offsetBaseNode; 1004 m_positionStartOffset = textStartOffset; 1005 m_positionEndOffset = textEndOffset; 1006 1007 // remember information with which to construct the TextIterator::characters() and length() 1008 m_singleCharacterBuffer = c; 1009 ASSERT(m_singleCharacterBuffer); 1010 m_textLength = 1; 1011 1012 // remember some iteration state 1013 m_lastTextNodeEndedWithCollapsedSpace = false; 1014 m_lastCharacter = c; 1015 } 1016 1017 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) 1018 { 1019 RenderText* renderer = toRenderText(renderObject); 1020 m_text = m_emitsOriginalText ? renderer->originalText() : (m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text()); 1021 ASSERT(!m_text.isEmpty()); 1022 ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length())); 1023 ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length())); 1024 ASSERT(textStartOffset <= textEndOffset); 1025 1026 m_positionNode = textNode; 1027 m_positionOffsetBaseNode = 0; 1028 m_positionStartOffset = textStartOffset; 1029 m_positionEndOffset = textEndOffset; 1030 m_singleCharacterBuffer = 0; 1031 m_textLength = textEndOffset - textStartOffset; 1032 m_lastCharacter = m_text[textEndOffset - 1]; 1033 1034 m_lastTextNodeEndedWithCollapsedSpace = false; 1035 m_hasEmitted = true; 1036 } 1037 1038 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) 1039 { 1040 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); 1041 } 1042 1043 PassRefPtr<Range> TextIterator::range() const 1044 { 1045 // use the current run information, if we have it 1046 if (m_positionNode) { 1047 if (m_positionOffsetBaseNode) { 1048 int index = m_positionOffsetBaseNode->nodeIndex(); 1049 m_positionStartOffset += index; 1050 m_positionEndOffset += index; 1051 m_positionOffsetBaseNode = 0; 1052 } 1053 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1054 } 1055 1056 // otherwise, return the end of the overall range we were given 1057 if (m_endContainer) 1058 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1059 1060 return 0; 1061 } 1062 1063 Node* TextIterator::node() const 1064 { 1065 RefPtr<Range> textRange = range(); 1066 if (!textRange) 1067 return 0; 1068 1069 Node* node = textRange->startContainer(); 1070 if (!node) 1071 return 0; 1072 if (node->offsetInCharacters()) 1073 return node; 1074 1075 return node->childNode(textRange->startOffset()); 1076 } 1077 1078 // -------- 1079 1080 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior) 1081 : m_node(0) 1082 , m_offset(0) 1083 , m_handledNode(false) 1084 , m_handledChildren(false) 1085 , m_startNode(0) 1086 , m_startOffset(0) 1087 , m_endNode(0) 1088 , m_endOffset(0) 1089 , m_positionNode(0) 1090 , m_positionStartOffset(0) 1091 , m_positionEndOffset(0) 1092 , m_textOffset(0) 1093 , m_textLength(0) 1094 , m_lastTextNode(0) 1095 , m_lastCharacter(0) 1096 , m_singleCharacterBuffer(0) 1097 , m_havePassedStartNode(false) 1098 , m_shouldHandleFirstLetter(false) 1099 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1100 , m_shouldStop(false) 1101 , m_emitsOriginalText(false) 1102 { 1103 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1104 1105 if (!r) 1106 return; 1107 1108 Node* startNode = r->startContainer(); 1109 if (!startNode) 1110 return; 1111 Node* endNode = r->endContainer(); 1112 int startOffset = r->startOffset(); 1113 int endOffset = r->endOffset(); 1114 1115 if (!startNode->offsetInCharacters()) { 1116 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { 1117 startNode = startNode->childNode(startOffset); 1118 startOffset = 0; 1119 } 1120 } 1121 if (!endNode->offsetInCharacters()) { 1122 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { 1123 endNode = endNode->childNode(endOffset - 1); 1124 endOffset = lastOffsetInNode(endNode); 1125 } 1126 } 1127 1128 m_node = endNode; 1129 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1130 m_offset = endOffset; 1131 m_handledNode = false; 1132 m_handledChildren = endOffset == 0; 1133 1134 m_startNode = startNode; 1135 m_startOffset = startOffset; 1136 m_endNode = endNode; 1137 m_endOffset = endOffset; 1138 1139 #ifndef NDEBUG 1140 // Need this just because of the assert. 1141 m_positionNode = endNode; 1142 #endif 1143 1144 m_lastTextNode = 0; 1145 m_lastCharacter = '\n'; 1146 1147 m_havePassedStartNode = false; 1148 1149 advance(); 1150 } 1151 1152 void SimplifiedBackwardsTextIterator::advance() 1153 { 1154 ASSERT(m_positionNode); 1155 1156 if (m_shouldStop) 1157 return; 1158 1159 if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) { 1160 m_shouldStop = true; 1161 return; 1162 } 1163 1164 m_positionNode = 0; 1165 m_textLength = 0; 1166 1167 while (m_node && !m_havePassedStartNode) { 1168 // Don't handle node if we start iterating at [node, 0]. 1169 if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) { 1170 RenderObject* renderer = m_node->renderer(); 1171 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1172 // FIXME: What about CDATA_SECTION_NODE? 1173 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1174 m_handledNode = handleTextNode(); 1175 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1176 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1177 m_handledNode = handleReplacedElement(); 1178 } else 1179 m_handledNode = handleNonTextNode(); 1180 if (m_positionNode) 1181 return; 1182 } 1183 1184 if (!m_handledChildren && m_node->hasChildNodes()) { 1185 m_node = m_node->lastChild(); 1186 pushFullyClippedState(m_fullyClippedStack, m_node); 1187 } else { 1188 // Exit empty containers as we pass over them or containers 1189 // where [container, 0] is where we started iterating. 1190 if (!m_handledNode 1191 && canHaveChildrenForEditing(m_node) 1192 && m_node->parentNode() 1193 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1194 exitNode(); 1195 if (m_positionNode) { 1196 m_handledNode = true; 1197 m_handledChildren = true; 1198 return; 1199 } 1200 } 1201 1202 // Exit all other containers. 1203 while (!m_node->previousSibling()) { 1204 if (!advanceRespectingRange(m_node->parentOrShadowHostNode())) 1205 break; 1206 m_fullyClippedStack.pop(); 1207 exitNode(); 1208 if (m_positionNode) { 1209 m_handledNode = true; 1210 m_handledChildren = true; 1211 return; 1212 } 1213 } 1214 1215 m_fullyClippedStack.pop(); 1216 if (advanceRespectingRange(m_node->previousSibling())) 1217 pushFullyClippedState(m_fullyClippedStack, m_node); 1218 else 1219 m_node = 0; 1220 } 1221 1222 // For the purpose of word boundary detection, 1223 // we should iterate all visible text and trailing (collapsed) whitespaces. 1224 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1225 m_handledNode = false; 1226 m_handledChildren = false; 1227 1228 if (m_positionNode) 1229 return; 1230 } 1231 } 1232 1233 bool SimplifiedBackwardsTextIterator::handleTextNode() 1234 { 1235 m_lastTextNode = m_node; 1236 1237 int startOffset; 1238 int offsetInNode; 1239 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode); 1240 if (!renderer) 1241 return true; 1242 1243 String text = renderer->text(); 1244 if (!renderer->firstTextBox() && text.length() > 0) 1245 return true; 1246 1247 m_positionEndOffset = m_offset; 1248 m_offset = startOffset + offsetInNode; 1249 m_positionNode = m_node; 1250 m_positionStartOffset = m_offset; 1251 1252 ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length())); 1253 ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); 1254 ASSERT(m_positionStartOffset <= m_positionEndOffset); 1255 1256 m_textLength = m_positionEndOffset - m_positionStartOffset; 1257 m_textOffset = m_positionStartOffset - offsetInNode; 1258 m_textContainer = text; 1259 m_singleCharacterBuffer = 0; 1260 RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length()); 1261 1262 m_lastCharacter = text[m_positionEndOffset - 1]; 1263 1264 return !m_shouldHandleFirstLetter; 1265 } 1266 1267 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) 1268 { 1269 RenderText* renderer = toRenderText(m_node->renderer()); 1270 startOffset = (m_node == m_startNode) ? m_startOffset : 0; 1271 1272 if (!renderer->isTextFragment()) { 1273 offsetInNode = 0; 1274 return renderer; 1275 } 1276 1277 RenderTextFragment* fragment = toRenderTextFragment(renderer); 1278 int offsetAfterFirstLetter = fragment->start(); 1279 if (startOffset >= offsetAfterFirstLetter) { 1280 ASSERT(!m_shouldHandleFirstLetter); 1281 offsetInNode = offsetAfterFirstLetter; 1282 return renderer; 1283 } 1284 1285 if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) { 1286 m_shouldHandleFirstLetter = true; 1287 offsetInNode = offsetAfterFirstLetter; 1288 return renderer; 1289 } 1290 1291 m_shouldHandleFirstLetter = false; 1292 offsetInNode = 0; 1293 return firstRenderTextInFirstLetter(fragment->firstLetter()); 1294 } 1295 1296 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1297 { 1298 unsigned index = m_node->nodeIndex(); 1299 // We want replaced elements to behave like punctuation for boundary 1300 // finding, and to simply take up space for the selection preservation 1301 // code in moveParagraphs, so we use a comma. Unconditionally emit 1302 // here because this iterator is only used for boundary finding. 1303 emitCharacter(',', m_node->parentNode(), index, index + 1); 1304 return true; 1305 } 1306 1307 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1308 { 1309 // We can use a linefeed in place of a tab because this simple iterator is only used to 1310 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1311 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1312 unsigned index = m_node->nodeIndex(); 1313 // The start of this emitted range is wrong. Ensuring correctness would require 1314 // VisiblePositions and so would be slow. previousBoundary expects this. 1315 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1316 } 1317 return true; 1318 } 1319 1320 void SimplifiedBackwardsTextIterator::exitNode() 1321 { 1322 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1323 // The start of this emitted range is wrong. Ensuring correctness would require 1324 // VisiblePositions and so would be slow. previousBoundary expects this. 1325 emitCharacter('\n', m_node, 0, 0); 1326 } 1327 } 1328 1329 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1330 { 1331 m_singleCharacterBuffer = c; 1332 m_positionNode = node; 1333 m_positionStartOffset = startOffset; 1334 m_positionEndOffset = endOffset; 1335 m_textOffset = 0; 1336 m_textLength = 1; 1337 m_lastCharacter = c; 1338 } 1339 1340 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1341 { 1342 if (!next) 1343 return false; 1344 m_havePassedStartNode |= m_node == m_startNode; 1345 if (m_havePassedStartNode) 1346 return false; 1347 m_node = next; 1348 return true; 1349 } 1350 1351 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const 1352 { 1353 if (m_positionNode) 1354 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1355 1356 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); 1357 } 1358 1359 // -------- 1360 1361 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior) 1362 : m_offset(0) 1363 , m_runOffset(0) 1364 , m_atBreak(true) 1365 , m_textIterator(r, behavior) 1366 { 1367 while (!atEnd() && m_textIterator.length() == 0) 1368 m_textIterator.advance(); 1369 } 1370 1371 PassRefPtr<Range> CharacterIterator::range() const 1372 { 1373 RefPtr<Range> r = m_textIterator.range(); 1374 if (!m_textIterator.atEnd()) { 1375 if (m_textIterator.length() <= 1) { 1376 ASSERT(m_runOffset == 0); 1377 } else { 1378 Node* n = r->startContainer(); 1379 ASSERT(n == r->endContainer()); 1380 int offset = r->startOffset() + m_runOffset; 1381 r->setStart(n, offset, ASSERT_NO_EXCEPTION); 1382 r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION); 1383 } 1384 } 1385 return r.release(); 1386 } 1387 1388 void CharacterIterator::advance(int count) 1389 { 1390 if (count <= 0) { 1391 ASSERT(count == 0); 1392 return; 1393 } 1394 1395 m_atBreak = false; 1396 1397 // easy if there is enough left in the current m_textIterator run 1398 int remaining = m_textIterator.length() - m_runOffset; 1399 if (count < remaining) { 1400 m_runOffset += count; 1401 m_offset += count; 1402 return; 1403 } 1404 1405 // exhaust the current m_textIterator run 1406 count -= remaining; 1407 m_offset += remaining; 1408 1409 // move to a subsequent m_textIterator run 1410 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1411 int runLength = m_textIterator.length(); 1412 if (runLength == 0) 1413 m_atBreak = true; 1414 else { 1415 // see whether this is m_textIterator to use 1416 if (count < runLength) { 1417 m_runOffset = count; 1418 m_offset += count; 1419 return; 1420 } 1421 1422 // exhaust this m_textIterator run 1423 count -= runLength; 1424 m_offset += runLength; 1425 } 1426 } 1427 1428 // ran to the end of the m_textIterator... no more runs left 1429 m_atBreak = true; 1430 m_runOffset = 0; 1431 } 1432 1433 String CharacterIterator::string(int numChars) 1434 { 1435 StringBuilder result; 1436 result.reserveCapacity(numChars); 1437 while (numChars > 0 && !atEnd()) { 1438 int runSize = min(numChars, length()); 1439 m_textIterator.appendTextToStringBuilder(result, m_runOffset, runSize); 1440 numChars -= runSize; 1441 advance(runSize); 1442 } 1443 return result.toString(); 1444 } 1445 1446 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) 1447 { 1448 it.advance(offset); 1449 RefPtr<Range> start = it.range(); 1450 1451 if (length > 1) 1452 it.advance(length - 1); 1453 RefPtr<Range> end = it.range(); 1454 1455 return Range::create(start->startContainer()->document(), 1456 start->startContainer(), start->startOffset(), 1457 end->endContainer(), end->endOffset()); 1458 } 1459 1460 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior) 1461 : m_offset(0) 1462 , m_runOffset(0) 1463 , m_atBreak(true) 1464 , m_textIterator(range, behavior) 1465 { 1466 while (!atEnd() && !m_textIterator.length()) 1467 m_textIterator.advance(); 1468 } 1469 1470 PassRefPtr<Range> BackwardsCharacterIterator::range() const 1471 { 1472 RefPtr<Range> r = m_textIterator.range(); 1473 if (!m_textIterator.atEnd()) { 1474 if (m_textIterator.length() <= 1) 1475 ASSERT(m_runOffset == 0); 1476 else { 1477 Node* n = r->startContainer(); 1478 ASSERT(n == r->endContainer()); 1479 int offset = r->endOffset() - m_runOffset; 1480 r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION); 1481 r->setEnd(n, offset, ASSERT_NO_EXCEPTION); 1482 } 1483 } 1484 return r.release(); 1485 } 1486 1487 void BackwardsCharacterIterator::advance(int count) 1488 { 1489 if (count <= 0) { 1490 ASSERT(!count); 1491 return; 1492 } 1493 1494 m_atBreak = false; 1495 1496 int remaining = m_textIterator.length() - m_runOffset; 1497 if (count < remaining) { 1498 m_runOffset += count; 1499 m_offset += count; 1500 return; 1501 } 1502 1503 count -= remaining; 1504 m_offset += remaining; 1505 1506 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1507 int runLength = m_textIterator.length(); 1508 if (runLength == 0) 1509 m_atBreak = true; 1510 else { 1511 if (count < runLength) { 1512 m_runOffset = count; 1513 m_offset += count; 1514 return; 1515 } 1516 1517 count -= runLength; 1518 m_offset += runLength; 1519 } 1520 } 1521 1522 m_atBreak = true; 1523 m_runOffset = 0; 1524 } 1525 1526 // -------- 1527 1528 WordAwareIterator::WordAwareIterator(const Range* range) 1529 : m_didLookAhead(true) // So we consider the first chunk from the text iterator. 1530 , m_textIterator(range) 1531 { 1532 advance(); // Get in position over the first chunk of text. 1533 } 1534 1535 WordAwareIterator::~WordAwareIterator() 1536 { 1537 } 1538 1539 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1540 1541 void WordAwareIterator::advance() 1542 { 1543 m_buffer.clear(); 1544 1545 // If last time we did a look-ahead, start with that looked-ahead chunk now 1546 if (!m_didLookAhead) { 1547 ASSERT(!m_textIterator.atEnd()); 1548 m_textIterator.advance(); 1549 } 1550 m_didLookAhead = false; 1551 1552 // Go to next non-empty chunk. 1553 while (!m_textIterator.atEnd() && m_textIterator.length() == 0) 1554 m_textIterator.advance(); 1555 1556 m_range = m_textIterator.range(); 1557 1558 if (m_textIterator.atEnd()) 1559 return; 1560 1561 while (1) { 1562 // If this chunk ends in whitespace we can just use it as our chunk. 1563 if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1))) 1564 return; 1565 1566 // If this is the first chunk that failed, save it in m_buffer before look ahead. 1567 if (m_buffer.isEmpty()) 1568 m_textIterator.appendTextTo(m_buffer); 1569 1570 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1571 m_textIterator.advance(); 1572 if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) { 1573 m_didLookAhead = true; 1574 return; 1575 } 1576 1577 // Start gobbling chunks until we get to a suitable stopping point 1578 m_textIterator.appendTextTo(m_buffer); 1579 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION); 1580 } 1581 } 1582 1583 int WordAwareIterator::length() const 1584 { 1585 if (!m_buffer.isEmpty()) 1586 return m_buffer.size(); 1587 return m_textIterator.length(); 1588 } 1589 1590 String WordAwareIterator::substring(unsigned position, unsigned length) const 1591 { 1592 if (!m_buffer.isEmpty()) 1593 return String(m_buffer.data() + position, length); 1594 return m_textIterator.substring(position, length); 1595 } 1596 1597 UChar WordAwareIterator::characterAt(unsigned index) const 1598 { 1599 if (!m_buffer.isEmpty()) 1600 return m_buffer[index]; 1601 return m_textIterator.characterAt(index); 1602 } 1603 1604 // -------- 1605 1606 static inline UChar foldQuoteMarkOrSoftHyphen(UChar c) 1607 { 1608 switch (c) { 1609 case hebrewPunctuationGershayim: 1610 case leftDoubleQuotationMark: 1611 case rightDoubleQuotationMark: 1612 return '"'; 1613 case hebrewPunctuationGeresh: 1614 case leftSingleQuotationMark: 1615 case rightSingleQuotationMark: 1616 return '\''; 1617 case softHyphen: 1618 // Replace soft hyphen with an ignorable character so that their presence or absence will 1619 // not affect string comparison. 1620 return 0; 1621 default: 1622 return c; 1623 } 1624 } 1625 1626 static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) 1627 { 1628 for (size_t i = 0; i < length; ++i) 1629 data[i] = foldQuoteMarkOrSoftHyphen(data[i]); 1630 } 1631 1632 static const size_t minimumSearchBufferSize = 8192; 1633 1634 #ifndef NDEBUG 1635 static bool searcherInUse; 1636 #endif 1637 1638 static UStringSearch* createSearcher() 1639 { 1640 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1641 // but it doesn't matter exactly what it is, since we don't perform any searches 1642 // without setting both the pattern and the text. 1643 UErrorCode status = U_ZERO_ERROR; 1644 String searchCollatorName = currentSearchLocaleID() + String("@collation=search"); 1645 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status); 1646 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1647 return searcher; 1648 } 1649 1650 static UStringSearch* searcher() 1651 { 1652 static UStringSearch* searcher = createSearcher(); 1653 return searcher; 1654 } 1655 1656 static inline void lockSearcher() 1657 { 1658 #ifndef NDEBUG 1659 ASSERT(!searcherInUse); 1660 searcherInUse = true; 1661 #endif 1662 } 1663 1664 static inline void unlockSearcher() 1665 { 1666 #ifndef NDEBUG 1667 ASSERT(searcherInUse); 1668 searcherInUse = false; 1669 #endif 1670 } 1671 1672 // ICU's search ignores the distinction between small kana letters and ones 1673 // that are not small, and also characters that differ only in the voicing 1674 // marks when considering only primary collation strength differences. 1675 // This is not helpful for end users, since these differences make words 1676 // distinct, so for our purposes we need these to be considered. 1677 // The Unicode folks do not think the collation algorithm should be 1678 // changed. To work around this, we would like to tailor the ICU searcher, 1679 // but we can't get that to work yet. So instead, we check for cases where 1680 // these differences occur, and skip those matches. 1681 1682 // We refer to the above technique as the "kana workaround". The next few 1683 // functions are helper functinos for the kana workaround. 1684 1685 static inline bool isKanaLetter(UChar character) 1686 { 1687 // Hiragana letters. 1688 if (character >= 0x3041 && character <= 0x3096) 1689 return true; 1690 1691 // Katakana letters. 1692 if (character >= 0x30A1 && character <= 0x30FA) 1693 return true; 1694 if (character >= 0x31F0 && character <= 0x31FF) 1695 return true; 1696 1697 // Halfwidth katakana letters. 1698 if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70) 1699 return true; 1700 1701 return false; 1702 } 1703 1704 static inline bool isSmallKanaLetter(UChar character) 1705 { 1706 ASSERT(isKanaLetter(character)); 1707 1708 switch (character) { 1709 case 0x3041: // HIRAGANA LETTER SMALL A 1710 case 0x3043: // HIRAGANA LETTER SMALL I 1711 case 0x3045: // HIRAGANA LETTER SMALL U 1712 case 0x3047: // HIRAGANA LETTER SMALL E 1713 case 0x3049: // HIRAGANA LETTER SMALL O 1714 case 0x3063: // HIRAGANA LETTER SMALL TU 1715 case 0x3083: // HIRAGANA LETTER SMALL YA 1716 case 0x3085: // HIRAGANA LETTER SMALL YU 1717 case 0x3087: // HIRAGANA LETTER SMALL YO 1718 case 0x308E: // HIRAGANA LETTER SMALL WA 1719 case 0x3095: // HIRAGANA LETTER SMALL KA 1720 case 0x3096: // HIRAGANA LETTER SMALL KE 1721 case 0x30A1: // KATAKANA LETTER SMALL A 1722 case 0x30A3: // KATAKANA LETTER SMALL I 1723 case 0x30A5: // KATAKANA LETTER SMALL U 1724 case 0x30A7: // KATAKANA LETTER SMALL E 1725 case 0x30A9: // KATAKANA LETTER SMALL O 1726 case 0x30C3: // KATAKANA LETTER SMALL TU 1727 case 0x30E3: // KATAKANA LETTER SMALL YA 1728 case 0x30E5: // KATAKANA LETTER SMALL YU 1729 case 0x30E7: // KATAKANA LETTER SMALL YO 1730 case 0x30EE: // KATAKANA LETTER SMALL WA 1731 case 0x30F5: // KATAKANA LETTER SMALL KA 1732 case 0x30F6: // KATAKANA LETTER SMALL KE 1733 case 0x31F0: // KATAKANA LETTER SMALL KU 1734 case 0x31F1: // KATAKANA LETTER SMALL SI 1735 case 0x31F2: // KATAKANA LETTER SMALL SU 1736 case 0x31F3: // KATAKANA LETTER SMALL TO 1737 case 0x31F4: // KATAKANA LETTER SMALL NU 1738 case 0x31F5: // KATAKANA LETTER SMALL HA 1739 case 0x31F6: // KATAKANA LETTER SMALL HI 1740 case 0x31F7: // KATAKANA LETTER SMALL HU 1741 case 0x31F8: // KATAKANA LETTER SMALL HE 1742 case 0x31F9: // KATAKANA LETTER SMALL HO 1743 case 0x31FA: // KATAKANA LETTER SMALL MU 1744 case 0x31FB: // KATAKANA LETTER SMALL RA 1745 case 0x31FC: // KATAKANA LETTER SMALL RI 1746 case 0x31FD: // KATAKANA LETTER SMALL RU 1747 case 0x31FE: // KATAKANA LETTER SMALL RE 1748 case 0x31FF: // KATAKANA LETTER SMALL RO 1749 case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A 1750 case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I 1751 case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U 1752 case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E 1753 case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O 1754 case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA 1755 case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU 1756 case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO 1757 case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU 1758 return true; 1759 } 1760 return false; 1761 } 1762 1763 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark }; 1764 1765 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character) 1766 { 1767 ASSERT(isKanaLetter(character)); 1768 1769 switch (character) { 1770 case 0x304C: // HIRAGANA LETTER GA 1771 case 0x304E: // HIRAGANA LETTER GI 1772 case 0x3050: // HIRAGANA LETTER GU 1773 case 0x3052: // HIRAGANA LETTER GE 1774 case 0x3054: // HIRAGANA LETTER GO 1775 case 0x3056: // HIRAGANA LETTER ZA 1776 case 0x3058: // HIRAGANA LETTER ZI 1777 case 0x305A: // HIRAGANA LETTER ZU 1778 case 0x305C: // HIRAGANA LETTER ZE 1779 case 0x305E: // HIRAGANA LETTER ZO 1780 case 0x3060: // HIRAGANA LETTER DA 1781 case 0x3062: // HIRAGANA LETTER DI 1782 case 0x3065: // HIRAGANA LETTER DU 1783 case 0x3067: // HIRAGANA LETTER DE 1784 case 0x3069: // HIRAGANA LETTER DO 1785 case 0x3070: // HIRAGANA LETTER BA 1786 case 0x3073: // HIRAGANA LETTER BI 1787 case 0x3076: // HIRAGANA LETTER BU 1788 case 0x3079: // HIRAGANA LETTER BE 1789 case 0x307C: // HIRAGANA LETTER BO 1790 case 0x3094: // HIRAGANA LETTER VU 1791 case 0x30AC: // KATAKANA LETTER GA 1792 case 0x30AE: // KATAKANA LETTER GI 1793 case 0x30B0: // KATAKANA LETTER GU 1794 case 0x30B2: // KATAKANA LETTER GE 1795 case 0x30B4: // KATAKANA LETTER GO 1796 case 0x30B6: // KATAKANA LETTER ZA 1797 case 0x30B8: // KATAKANA LETTER ZI 1798 case 0x30BA: // KATAKANA LETTER ZU 1799 case 0x30BC: // KATAKANA LETTER ZE 1800 case 0x30BE: // KATAKANA LETTER ZO 1801 case 0x30C0: // KATAKANA LETTER DA 1802 case 0x30C2: // KATAKANA LETTER DI 1803 case 0x30C5: // KATAKANA LETTER DU 1804 case 0x30C7: // KATAKANA LETTER DE 1805 case 0x30C9: // KATAKANA LETTER DO 1806 case 0x30D0: // KATAKANA LETTER BA 1807 case 0x30D3: // KATAKANA LETTER BI 1808 case 0x30D6: // KATAKANA LETTER BU 1809 case 0x30D9: // KATAKANA LETTER BE 1810 case 0x30DC: // KATAKANA LETTER BO 1811 case 0x30F4: // KATAKANA LETTER VU 1812 case 0x30F7: // KATAKANA LETTER VA 1813 case 0x30F8: // KATAKANA LETTER VI 1814 case 0x30F9: // KATAKANA LETTER VE 1815 case 0x30FA: // KATAKANA LETTER VO 1816 return VoicedSoundMark; 1817 case 0x3071: // HIRAGANA LETTER PA 1818 case 0x3074: // HIRAGANA LETTER PI 1819 case 0x3077: // HIRAGANA LETTER PU 1820 case 0x307A: // HIRAGANA LETTER PE 1821 case 0x307D: // HIRAGANA LETTER PO 1822 case 0x30D1: // KATAKANA LETTER PA 1823 case 0x30D4: // KATAKANA LETTER PI 1824 case 0x30D7: // KATAKANA LETTER PU 1825 case 0x30DA: // KATAKANA LETTER PE 1826 case 0x30DD: // KATAKANA LETTER PO 1827 return SemiVoicedSoundMark; 1828 } 1829 return NoVoicedSoundMark; 1830 } 1831 1832 static inline bool isCombiningVoicedSoundMark(UChar character) 1833 { 1834 switch (character) { 1835 case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 1836 case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 1837 return true; 1838 } 1839 return false; 1840 } 1841 1842 static inline bool containsKanaLetters(const String& pattern) 1843 { 1844 unsigned length = pattern.length(); 1845 for (unsigned i = 0; i < length; ++i) { 1846 if (isKanaLetter(pattern[i])) 1847 return true; 1848 } 1849 return false; 1850 } 1851 1852 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer) 1853 { 1854 ASSERT(length); 1855 1856 buffer.resize(length); 1857 1858 UErrorCode status = U_ZERO_ERROR; 1859 size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status); 1860 ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR); 1861 ASSERT(bufferSize); 1862 1863 buffer.resize(bufferSize); 1864 1865 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) 1866 return; 1867 1868 status = U_ZERO_ERROR; 1869 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status); 1870 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 1871 } 1872 1873 static bool isNonLatin1Separator(UChar32 character) 1874 { 1875 ASSERT_ARG(character, character >= 256); 1876 1877 return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK); 1878 } 1879 1880 static inline bool isSeparator(UChar32 character) 1881 { 1882 static const bool latin1SeparatorTable[256] = { 1883 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1884 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1885 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 1886 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ? 1887 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ 1888 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _ 1889 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ` 1890 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~ 1891 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1892 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1893 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1894 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1895 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1896 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1897 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1898 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 1899 }; 1900 1901 if (character < 256) 1902 return latin1SeparatorTable[character]; 1903 1904 return isNonLatin1Separator(character); 1905 } 1906 1907 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1908 : m_options(options) 1909 , m_prefixLength(0) 1910 , m_numberOfCharactersJustAppended(0) 1911 , m_atBreak(true) 1912 , m_needsMoreContext(options & AtWordStarts) 1913 , m_targetRequiresKanaWorkaround(containsKanaLetters(target)) 1914 { 1915 ASSERT(!target.isEmpty()); 1916 target.appendTo(m_target); 1917 1918 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1919 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1920 // to add tailoring on top of the locale-specific tailoring as of this writing. 1921 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); 1922 1923 size_t targetLength = m_target.size(); 1924 m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize)); 1925 m_overlap = m_buffer.capacity() / 4; 1926 1927 if ((m_options & AtWordStarts) && targetLength) { 1928 UChar32 targetFirstCharacter; 1929 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); 1930 // Characters in the separator category never really occur at the beginning of a word, 1931 // so if the target begins with such a character, we just ignore the AtWordStart option. 1932 if (isSeparator(targetFirstCharacter)) { 1933 m_options &= ~AtWordStarts; 1934 m_needsMoreContext = false; 1935 } 1936 } 1937 1938 // Grab the single global searcher. 1939 // If we ever have a reason to do more than once search buffer at once, we'll have 1940 // to move to multiple searchers. 1941 lockSearcher(); 1942 1943 UStringSearch* searcher = WebCore::searcher(); 1944 UCollator* collator = usearch_getCollator(searcher); 1945 1946 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 1947 if (ucol_getStrength(collator) != strength) { 1948 ucol_setStrength(collator, strength); 1949 usearch_reset(searcher); 1950 } 1951 1952 UErrorCode status = U_ZERO_ERROR; 1953 usearch_setPattern(searcher, m_target.data(), targetLength, &status); 1954 ASSERT(status == U_ZERO_ERROR); 1955 1956 // The kana workaround requires a normalized copy of the target string. 1957 if (m_targetRequiresKanaWorkaround) 1958 normalizeCharacters(m_target.data(), m_target.size(), m_normalizedTarget); 1959 } 1960 1961 inline SearchBuffer::~SearchBuffer() 1962 { 1963 // Leave the static object pointing to a valid string. 1964 UErrorCode status = U_ZERO_ERROR; 1965 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1966 ASSERT(status == U_ZERO_ERROR); 1967 1968 unlockSearcher(); 1969 } 1970 1971 template<typename CharType> 1972 inline void SearchBuffer::append(const CharType* characters, size_t length) 1973 { 1974 ASSERT(length); 1975 1976 if (m_atBreak) { 1977 m_buffer.shrink(0); 1978 m_prefixLength = 0; 1979 m_atBreak = false; 1980 } else if (m_buffer.size() == m_buffer.capacity()) { 1981 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 1982 m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap); 1983 m_buffer.shrink(m_overlap); 1984 } 1985 1986 size_t oldLength = m_buffer.size(); 1987 size_t usableLength = min(m_buffer.capacity() - oldLength, length); 1988 ASSERT(usableLength); 1989 m_buffer.resize(oldLength + usableLength); 1990 UChar* destination = m_buffer.data() + oldLength; 1991 StringImpl::copyChars(destination, characters, usableLength); 1992 foldQuoteMarksAndSoftHyphens(destination, usableLength); 1993 m_numberOfCharactersJustAppended = usableLength; 1994 } 1995 1996 inline bool SearchBuffer::needsMoreContext() const 1997 { 1998 return m_needsMoreContext; 1999 } 2000 2001 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 2002 { 2003 ASSERT(m_needsMoreContext); 2004 ASSERT(m_prefixLength == m_buffer.size()); 2005 2006 if (!length) 2007 return; 2008 2009 m_atBreak = false; 2010 2011 size_t wordBoundaryContextStart = length; 2012 if (wordBoundaryContextStart) { 2013 U16_BACK_1(characters, 0, wordBoundaryContextStart); 2014 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 2015 } 2016 2017 size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 2018 m_buffer.prepend(characters + length - usableLength, usableLength); 2019 m_prefixLength += usableLength; 2020 2021 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 2022 m_needsMoreContext = false; 2023 } 2024 2025 inline bool SearchBuffer::atBreak() const 2026 { 2027 return m_atBreak; 2028 } 2029 2030 inline void SearchBuffer::reachedBreak() 2031 { 2032 m_atBreak = true; 2033 } 2034 2035 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 2036 { 2037 // This function implements the kana workaround. If usearch treats 2038 // it as a match, but we do not want to, then it's a "bad match". 2039 if (!m_targetRequiresKanaWorkaround) 2040 return false; 2041 2042 // Normalize into a match buffer. We reuse a single buffer rather than 2043 // creating a new one each time. 2044 normalizeCharacters(match, matchLength, m_normalizedMatch); 2045 2046 const UChar* a = m_normalizedTarget.begin(); 2047 const UChar* aEnd = m_normalizedTarget.end(); 2048 2049 const UChar* b = m_normalizedMatch.begin(); 2050 const UChar* bEnd = m_normalizedMatch.end(); 2051 2052 while (true) { 2053 // Skip runs of non-kana-letter characters. This is necessary so we can 2054 // correctly handle strings where the target and match have different-length 2055 // runs of characters that match, while still double checking the correctness 2056 // of matches of kana letters with other kana letters. 2057 while (a != aEnd && !isKanaLetter(*a)) 2058 ++a; 2059 while (b != bEnd && !isKanaLetter(*b)) 2060 ++b; 2061 2062 // If we reached the end of either the target or the match, we should have 2063 // reached the end of both; both should have the same number of kana letters. 2064 if (a == aEnd || b == bEnd) { 2065 ASSERT(a == aEnd); 2066 ASSERT(b == bEnd); 2067 return false; 2068 } 2069 2070 // Check for differences in the kana letter character itself. 2071 if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b)) 2072 return true; 2073 if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b)) 2074 return true; 2075 ++a; 2076 ++b; 2077 2078 // Check for differences in combining voiced sound marks found after the letter. 2079 while (1) { 2080 if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) { 2081 if (b != bEnd && isCombiningVoicedSoundMark(*b)) 2082 return true; 2083 break; 2084 } 2085 if (!(b != bEnd && isCombiningVoicedSoundMark(*b))) 2086 return true; 2087 if (*a != *b) 2088 return true; 2089 ++a; 2090 ++b; 2091 } 2092 } 2093 } 2094 2095 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 2096 { 2097 ASSERT(m_options & AtWordStarts); 2098 2099 if (!start) 2100 return true; 2101 2102 int size = m_buffer.size(); 2103 int offset = start; 2104 UChar32 firstCharacter; 2105 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 2106 2107 if (m_options & TreatMedialCapitalAsWordStart) { 2108 UChar32 previousCharacter; 2109 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 2110 2111 if (isSeparator(firstCharacter)) { 2112 // The start of a separator run is a word start (".org" in "webkit.org"). 2113 if (!isSeparator(previousCharacter)) 2114 return true; 2115 } else if (isASCIIUpper(firstCharacter)) { 2116 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 2117 if (!isASCIIUpper(previousCharacter)) 2118 return true; 2119 // The last character of an uppercase run followed by a non-separator, non-digit 2120 // is a word start ("Request" in "XMLHTTPRequest"). 2121 offset = start; 2122 U16_FWD_1(m_buffer.data(), offset, size); 2123 UChar32 nextCharacter = 0; 2124 if (offset < size) 2125 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 2126 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 2127 return true; 2128 } else if (isASCIIDigit(firstCharacter)) { 2129 // The start of a digit run is a word start ("2" in "WebKit2"). 2130 if (!isASCIIDigit(previousCharacter)) 2131 return true; 2132 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 2133 // The start of a non-separator, non-uppercase, non-digit run is a word start, 2134 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 2135 return true; 2136 } 2137 } 2138 2139 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes 2140 // a word, so treat the position before any CJK character as a word start. 2141 if (Font::isCJKIdeographOrSymbol(firstCharacter)) 2142 return true; 2143 2144 size_t wordBreakSearchStart = start + length; 2145 while (wordBreakSearchStart > start) 2146 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 2147 return wordBreakSearchStart == start; 2148 } 2149 2150 inline size_t SearchBuffer::search(size_t& start) 2151 { 2152 size_t size = m_buffer.size(); 2153 if (m_atBreak) { 2154 if (!size) 2155 return 0; 2156 } else { 2157 if (size != m_buffer.capacity()) 2158 return 0; 2159 } 2160 2161 UStringSearch* searcher = WebCore::searcher(); 2162 2163 UErrorCode status = U_ZERO_ERROR; 2164 usearch_setText(searcher, m_buffer.data(), size, &status); 2165 ASSERT(status == U_ZERO_ERROR); 2166 2167 usearch_setOffset(searcher, m_prefixLength, &status); 2168 ASSERT(status == U_ZERO_ERROR); 2169 2170 int matchStart = usearch_next(searcher, &status); 2171 ASSERT(status == U_ZERO_ERROR); 2172 2173 nextMatch: 2174 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 2175 ASSERT(matchStart == USEARCH_DONE); 2176 return 0; 2177 } 2178 2179 // Matches that start in the overlap area are only tentative. 2180 // The same match may appear later, matching more characters, 2181 // possibly including a combining character that's not yet in the buffer. 2182 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2183 size_t overlap = m_overlap; 2184 if (m_options & AtWordStarts) { 2185 // Ensure that there is sufficient context before matchStart the next time around for 2186 // determining if it is at a word boundary. 2187 int wordBoundaryContextStart = matchStart; 2188 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2189 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 2190 overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart)); 2191 } 2192 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2193 m_prefixLength -= min(m_prefixLength, size - overlap); 2194 m_buffer.shrink(overlap); 2195 return 0; 2196 } 2197 2198 size_t matchedLength = usearch_getMatchedLength(searcher); 2199 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); 2200 2201 // If this match is "bad", move on to the next match. 2202 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 2203 matchStart = usearch_next(searcher, &status); 2204 ASSERT(status == U_ZERO_ERROR); 2205 goto nextMatch; 2206 } 2207 2208 size_t newSize = size - (matchStart + 1); 2209 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2210 m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1); 2211 m_buffer.shrink(newSize); 2212 2213 start = size - matchStart; 2214 return matchedLength; 2215 } 2216 2217 // -------- 2218 2219 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 2220 { 2221 int length = 0; 2222 for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) 2223 length += it.length(); 2224 2225 return length; 2226 } 2227 2228 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2229 { 2230 CharacterIterator entireRangeIterator(entireRange); 2231 return characterSubrange(entireRangeIterator, characterOffset, characterCount); 2232 } 2233 2234 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) 2235 { 2236 RefPtr<Range> resultRange = scope->document()->createRange(); 2237 2238 int docTextPosition = 0; 2239 int rangeEnd = rangeLocation + rangeLength; 2240 bool startRangeFound = false; 2241 2242 RefPtr<Range> textRunRange; 2243 2244 TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); 2245 2246 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. 2247 if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { 2248 textRunRange = it.range(); 2249 2250 resultRange->setStart(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION); 2251 resultRange->setEnd(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION); 2252 2253 return resultRange.release(); 2254 } 2255 2256 for (; !it.atEnd(); it.advance()) { 2257 int len = it.length(); 2258 textRunRange = it.range(); 2259 2260 bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len; 2261 bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len; 2262 2263 // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only 2264 // in those cases that textRunRange is used. 2265 if (foundEnd) { 2266 // FIXME: This is a workaround for the fact that the end of a run is often at the wrong 2267 // position for emitted '\n's. 2268 if (len == 1 && it.characterAt(0) == '\n') { 2269 scope->document()->updateLayoutIgnorePendingStylesheets(); 2270 it.advance(); 2271 if (!it.atEnd()) { 2272 RefPtr<Range> range = it.range(); 2273 textRunRange->setEnd(range->startContainer(), range->startOffset(), ASSERT_NO_EXCEPTION); 2274 } else { 2275 Position runStart = textRunRange->startPosition(); 2276 Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); 2277 if (runEnd.isNotNull()) 2278 textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ASSERT_NO_EXCEPTION); 2279 } 2280 } 2281 } 2282 2283 if (foundStart) { 2284 startRangeFound = true; 2285 if (textRunRange->startContainer()->isTextNode()) { 2286 int offset = rangeLocation - docTextPosition; 2287 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION); 2288 } else { 2289 if (rangeLocation == docTextPosition) 2290 resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION); 2291 else 2292 resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION); 2293 } 2294 } 2295 2296 if (foundEnd) { 2297 if (textRunRange->startContainer()->isTextNode()) { 2298 int offset = rangeEnd - docTextPosition; 2299 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION); 2300 } else { 2301 if (rangeEnd == docTextPosition) 2302 resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION); 2303 else 2304 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION); 2305 } 2306 docTextPosition += len; 2307 break; 2308 } 2309 docTextPosition += len; 2310 } 2311 2312 if (!startRangeFound) 2313 return 0; 2314 2315 if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds 2316 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION); 2317 } 2318 2319 return resultRange.release(); 2320 } 2321 2322 bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length) 2323 { 2324 location = notFound; 2325 length = 0; 2326 2327 if (!range->startContainer()) 2328 return false; 2329 2330 // The critical assumption is that this only gets called with ranges that 2331 // concentrate on a given area containing the selection root. This is done 2332 // because of text fields and textareas. The DOM for those is not 2333 // directly in the document DOM, so ensure that the range does not cross a 2334 // boundary of one of those. 2335 if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope)) 2336 return false; 2337 if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope)) 2338 return false; 2339 2340 RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset()); 2341 ASSERT(testRange->startContainer() == scope); 2342 location = TextIterator::rangeLength(testRange.get()); 2343 2344 testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION); 2345 ASSERT(testRange->startContainer() == scope); 2346 length = TextIterator::rangeLength(testRange.get()) - location; 2347 return true; 2348 } 2349 2350 // -------- 2351 2352 String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDisplayString) 2353 { 2354 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192 2355 static const unsigned initialCapacity = 1 << 15; 2356 2357 unsigned bufferLength = 0; 2358 StringBuilder builder; 2359 builder.reserveCapacity(initialCapacity); 2360 TextIteratorBehavior behavior = defaultBehavior; 2361 if (!isDisplayString) 2362 behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding); 2363 2364 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2365 it.appendTextToStringBuilder(builder); 2366 bufferLength += it.length(); 2367 } 2368 2369 if (!bufferLength) 2370 return emptyString(); 2371 2372 String result = builder.toString(); 2373 2374 if (isDisplayString && r->ownerDocument()) 2375 r->ownerDocument()->displayStringModifiedByEncoding(result); 2376 2377 return result; 2378 } 2379 2380 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2381 { 2382 RefPtr<Range> result = range->cloneRange(ASSERT_NO_EXCEPTION); 2383 result->collapse(!forward, ASSERT_NO_EXCEPTION); 2384 return result.release(); 2385 } 2386 2387 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2388 { 2389 matchStart = 0; 2390 size_t matchLength = 0; 2391 2392 SearchBuffer buffer(target, options); 2393 2394 if (buffer.needsMoreContext()) { 2395 RefPtr<Range> startRange = it.range(); 2396 RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange(); 2397 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION); 2398 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2399 Vector<UChar, 1024> characters; 2400 backwardsIterator.prependTextTo(characters); 2401 buffer.prependContext(characters.data(), characters.size()); 2402 if (!buffer.needsMoreContext()) 2403 break; 2404 } 2405 } 2406 2407 while (!it.atEnd()) { 2408 it.appendTextTo(buffer); 2409 it.advance(buffer.numberOfCharactersJustAppended()); 2410 tryAgain: 2411 size_t matchStartOffset; 2412 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2413 // Note that we found a match, and where we found it. 2414 size_t lastCharacterInBufferOffset = it.characterOffset(); 2415 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2416 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2417 matchLength = newMatchLength; 2418 // If searching forward, stop on the first match. 2419 // If searching backward, don't stop, so we end up with the last match. 2420 if (!(options & Backwards)) 2421 break; 2422 goto tryAgain; 2423 } 2424 if (it.atBreak() && !buffer.atBreak()) { 2425 buffer.reachedBreak(); 2426 goto tryAgain; 2427 } 2428 } 2429 2430 return matchLength; 2431 } 2432 2433 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2434 { 2435 // CharacterIterator requires renderers to be up-to-date 2436 range->ownerDocument()->updateLayout(); 2437 2438 // First, find the text. 2439 size_t matchStart; 2440 size_t matchLength; 2441 { 2442 CharacterIterator findIterator(range, TextIteratorEntersTextControls); 2443 matchLength = findPlainText(findIterator, target, options, matchStart); 2444 if (!matchLength) 2445 return collapsedToBoundary(range, !(options & Backwards)); 2446 } 2447 2448 // Then, find the document position of the start and the end of the text. 2449 CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls); 2450 return characterSubrange(computeRangeIterator, matchStart, matchLength); 2451 } 2452 2453 } 2454