1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/TextIterator.h" 29 30 #include "bindings/core/v8/ExceptionStatePlaceholder.h" 31 #include "core/HTMLNames.h" 32 #include "core/dom/Document.h" 33 #include "core/dom/NodeTraversal.h" 34 #include "core/dom/shadow/ShadowRoot.h" 35 #include "core/editing/VisiblePosition.h" 36 #include "core/editing/VisibleUnits.h" 37 #include "core/editing/htmlediting.h" 38 #include "core/frame/FrameView.h" 39 #include "core/html/HTMLElement.h" 40 #include "core/html/HTMLTextFormControlElement.h" 41 #include "core/rendering/InlineTextBox.h" 42 #include "core/rendering/RenderImage.h" 43 #include "core/rendering/RenderTableCell.h" 44 #include "core/rendering/RenderTableRow.h" 45 #include "core/rendering/RenderTextControl.h" 46 #include "core/rendering/RenderTextFragment.h" 47 #include "platform/fonts/Character.h" 48 #include "platform/fonts/Font.h" 49 #include "platform/text/TextBoundaries.h" 50 #include "platform/text/TextBreakIteratorInternalICU.h" 51 #include "platform/text/UnicodeUtilities.h" 52 #include "wtf/text/CString.h" 53 #include "wtf/text/StringBuilder.h" 54 #include "wtf/unicode/CharacterNames.h" 55 #include <unicode/usearch.h> 56 #include <unicode/utf16.h> 57 58 using namespace WTF::Unicode; 59 60 namespace blink { 61 62 using namespace HTMLNames; 63 64 // Buffer that knows how to compare with a search target. 65 // Keeps enough of the previous text to be able to search in the future, but no more. 66 // Non-breaking spaces are always equal to normal spaces. 67 // Case folding is also done if the CaseInsensitive option is specified. 68 // Matches are further filtered if the AtWordStarts option is specified, although some 69 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 70 class SearchBuffer { 71 WTF_MAKE_NONCOPYABLE(SearchBuffer); 72 public: 73 SearchBuffer(const String& target, FindOptions); 74 ~SearchBuffer(); 75 76 // Returns number of characters appended; guaranteed to be in the range [1, length]. 77 template<typename CharType> 78 void append(const CharType*, size_t length); 79 size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; } 80 81 bool needsMoreContext() const; 82 void prependContext(const UChar*, size_t length); 83 void reachedBreak(); 84 85 // Result is the size in characters of what was found. 86 // And <startOffset> is the number of characters back to the start of what was found. 87 size_t search(size_t& startOffset); 88 bool atBreak() const; 89 90 private: 91 bool isBadMatch(const UChar*, size_t length) const; 92 bool isWordStartMatch(size_t start, size_t length) const; 93 94 Vector<UChar> m_target; 95 FindOptions m_options; 96 97 Vector<UChar> m_buffer; 98 size_t m_overlap; 99 size_t m_prefixLength; 100 size_t m_numberOfCharactersJustAppended; 101 bool m_atBreak; 102 bool m_needsMoreContext; 103 104 bool m_targetRequiresKanaWorkaround; 105 Vector<UChar> m_normalizedTarget; 106 mutable Vector<UChar> m_normalizedMatch; 107 }; 108 109 // -------- 110 111 static const unsigned bitsInWord = sizeof(unsigned) * 8; 112 static const unsigned bitInWordMask = bitsInWord - 1; 113 114 BitStack::BitStack() 115 : m_size(0) 116 { 117 } 118 119 BitStack::~BitStack() 120 { 121 } 122 123 void BitStack::push(bool bit) 124 { 125 unsigned index = m_size / bitsInWord; 126 unsigned shift = m_size & bitInWordMask; 127 if (!shift && index == m_words.size()) { 128 m_words.grow(index + 1); 129 m_words[index] = 0; 130 } 131 unsigned& word = m_words[index]; 132 unsigned mask = 1U << shift; 133 if (bit) 134 word |= mask; 135 else 136 word &= ~mask; 137 ++m_size; 138 } 139 140 void BitStack::pop() 141 { 142 if (m_size) 143 --m_size; 144 } 145 146 bool BitStack::top() const 147 { 148 if (!m_size) 149 return false; 150 unsigned shift = (m_size - 1) & bitInWordMask; 151 return m_words.last() & (1U << shift); 152 } 153 154 unsigned BitStack::size() const 155 { 156 return m_size; 157 } 158 159 // -------- 160 161 #if ENABLE(ASSERT) 162 163 static unsigned depthCrossingShadowBoundaries(Node* node) 164 { 165 unsigned depth = 0; 166 for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 167 ++depth; 168 return depth; 169 } 170 171 #endif 172 173 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 174 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 175 { 176 if (!rangeEndContainer) 177 return 0; 178 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 179 if (Node* next = NodeTraversal::childAt(*rangeEndContainer, rangeEndOffset)) 180 return next; 181 } 182 for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) { 183 if (Node* next = node->nextSibling()) 184 return next; 185 } 186 return 0; 187 } 188 189 // -------- 190 191 static inline bool fullyClipsContents(Node* node) 192 { 193 RenderObject* renderer = node->renderer(); 194 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 195 return false; 196 return toRenderBox(renderer)->size().isEmpty(); 197 } 198 199 static inline bool ignoresContainerClip(Node* node) 200 { 201 RenderObject* renderer = node->renderer(); 202 if (!renderer || renderer->isText()) 203 return false; 204 return renderer->style()->hasOutOfFlowPosition(); 205 } 206 207 static void pushFullyClippedState(BitStack& stack, Node* node) 208 { 209 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 210 211 // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364> 212 // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if 213 // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches 214 // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics 215 // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox). 216 // 217 // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering 218 // primitives, not with DOM primitives. 219 220 // Push true if this node full clips its contents, or if a parent already has fully 221 // clipped and this is not a node that ignores its container's clip. 222 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 223 } 224 225 static void setUpFullyClippedStack(BitStack& stack, Node* node) 226 { 227 // Put the nodes in a vector so we can iterate in reverse order. 228 WillBeHeapVector<RawPtrWillBeMember<ContainerNode>, 100> ancestry; 229 for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 230 ancestry.append(parent); 231 232 // Call pushFullyClippedState on each node starting with the earliest ancestor. 233 size_t size = ancestry.size(); 234 for (size_t i = 0; i < size; ++i) 235 pushFullyClippedState(stack, ancestry[size - i - 1]); 236 pushFullyClippedState(stack, node); 237 238 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 239 } 240 241 // -------- 242 243 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior) 244 : m_startContainer(nullptr) 245 , m_startOffset(0) 246 , m_endContainer(nullptr) 247 , m_endOffset(0) 248 , m_positionNode(nullptr) 249 , m_textLength(0) 250 , m_needsAnotherNewline(false) 251 , m_textBox(0) 252 , m_remainingTextBox(0) 253 , m_firstLetterText(nullptr) 254 , m_lastTextNode(nullptr) 255 , m_lastTextNodeEndedWithCollapsedSpace(false) 256 , m_lastCharacter(0) 257 , m_sortedTextBoxesPosition(0) 258 , m_hasEmitted(false) 259 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 260 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 261 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 262 , m_handledFirstLetter(false) 263 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 264 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 265 , m_shouldStop(false) 266 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 267 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots) 268 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter) 269 { 270 if (range) 271 initialize(range->startPosition(), range->endPosition()); 272 } 273 274 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 275 : m_startContainer(nullptr) 276 , m_startOffset(0) 277 , m_endContainer(nullptr) 278 , m_endOffset(0) 279 , m_positionNode(nullptr) 280 , m_textLength(0) 281 , m_needsAnotherNewline(false) 282 , m_textBox(0) 283 , m_remainingTextBox(0) 284 , m_firstLetterText(nullptr) 285 , m_lastTextNode(nullptr) 286 , m_lastTextNodeEndedWithCollapsedSpace(false) 287 , m_lastCharacter(0) 288 , m_sortedTextBoxesPosition(0) 289 , m_hasEmitted(false) 290 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 291 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 292 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 293 , m_handledFirstLetter(false) 294 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 295 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 296 , m_shouldStop(false) 297 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 298 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots) 299 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter) 300 { 301 initialize(start, end); 302 } 303 304 void TextIterator::initialize(const Position& start, const Position& end) 305 { 306 ASSERT(comparePositions(start, end) <= 0); 307 308 // Get and validate |start| and |end|. 309 Node* startContainer = start.containerNode(); 310 if (!startContainer) 311 return; 312 int startOffset = start.computeOffsetInContainerNode(); 313 Node* endContainer = end.containerNode(); 314 if (!endContainer) 315 return; 316 int endOffset = end.computeOffsetInContainerNode(); 317 318 // Remember the range - this does not change. 319 m_startContainer = startContainer; 320 m_startOffset = startOffset; 321 m_endContainer = endContainer; 322 m_endOffset = endOffset; 323 324 // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from 325 // the common ancestor tree scope. 326 const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope()); 327 ASSERT(commonAncestorTreeScope); 328 m_shadowDepth = 0; 329 for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope()) 330 ++m_shadowDepth; 331 332 // Set up the current node for processing. 333 if (startContainer->offsetInCharacters()) 334 m_node = startContainer; 335 else if (Node* child = NodeTraversal::childAt(*startContainer, startOffset)) 336 m_node = child; 337 else if (!startOffset) 338 m_node = startContainer; 339 else 340 m_node = NodeTraversal::nextSkippingChildren(*startContainer); 341 342 if (!m_node) 343 return; 344 345 m_node->document().updateLayoutIgnorePendingStylesheets(); 346 347 setUpFullyClippedStack(m_fullyClippedStack, m_node); 348 m_offset = m_node == m_startContainer ? m_startOffset : 0; 349 m_iterationProgress = HandledNone; 350 351 // Calculate first out of bounds node. 352 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 353 354 // Identify the first run. 355 advance(); 356 } 357 358 TextIterator::~TextIterator() 359 { 360 } 361 362 bool TextIterator::isInsideReplacedElement() const 363 { 364 if (atEnd() || length() != 1 || !m_node) 365 return false; 366 367 RenderObject* renderer = m_node->renderer(); 368 return renderer && renderer->isReplaced(); 369 } 370 371 void TextIterator::advance() 372 { 373 if (m_shouldStop) 374 return; 375 376 ASSERT(!m_node || !m_node->document().needsRenderTreeUpdate()); 377 378 // reset the run information 379 m_positionNode = nullptr; 380 m_textLength = 0; 381 382 // handle remembered node that needed a newline after the text node's newline 383 if (m_needsAnotherNewline) { 384 // Emit the extra newline, and position it *inside* m_node, after m_node's 385 // contents, in case it's a block, in the same way that we position the first 386 // newline. The range for the emitted newline should start where the line 387 // break begins. 388 // FIXME: It would be cleaner if we emitted two newlines during the last 389 // iteration, instead of using m_needsAnotherNewline. 390 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get(); 391 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 392 m_needsAnotherNewline = false; 393 return; 394 } 395 396 if (!m_textBox && m_remainingTextBox) { 397 m_textBox = m_remainingTextBox; 398 m_remainingTextBox = 0; 399 m_firstLetterText = nullptr; 400 m_offset = 0; 401 } 402 // handle remembered text box 403 if (m_textBox) { 404 handleTextBox(); 405 if (m_positionNode) 406 return; 407 } 408 409 while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) { 410 if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) 411 m_shouldStop = true; 412 413 // if the range ends at offset 0 of an element, represent the 414 // position, but not the content, of that element e.g. if the 415 // node is a blockflow element, emit a newline that 416 // precedes the element 417 if (m_node == m_endContainer && !m_endOffset) { 418 representNodeOffsetZero(); 419 m_node = nullptr; 420 return; 421 } 422 423 RenderObject* renderer = m_node->renderer(); 424 if (!renderer) { 425 if (m_node->isShadowRoot()) { 426 // A shadow root doesn't have a renderer, but we want to visit children anyway. 427 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress; 428 } else { 429 m_iterationProgress = HandledChildren; 430 } 431 } else { 432 // Enter author shadow roots, from youngest, if any and if necessary. 433 if (m_iterationProgress < HandledAuthorShadowRoots) { 434 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) { 435 ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot(); 436 ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot); 437 m_node = youngestShadowRoot; 438 m_iterationProgress = HandledNone; 439 ++m_shadowDepth; 440 pushFullyClippedState(m_fullyClippedStack, m_node); 441 continue; 442 } 443 444 m_iterationProgress = HandledAuthorShadowRoots; 445 } 446 447 // Enter user-agent shadow root, if necessary. 448 if (m_iterationProgress < HandledUserAgentShadowRoot) { 449 if (m_entersTextControls && renderer->isTextControl()) { 450 ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot(); 451 ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 452 m_node = userAgentShadowRoot; 453 m_iterationProgress = HandledNone; 454 ++m_shadowDepth; 455 pushFullyClippedState(m_fullyClippedStack, m_node); 456 continue; 457 } 458 m_iterationProgress = HandledUserAgentShadowRoot; 459 } 460 461 // Handle the current node according to its type. 462 if (m_iterationProgress < HandledNode) { 463 bool handledNode = false; 464 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE? 465 handledNode = handleTextNode(); 466 } else if (renderer && (renderer->isImage() || renderer->isWidget() 467 || (m_node && m_node->isHTMLElement() 468 && (isHTMLFormControlElement(toHTMLElement(*m_node)) 469 || isHTMLLegendElement(toHTMLElement(*m_node)) 470 || isHTMLMeterElement(toHTMLElement(*m_node)) 471 || isHTMLProgressElement(toHTMLElement(*m_node)))))) { 472 handledNode = handleReplacedElement(); 473 } else { 474 handledNode = handleNonTextNode(); 475 } 476 if (handledNode) 477 m_iterationProgress = HandledNode; 478 if (m_positionNode) 479 return; 480 } 481 } 482 483 // Find a new current node to handle in depth-first manner, 484 // calling exitNode() as we come back thru a parent node. 485 // 486 // 1. Iterate over child nodes, if we haven't done yet. 487 Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0; 488 m_offset = 0; 489 if (!next) { 490 // 2. If we've already iterated children or they are not available, go to the next sibling node. 491 next = m_node->nextSibling(); 492 if (!next) { 493 // 3. If we are at the last child, go up the node tree until we find a next sibling. 494 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode; 495 ContainerNode* parentNode = m_node->parentNode(); 496 while (!next && parentNode) { 497 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 498 return; 499 bool haveRenderer = m_node->renderer(); 500 m_node = parentNode; 501 m_fullyClippedStack.pop(); 502 parentNode = m_node->parentNode(); 503 if (haveRenderer) 504 exitNode(); 505 if (m_positionNode) { 506 m_iterationProgress = HandledChildren; 507 return; 508 } 509 next = m_node->nextSibling(); 510 } 511 512 if (!next && !parentNode && m_shadowDepth > 0) { 513 // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next 514 // sibling shadow root, if any. 515 ShadowRoot* shadowRoot = toShadowRoot(m_node); 516 if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 517 ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot(); 518 if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 519 m_fullyClippedStack.pop(); 520 m_node = nextShadowRoot; 521 m_iterationProgress = HandledNone; 522 // m_shadowDepth is unchanged since we exit from a shadow root and enter another. 523 pushFullyClippedState(m_fullyClippedStack, m_node); 524 } else { 525 // We are the last shadow root; exit from here and go back to where we were. 526 m_node = shadowRoot->host(); 527 m_iterationProgress = HandledAuthorShadowRoots; 528 --m_shadowDepth; 529 m_fullyClippedStack.pop(); 530 } 531 } else { 532 // If we are in a user-agent shadow root, then go back to the host. 533 ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 534 m_node = shadowRoot->host(); 535 m_iterationProgress = HandledUserAgentShadowRoot; 536 --m_shadowDepth; 537 m_fullyClippedStack.pop(); 538 } 539 m_handledFirstLetter = false; 540 m_firstLetterText = nullptr; 541 continue; 542 } 543 } 544 m_fullyClippedStack.pop(); 545 } 546 547 // set the new current node 548 m_node = next; 549 if (m_node) 550 pushFullyClippedState(m_fullyClippedStack, m_node); 551 m_iterationProgress = HandledNone; 552 m_handledFirstLetter = false; 553 m_firstLetterText = nullptr; 554 555 // how would this ever be? 556 if (m_positionNode) 557 return; 558 } 559 } 560 561 UChar TextIterator::characterAt(unsigned index) const 562 { 563 ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length())); 564 if (!(index < static_cast<unsigned>(length()))) 565 return 0; 566 567 if (m_singleCharacterBuffer) { 568 ASSERT(!index); 569 ASSERT(length() == 1); 570 return m_singleCharacterBuffer; 571 } 572 573 return string()[positionStartOffset() + index]; 574 } 575 576 String TextIterator::substring(unsigned position, unsigned length) const 577 { 578 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length())); 579 ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length())); 580 if (!length) 581 return emptyString(); 582 if (m_singleCharacterBuffer) { 583 ASSERT(!position); 584 ASSERT(length == 1); 585 return String(&m_singleCharacterBuffer, 1); 586 } 587 return string().substring(positionStartOffset() + position, length); 588 } 589 590 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const 591 { 592 unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength); 593 if (!lengthToAppend) 594 return; 595 if (m_singleCharacterBuffer) { 596 ASSERT(!position); 597 builder.append(m_singleCharacterBuffer); 598 } else { 599 builder.append(string(), positionStartOffset() + position, lengthToAppend); 600 } 601 } 602 603 bool TextIterator::handleTextNode() 604 { 605 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 606 return false; 607 608 Text* textNode = toText(m_node); 609 RenderText* renderer = textNode->renderer(); 610 611 m_lastTextNode = textNode; 612 String str = renderer->text(); 613 614 // handle pre-formatted text 615 if (!renderer->style()->collapseWhiteSpace()) { 616 int runStart = m_offset; 617 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 618 emitCharacter(space, textNode, 0, runStart, runStart); 619 return false; 620 } 621 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) { 622 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 623 if (m_firstLetterText) { 624 String firstLetter = m_firstLetterText->text(); 625 emitText(textNode, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 626 m_firstLetterText = nullptr; 627 m_textBox = 0; 628 return false; 629 } 630 } 631 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 632 return false; 633 int strLength = str.length(); 634 int end = (textNode == m_endContainer) ? m_endOffset : INT_MAX; 635 int runEnd = std::min(strLength, end); 636 637 if (runStart >= runEnd) 638 return true; 639 640 emitText(textNode, textNode->renderer(), runStart, runEnd); 641 return true; 642 } 643 644 if (renderer->firstTextBox()) 645 m_textBox = renderer->firstTextBox(); 646 647 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset; 648 if (shouldHandleFirstLetter) 649 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 650 651 if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) { 652 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 653 return false; 654 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 655 return true; 656 } 657 658 if (m_firstLetterText) 659 renderer = m_firstLetterText; 660 661 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 662 if (renderer->containsReversedText()) { 663 m_sortedTextBoxes.clear(); 664 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 665 m_sortedTextBoxes.append(textBox); 666 } 667 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 668 m_sortedTextBoxesPosition = 0; 669 m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]; 670 } 671 672 handleTextBox(); 673 return true; 674 } 675 676 void TextIterator::handleTextBox() 677 { 678 RenderText* renderer = m_firstLetterText ? m_firstLetterText.get() : toRenderText(m_node->renderer()); 679 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 680 m_textBox = 0; 681 return; 682 } 683 String str = renderer->text(); 684 unsigned start = m_offset; 685 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX; 686 while (m_textBox) { 687 unsigned textBoxStart = m_textBox->start(); 688 unsigned runStart = std::max(textBoxStart, start); 689 690 // Check for collapsed space at the start of this run. 691 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 692 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 693 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 694 if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) { 695 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 696 unsigned spaceRunStart = runStart - 1; 697 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 698 --spaceRunStart; 699 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 700 } else { 701 emitCharacter(space, m_node, 0, runStart, runStart); 702 } 703 return; 704 } 705 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 706 unsigned runEnd = std::min(textBoxEnd, end); 707 708 // Determine what the next text box will be, but don't advance yet 709 InlineTextBox* nextTextBox = 0; 710 if (renderer->containsReversedText()) { 711 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 712 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 713 } else { 714 nextTextBox = m_textBox->nextTextBox(); 715 } 716 ASSERT(!nextTextBox || nextTextBox->renderer() == renderer); 717 718 if (runStart < runEnd) { 719 // Handle either a single newline character (which becomes a space), 720 // or a run of characters that does not include a newline. 721 // This effectively translates newlines to spaces without copying the text. 722 if (str[runStart] == '\n') { 723 emitCharacter(space, m_node, 0, runStart, runStart + 1); 724 m_offset = runStart + 1; 725 } else { 726 size_t subrunEnd = str.find('\n', runStart); 727 if (subrunEnd == kNotFound || subrunEnd > runEnd) 728 subrunEnd = runEnd; 729 730 m_offset = subrunEnd; 731 emitText(m_node, renderer, runStart, subrunEnd); 732 } 733 734 // If we are doing a subrun that doesn't go to the end of the text box, 735 // come back again to finish handling this text box; don't advance to the next one. 736 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 737 return; 738 739 // Advance and return 740 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 741 if (nextRunStart > runEnd) 742 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 743 m_textBox = nextTextBox; 744 if (renderer->containsReversedText()) 745 ++m_sortedTextBoxesPosition; 746 return; 747 } 748 // Advance and continue 749 m_textBox = nextTextBox; 750 if (renderer->containsReversedText()) 751 ++m_sortedTextBoxesPosition; 752 } 753 if (!m_textBox && m_remainingTextBox) { 754 m_textBox = m_remainingTextBox; 755 m_remainingTextBox = 0; 756 m_firstLetterText = nullptr; 757 m_offset = 0; 758 handleTextBox(); 759 } 760 } 761 762 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter) 763 { 764 if (!firstLetter) 765 return 0; 766 767 // FIXME: Should this check descendent objects? 768 for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) { 769 if (current->isText()) 770 return toRenderText(current); 771 } 772 return 0; 773 } 774 775 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 776 { 777 if (renderer->firstLetter()) { 778 RenderBoxModelObject* r = renderer->firstLetter(); 779 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 780 return; 781 if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) { 782 m_handledFirstLetter = true; 783 m_remainingTextBox = m_textBox; 784 m_textBox = firstLetter->firstTextBox(); 785 m_sortedTextBoxes.clear(); 786 m_firstLetterText = firstLetter; 787 } 788 } 789 m_handledFirstLetter = true; 790 } 791 792 bool TextIterator::handleReplacedElement() 793 { 794 if (m_fullyClippedStack.top()) 795 return false; 796 797 RenderObject* renderer = m_node->renderer(); 798 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 799 return false; 800 801 if (m_emitsObjectReplacementCharacter) { 802 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); 803 return true; 804 } 805 806 if (m_lastTextNodeEndedWithCollapsedSpace) { 807 emitCharacter(space, m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 808 return false; 809 } 810 811 if (m_entersTextControls && renderer->isTextControl()) { 812 // The shadow tree should be already visited. 813 return true; 814 } 815 816 m_hasEmitted = true; 817 818 if (m_emitsCharactersBetweenAllVisiblePositions) { 819 // We want replaced elements to behave like punctuation for boundary 820 // finding, and to simply take up space for the selection preservation 821 // code in moveParagraphs, so we use a comma. 822 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 823 return true; 824 } 825 826 m_positionNode = m_node->parentNode(); 827 m_positionOffsetBaseNode = m_node; 828 m_positionStartOffset = 0; 829 m_positionEndOffset = 1; 830 m_singleCharacterBuffer = 0; 831 832 if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) { 833 m_text = toRenderImage(renderer)->altText(); 834 if (!m_text.isEmpty()) { 835 m_textLength = m_text.length(); 836 m_lastCharacter = m_text[m_textLength - 1]; 837 return true; 838 } 839 } 840 841 m_textLength = 0; 842 m_lastCharacter = 0; 843 844 return true; 845 } 846 847 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 848 { 849 if (renderer->style()->visibility() == VISIBLE) 850 return true; 851 if (renderer->isTextFragment()) { 852 RenderTextFragment* fragment = toRenderTextFragment(renderer); 853 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 854 return true; 855 } 856 return false; 857 } 858 859 static bool shouldEmitTabBeforeNode(Node* node) 860 { 861 RenderObject* r = node->renderer(); 862 863 // Table cells are delimited by tabs. 864 if (!r || !isTableCell(node)) 865 return false; 866 867 // Want a tab before every cell other than the first one 868 RenderTableCell* rc = toRenderTableCell(r); 869 RenderTable* t = rc->table(); 870 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 871 } 872 873 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) 874 { 875 RenderObject* renderer = node->renderer(); 876 877 if (renderer ? !renderer->isBR() : !isHTMLBRElement(node)) 878 return false; 879 return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost())); 880 } 881 882 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node) 883 { 884 // Block flow (versus inline flow) is represented by having 885 // a newline both before and after the element. 886 RenderObject* r = node.renderer(); 887 if (!r) { 888 return (node.hasTagName(blockquoteTag) 889 || node.hasTagName(ddTag) 890 || node.hasTagName(divTag) 891 || node.hasTagName(dlTag) 892 || node.hasTagName(dtTag) 893 || node.hasTagName(h1Tag) 894 || node.hasTagName(h2Tag) 895 || node.hasTagName(h3Tag) 896 || node.hasTagName(h4Tag) 897 || node.hasTagName(h5Tag) 898 || node.hasTagName(h6Tag) 899 || node.hasTagName(hrTag) 900 || node.hasTagName(liTag) 901 || node.hasTagName(listingTag) 902 || node.hasTagName(olTag) 903 || node.hasTagName(pTag) 904 || node.hasTagName(preTag) 905 || node.hasTagName(trTag) 906 || node.hasTagName(ulTag)); 907 } 908 909 // Need to make an exception for option and optgroup, because we want to 910 // keep the legacy behavior before we added renderers to them. 911 if (isHTMLOptionElement(node) || isHTMLOptGroupElement(node)) 912 return false; 913 914 // Need to make an exception for table cells, because they are blocks, but we 915 // want them tab-delimited rather than having newlines before and after. 916 if (isTableCell(&node)) 917 return false; 918 919 // Need to make an exception for table row elements, because they are neither 920 // "inline" or "RenderBlock", but we want newlines for them. 921 if (r->isTableRow()) { 922 RenderTable* t = toRenderTableRow(r)->table(); 923 if (t && !t->isInline()) 924 return true; 925 } 926 927 return !r->isInline() && r->isRenderBlock() 928 && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText(); 929 } 930 931 static bool shouldEmitNewlineAfterNode(Node& node) 932 { 933 // FIXME: It should be better but slower to create a VisiblePosition here. 934 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 935 return false; 936 // Check if this is the very last renderer in the document. 937 // If so, then we should not emit a newline. 938 Node* next = &node; 939 do { 940 next = NodeTraversal::nextSkippingChildren(*next); 941 if (next && next->renderer()) 942 return true; 943 } while (next); 944 return false; 945 } 946 947 static bool shouldEmitNewlineBeforeNode(Node& node) 948 { 949 return shouldEmitNewlinesBeforeAndAfterNode(node); 950 } 951 952 static bool shouldEmitExtraNewlineForNode(Node* node) 953 { 954 // When there is a significant collapsed bottom margin, emit an extra 955 // newline for a more realistic result. We end up getting the right 956 // result even without margin collapsing. For example: <div><p>text</p></div> 957 // will work right even if both the <div> and the <p> have bottom margins. 958 RenderObject* r = node->renderer(); 959 if (!r || !r->isBox()) 960 return false; 961 962 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 963 // not to do this at all 964 if (node->hasTagName(h1Tag) 965 || node->hasTagName(h2Tag) 966 || node->hasTagName(h3Tag) 967 || node->hasTagName(h4Tag) 968 || node->hasTagName(h5Tag) 969 || node->hasTagName(h6Tag) 970 || node->hasTagName(pTag)) { 971 RenderStyle* style = r->style(); 972 if (style) { 973 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 974 int fontSize = style->fontDescription().computedPixelSize(); 975 if (bottomMargin * 2 >= fontSize) 976 return true; 977 } 978 } 979 980 return false; 981 } 982 983 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 984 { 985 const String& text = renderer->text(); 986 int length = text.length(); 987 for (int i = textEnd; i < length; ++i) { 988 if (!renderer->style()->isCollapsibleWhiteSpace(text[i])) 989 return i - textEnd; 990 } 991 992 return length - textEnd; 993 } 994 995 static int maxOffsetIncludingCollapsedSpaces(Node* node) 996 { 997 int offset = caretMaxOffset(node); 998 999 if (node->renderer() && node->renderer()->isText()) 1000 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 1001 1002 return offset; 1003 } 1004 1005 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 1006 bool TextIterator::shouldRepresentNodeOffsetZero() 1007 { 1008 if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTableElement(m_node)) 1009 return true; 1010 1011 // Leave element positioned flush with start of a paragraph 1012 // (e.g. do not insert tab before a table cell at the start of a paragraph) 1013 if (m_lastCharacter == '\n') 1014 return false; 1015 1016 // Otherwise, show the position if we have emitted any characters 1017 if (m_hasEmitted) 1018 return true; 1019 1020 // We've not emitted anything yet. Generally, there is no need for any positioning then. 1021 // The only exception is when the element is visually not in the same line as 1022 // the start of the range (e.g. the range starts at the end of the previous paragraph). 1023 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 1024 // make quicker checks to possibly avoid that. Another check that we could make is 1025 // is whether the inline vs block flow changed since the previous visible element. 1026 // I think we're already in a special enough case that that won't be needed, tho. 1027 1028 // No character needed if this is the first node in the range. 1029 if (m_node == m_startContainer) 1030 return false; 1031 1032 // If we are outside the start container's subtree, assume we need to emit. 1033 // FIXME: m_startContainer could be an inline block 1034 if (!m_node->isDescendantOf(m_startContainer)) 1035 return true; 1036 1037 // If we started as m_startContainer offset 0 and the current node is a descendant of 1038 // the start container, we already had enough context to correctly decide whether to 1039 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 1040 // so don't second guess that now. 1041 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 1042 // immaterial since we likely would have already emitted something by now. 1043 if (!m_startOffset) 1044 return false; 1045 1046 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 1047 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 1048 // we would create VisiblePositions on every call to this function without this check. 1049 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE 1050 || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node))) 1051 return false; 1052 1053 // The startPos.isNotNull() check is needed because the start could be before the body, 1054 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 1055 // The currPos.isNotNull() check is needed because positions in non-HTML content 1056 // (like SVG) do not have visible positions, and we don't want to emit for them either. 1057 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 1058 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 1059 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 1060 } 1061 1062 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 1063 { 1064 return isRenderedTableElement(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 1065 } 1066 1067 void TextIterator::representNodeOffsetZero() 1068 { 1069 // Emit a character to show the positioning of m_node. 1070 1071 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 1072 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 1073 // on m_node to see if it necessitates emitting a character first and will early return 1074 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 1075 if (shouldEmitTabBeforeNode(m_node)) { 1076 if (shouldRepresentNodeOffsetZero()) 1077 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 1078 } else if (shouldEmitNewlineBeforeNode(*m_node)) { 1079 if (shouldRepresentNodeOffsetZero()) 1080 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 1081 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 1082 if (shouldRepresentNodeOffsetZero()) 1083 emitCharacter(space, m_node->parentNode(), m_node, 0, 0); 1084 } 1085 } 1086 1087 bool TextIterator::handleNonTextNode() 1088 { 1089 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText)) 1090 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 1091 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 1092 emitCharacter(space, m_node->parentNode(), m_node, 0, 1); 1093 else 1094 representNodeOffsetZero(); 1095 1096 return true; 1097 } 1098 1099 void TextIterator::flushPositionOffsets() const 1100 { 1101 if (!m_positionOffsetBaseNode) 1102 return; 1103 int index = m_positionOffsetBaseNode->nodeIndex(); 1104 m_positionStartOffset += index; 1105 m_positionEndOffset += index; 1106 m_positionOffsetBaseNode = nullptr; 1107 } 1108 1109 void TextIterator::exitNode() 1110 { 1111 // prevent emitting a newline when exiting a collapsed block at beginning of the range 1112 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 1113 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 1114 // therefore look like a blank line. 1115 if (!m_hasEmitted) 1116 return; 1117 1118 // Emit with a position *inside* m_node, after m_node's contents, in 1119 // case it is a block, because the run should start where the 1120 // emitted character is positioned visually. 1121 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get(); 1122 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 1123 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 1124 // TextIterator in _web_attributedStringFromRange. 1125 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 1126 if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) { 1127 // use extra newline to represent margin bottom, as needed 1128 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 1129 1130 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 1131 // contain a VisiblePosition when doing selection preservation. 1132 if (m_lastCharacter != '\n') { 1133 // insert a newline with a position following this block's contents. 1134 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1135 // remember whether to later add a newline for the current node 1136 ASSERT(!m_needsAnotherNewline); 1137 m_needsAnotherNewline = addNewline; 1138 } else if (addNewline) { 1139 // insert a newline with a position following this block's contents. 1140 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1141 } 1142 } 1143 1144 // If nothing was emitted, see if we need to emit a space. 1145 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 1146 emitCharacter(space, baseNode->parentNode(), baseNode, 1, 1); 1147 } 1148 1149 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 1150 { 1151 m_hasEmitted = true; 1152 1153 // remember information with which to construct the TextIterator::range() 1154 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 1155 m_positionNode = textNode; 1156 m_positionOffsetBaseNode = offsetBaseNode; 1157 m_positionStartOffset = textStartOffset; 1158 m_positionEndOffset = textEndOffset; 1159 1160 // remember information with which to construct the TextIterator::characters() and length() 1161 m_singleCharacterBuffer = c; 1162 ASSERT(m_singleCharacterBuffer); 1163 m_textLength = 1; 1164 1165 // remember some iteration state 1166 m_lastTextNodeEndedWithCollapsedSpace = false; 1167 m_lastCharacter = c; 1168 } 1169 1170 void TextIterator::emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset) 1171 { 1172 m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text(); 1173 ASSERT(!m_text.isEmpty()); 1174 ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length())); 1175 ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length())); 1176 ASSERT(textStartOffset <= textEndOffset); 1177 1178 m_positionNode = textNode; 1179 m_positionOffsetBaseNode = nullptr; 1180 m_positionStartOffset = textStartOffset; 1181 m_positionEndOffset = textEndOffset; 1182 m_singleCharacterBuffer = 0; 1183 m_textLength = textEndOffset - textStartOffset; 1184 m_lastCharacter = m_text[textEndOffset - 1]; 1185 1186 m_lastTextNodeEndedWithCollapsedSpace = false; 1187 m_hasEmitted = true; 1188 } 1189 1190 PassRefPtrWillBeRawPtr<Range> TextIterator::createRange() const 1191 { 1192 // use the current run information, if we have it 1193 if (m_positionNode) { 1194 flushPositionOffsets(); 1195 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1196 } 1197 1198 // otherwise, return the end of the overall range we were given 1199 if (m_endContainer) 1200 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1201 1202 return nullptr; 1203 } 1204 1205 Document* TextIterator::ownerDocument() const 1206 { 1207 if (m_positionNode) 1208 return &m_positionNode->document(); 1209 if (m_endContainer) 1210 return &m_endContainer->document(); 1211 return 0; 1212 } 1213 1214 Node* TextIterator::node() const 1215 { 1216 if (m_positionNode || m_endContainer) { 1217 Node* node = startContainer(); 1218 if (node->offsetInCharacters()) 1219 return node; 1220 return NodeTraversal::childAt(*node, startOffset()); 1221 } 1222 return 0; 1223 } 1224 1225 int TextIterator::startOffset() const 1226 { 1227 if (m_positionNode) { 1228 flushPositionOffsets(); 1229 return m_positionStartOffset; 1230 } 1231 ASSERT(m_endContainer); 1232 return m_endOffset; 1233 } 1234 1235 int TextIterator::endOffset() const 1236 { 1237 if (m_positionNode) { 1238 flushPositionOffsets(); 1239 return m_positionEndOffset; 1240 } 1241 ASSERT(m_endContainer); 1242 return m_endOffset; 1243 } 1244 1245 Node* TextIterator::startContainer() const 1246 { 1247 if (m_positionNode) { 1248 return m_positionNode; 1249 } 1250 ASSERT(m_endContainer); 1251 return m_endContainer; 1252 } 1253 1254 Node* TextIterator::endContainer() const 1255 { 1256 return startContainer(); 1257 } 1258 1259 Position TextIterator::startPosition() const 1260 { 1261 return createLegacyEditingPosition(startContainer(), startOffset()); 1262 } 1263 1264 Position TextIterator::endPosition() const 1265 { 1266 return createLegacyEditingPosition(endContainer(), endOffset()); 1267 } 1268 1269 // -------- 1270 1271 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior) 1272 : m_node(nullptr) 1273 , m_offset(0) 1274 , m_handledNode(false) 1275 , m_handledChildren(false) 1276 , m_startNode(nullptr) 1277 , m_startOffset(0) 1278 , m_endNode(nullptr) 1279 , m_endOffset(0) 1280 , m_positionNode(nullptr) 1281 , m_positionStartOffset(0) 1282 , m_positionEndOffset(0) 1283 , m_textOffset(0) 1284 , m_textLength(0) 1285 , m_lastTextNode(nullptr) 1286 , m_lastCharacter(0) 1287 , m_singleCharacterBuffer(0) 1288 , m_havePassedStartNode(false) 1289 , m_shouldHandleFirstLetter(false) 1290 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1291 , m_shouldStop(false) 1292 , m_emitsOriginalText(false) 1293 { 1294 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1295 1296 if (!r) 1297 return; 1298 1299 Node* startNode = r->startContainer(); 1300 if (!startNode) 1301 return; 1302 Node* endNode = r->endContainer(); 1303 int startOffset = r->startOffset(); 1304 int endOffset = r->endOffset(); 1305 1306 init(startNode, endNode, startOffset, endOffset); 1307 } 1308 1309 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 1310 : m_node(nullptr) 1311 , m_offset(0) 1312 , m_handledNode(false) 1313 , m_handledChildren(false) 1314 , m_startNode(nullptr) 1315 , m_startOffset(0) 1316 , m_endNode(nullptr) 1317 , m_endOffset(0) 1318 , m_positionNode(nullptr) 1319 , m_positionStartOffset(0) 1320 , m_positionEndOffset(0) 1321 , m_textOffset(0) 1322 , m_textLength(0) 1323 , m_lastTextNode(nullptr) 1324 , m_lastCharacter(0) 1325 , m_singleCharacterBuffer(0) 1326 , m_havePassedStartNode(false) 1327 , m_shouldHandleFirstLetter(false) 1328 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1329 , m_shouldStop(false) 1330 , m_emitsOriginalText(false) 1331 { 1332 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1333 1334 Node* startNode = start.deprecatedNode(); 1335 if (!startNode) 1336 return; 1337 Node* endNode = end.deprecatedNode(); 1338 int startOffset = start.deprecatedEditingOffset(); 1339 int endOffset = end.deprecatedEditingOffset(); 1340 1341 init(startNode, endNode, startOffset, endOffset); 1342 } 1343 1344 void SimplifiedBackwardsTextIterator::init(Node* startNode, Node* endNode, int startOffset, int endOffset) 1345 { 1346 if (!startNode->offsetInCharacters() && startOffset >= 0) { 1347 // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior 1348 // instead of calling countChildren() to avoid traversing the children twice. 1349 if (Node* childAtOffset = NodeTraversal::childAt(*startNode, startOffset)) { 1350 startNode = childAtOffset; 1351 startOffset = 0; 1352 } 1353 } 1354 if (!endNode->offsetInCharacters() && endOffset > 0) { 1355 // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior 1356 // instead of calling countChildren() to avoid traversing the children twice. 1357 if (Node* childAtOffset = NodeTraversal::childAt(*endNode, endOffset - 1)) { 1358 endNode = childAtOffset; 1359 endOffset = lastOffsetInNode(endNode); 1360 } 1361 } 1362 1363 m_node = endNode; 1364 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1365 m_offset = endOffset; 1366 m_handledNode = false; 1367 m_handledChildren = !endOffset; 1368 1369 m_startNode = startNode; 1370 m_startOffset = startOffset; 1371 m_endNode = endNode; 1372 m_endOffset = endOffset; 1373 1374 #if ENABLE(ASSERT) 1375 // Need this just because of the assert. 1376 m_positionNode = endNode; 1377 #endif 1378 1379 m_lastTextNode = nullptr; 1380 m_lastCharacter = '\n'; 1381 1382 m_havePassedStartNode = false; 1383 1384 advance(); 1385 } 1386 1387 void SimplifiedBackwardsTextIterator::advance() 1388 { 1389 ASSERT(m_positionNode); 1390 1391 if (m_shouldStop) 1392 return; 1393 1394 if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) { 1395 m_shouldStop = true; 1396 return; 1397 } 1398 1399 m_positionNode = nullptr; 1400 m_textLength = 0; 1401 1402 while (m_node && !m_havePassedStartNode) { 1403 // Don't handle node if we start iterating at [node, 0]. 1404 if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) { 1405 RenderObject* renderer = m_node->renderer(); 1406 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1407 // FIXME: What about CDATA_SECTION_NODE? 1408 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1409 m_handledNode = handleTextNode(); 1410 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1411 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1412 m_handledNode = handleReplacedElement(); 1413 } else { 1414 m_handledNode = handleNonTextNode(); 1415 } 1416 if (m_positionNode) 1417 return; 1418 } 1419 1420 if (!m_handledChildren && m_node->hasChildren()) { 1421 m_node = m_node->lastChild(); 1422 pushFullyClippedState(m_fullyClippedStack, m_node); 1423 } else { 1424 // Exit empty containers as we pass over them or containers 1425 // where [container, 0] is where we started iterating. 1426 if (!m_handledNode 1427 && canHaveChildrenForEditing(m_node) 1428 && m_node->parentNode() 1429 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1430 exitNode(); 1431 if (m_positionNode) { 1432 m_handledNode = true; 1433 m_handledChildren = true; 1434 return; 1435 } 1436 } 1437 1438 // Exit all other containers. 1439 while (!m_node->previousSibling()) { 1440 if (!advanceRespectingRange(m_node->parentOrShadowHostNode())) 1441 break; 1442 m_fullyClippedStack.pop(); 1443 exitNode(); 1444 if (m_positionNode) { 1445 m_handledNode = true; 1446 m_handledChildren = true; 1447 return; 1448 } 1449 } 1450 1451 m_fullyClippedStack.pop(); 1452 if (advanceRespectingRange(m_node->previousSibling())) 1453 pushFullyClippedState(m_fullyClippedStack, m_node); 1454 else 1455 m_node = nullptr; 1456 } 1457 1458 // For the purpose of word boundary detection, 1459 // we should iterate all visible text and trailing (collapsed) whitespaces. 1460 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1461 m_handledNode = false; 1462 m_handledChildren = false; 1463 1464 if (m_positionNode) 1465 return; 1466 } 1467 } 1468 1469 bool SimplifiedBackwardsTextIterator::handleTextNode() 1470 { 1471 m_lastTextNode = toText(m_node); 1472 1473 int startOffset; 1474 int offsetInNode; 1475 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode); 1476 if (!renderer) 1477 return true; 1478 1479 String text = renderer->text(); 1480 if (!renderer->firstTextBox() && text.length() > 0) 1481 return true; 1482 1483 m_positionEndOffset = m_offset; 1484 m_offset = startOffset + offsetInNode; 1485 m_positionNode = m_node; 1486 m_positionStartOffset = m_offset; 1487 1488 ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length())); 1489 ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); 1490 ASSERT(m_positionStartOffset <= m_positionEndOffset); 1491 1492 m_textLength = m_positionEndOffset - m_positionStartOffset; 1493 m_textOffset = m_positionStartOffset - offsetInNode; 1494 m_textContainer = text; 1495 m_singleCharacterBuffer = 0; 1496 RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length()); 1497 1498 m_lastCharacter = text[m_positionEndOffset - 1]; 1499 1500 return !m_shouldHandleFirstLetter; 1501 } 1502 1503 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) 1504 { 1505 RenderText* renderer = toRenderText(m_node->renderer()); 1506 startOffset = (m_node == m_startNode) ? m_startOffset : 0; 1507 1508 if (!renderer->isTextFragment()) { 1509 offsetInNode = 0; 1510 return renderer; 1511 } 1512 1513 RenderTextFragment* fragment = toRenderTextFragment(renderer); 1514 int offsetAfterFirstLetter = fragment->start(); 1515 if (startOffset >= offsetAfterFirstLetter) { 1516 ASSERT(!m_shouldHandleFirstLetter); 1517 offsetInNode = offsetAfterFirstLetter; 1518 return renderer; 1519 } 1520 1521 if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) { 1522 m_shouldHandleFirstLetter = true; 1523 offsetInNode = offsetAfterFirstLetter; 1524 return renderer; 1525 } 1526 1527 m_shouldHandleFirstLetter = false; 1528 offsetInNode = 0; 1529 RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter()); 1530 1531 m_offset = firstLetterRenderer->caretMaxOffset(); 1532 m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset); 1533 1534 return firstLetterRenderer; 1535 } 1536 1537 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1538 { 1539 unsigned index = m_node->nodeIndex(); 1540 // We want replaced elements to behave like punctuation for boundary 1541 // finding, and to simply take up space for the selection preservation 1542 // code in moveParagraphs, so we use a comma. Unconditionally emit 1543 // here because this iterator is only used for boundary finding. 1544 emitCharacter(',', m_node->parentNode(), index, index + 1); 1545 return true; 1546 } 1547 1548 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1549 { 1550 // We can use a linefeed in place of a tab because this simple iterator is only used to 1551 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1552 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1553 unsigned index = m_node->nodeIndex(); 1554 // The start of this emitted range is wrong. Ensuring correctness would require 1555 // VisiblePositions and so would be slow. previousBoundary expects this. 1556 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1557 } 1558 return true; 1559 } 1560 1561 void SimplifiedBackwardsTextIterator::exitNode() 1562 { 1563 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1564 // The start of this emitted range is wrong. Ensuring correctness would require 1565 // VisiblePositions and so would be slow. previousBoundary expects this. 1566 emitCharacter('\n', m_node, 0, 0); 1567 } 1568 } 1569 1570 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1571 { 1572 m_singleCharacterBuffer = c; 1573 m_positionNode = node; 1574 m_positionStartOffset = startOffset; 1575 m_positionEndOffset = endOffset; 1576 m_textOffset = 0; 1577 m_textLength = 1; 1578 m_lastCharacter = c; 1579 } 1580 1581 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1582 { 1583 if (!next) 1584 return false; 1585 m_havePassedStartNode |= m_node == m_startNode; 1586 if (m_havePassedStartNode) 1587 return false; 1588 m_node = next; 1589 return true; 1590 } 1591 1592 Node* SimplifiedBackwardsTextIterator::startContainer() const 1593 { 1594 if (m_positionNode) 1595 return m_positionNode; 1596 return m_startNode; 1597 } 1598 1599 int SimplifiedBackwardsTextIterator::endOffset() const 1600 { 1601 if (m_positionNode) 1602 return m_positionEndOffset; 1603 return m_startOffset; 1604 } 1605 1606 Position SimplifiedBackwardsTextIterator::startPosition() const 1607 { 1608 if (m_positionNode) 1609 return createLegacyEditingPosition(m_positionNode, m_positionStartOffset); 1610 return createLegacyEditingPosition(m_startNode, m_startOffset); 1611 } 1612 1613 Position SimplifiedBackwardsTextIterator::endPosition() const 1614 { 1615 if (m_positionNode) 1616 return createLegacyEditingPosition(m_positionNode, m_positionEndOffset); 1617 return createLegacyEditingPosition(m_startNode, m_startOffset); 1618 } 1619 1620 // -------- 1621 1622 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior) 1623 : m_offset(0) 1624 , m_runOffset(0) 1625 , m_atBreak(true) 1626 , m_textIterator(range, behavior) 1627 { 1628 initialize(); 1629 } 1630 1631 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 1632 : m_offset(0) 1633 , m_runOffset(0) 1634 , m_atBreak(true) 1635 , m_textIterator(start, end, behavior) 1636 { 1637 initialize(); 1638 } 1639 1640 void CharacterIterator::initialize() 1641 { 1642 while (!atEnd() && !m_textIterator.length()) 1643 m_textIterator.advance(); 1644 } 1645 1646 PassRefPtrWillBeRawPtr<Range> CharacterIterator::createRange() const 1647 { 1648 RefPtrWillBeRawPtr<Range> r = m_textIterator.createRange(); 1649 if (!m_textIterator.atEnd()) { 1650 if (m_textIterator.length() <= 1) { 1651 ASSERT(!m_runOffset); 1652 } else { 1653 Node* n = r->startContainer(); 1654 ASSERT(n == r->endContainer()); 1655 int offset = r->startOffset() + m_runOffset; 1656 r->setStart(n, offset, ASSERT_NO_EXCEPTION); 1657 r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION); 1658 } 1659 } 1660 return r.release(); 1661 } 1662 1663 Document* CharacterIterator::ownerDocument() const 1664 { 1665 return m_textIterator.ownerDocument(); 1666 } 1667 1668 Node* CharacterIterator::startContainer() const 1669 { 1670 return m_textIterator.startContainer(); 1671 } 1672 1673 Node* CharacterIterator::endContainer() const 1674 { 1675 return m_textIterator.endContainer(); 1676 } 1677 1678 int CharacterIterator::startOffset() const 1679 { 1680 if (!m_textIterator.atEnd()) { 1681 if (m_textIterator.length() > 1) 1682 return m_textIterator.startOffset() + m_runOffset; 1683 ASSERT(!m_runOffset); 1684 } 1685 return m_textIterator.startOffset(); 1686 } 1687 1688 int CharacterIterator::endOffset() const 1689 { 1690 if (!m_textIterator.atEnd()) { 1691 if (m_textIterator.length() > 1) 1692 return m_textIterator.startOffset() + m_runOffset + 1; 1693 ASSERT(!m_runOffset); 1694 } 1695 return m_textIterator.endOffset(); 1696 } 1697 1698 Position CharacterIterator::startPosition() const 1699 { 1700 if (!m_textIterator.atEnd()) { 1701 if (m_textIterator.length() > 1) { 1702 Node* n = m_textIterator.startContainer(); 1703 int offset = m_textIterator.startOffset() + m_runOffset; 1704 return createLegacyEditingPosition(n, offset); 1705 } 1706 ASSERT(!m_runOffset); 1707 } 1708 return m_textIterator.startPosition(); 1709 } 1710 1711 Position CharacterIterator::endPosition() const 1712 { 1713 if (!m_textIterator.atEnd()) { 1714 if (m_textIterator.length() > 1) { 1715 Node* n = m_textIterator.startContainer(); 1716 int offset = m_textIterator.startOffset() + m_runOffset; 1717 return createLegacyEditingPosition(n, offset + 1); 1718 } 1719 ASSERT(!m_runOffset); 1720 } 1721 return m_textIterator.endPosition(); 1722 } 1723 1724 void CharacterIterator::advance(int count) 1725 { 1726 if (count <= 0) { 1727 ASSERT(!count); 1728 return; 1729 } 1730 1731 m_atBreak = false; 1732 1733 // easy if there is enough left in the current m_textIterator run 1734 int remaining = m_textIterator.length() - m_runOffset; 1735 if (count < remaining) { 1736 m_runOffset += count; 1737 m_offset += count; 1738 return; 1739 } 1740 1741 // exhaust the current m_textIterator run 1742 count -= remaining; 1743 m_offset += remaining; 1744 1745 // move to a subsequent m_textIterator run 1746 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1747 int runLength = m_textIterator.length(); 1748 if (!runLength) { 1749 m_atBreak = true; 1750 } else { 1751 // see whether this is m_textIterator to use 1752 if (count < runLength) { 1753 m_runOffset = count; 1754 m_offset += count; 1755 return; 1756 } 1757 1758 // exhaust this m_textIterator run 1759 count -= runLength; 1760 m_offset += runLength; 1761 } 1762 } 1763 1764 // ran to the end of the m_textIterator... no more runs left 1765 m_atBreak = true; 1766 m_runOffset = 0; 1767 } 1768 1769 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition) 1770 { 1771 it.advance(offset); 1772 startPosition = it.startPosition(); 1773 1774 if (length > 1) 1775 it.advance(length - 1); 1776 endPosition = it.endPosition(); 1777 } 1778 1779 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior) 1780 : m_offset(0) 1781 , m_runOffset(0) 1782 , m_atBreak(true) 1783 , m_textIterator(range, behavior) 1784 { 1785 while (!atEnd() && !m_textIterator.length()) 1786 m_textIterator.advance(); 1787 } 1788 1789 BackwardsCharacterIterator::BackwardsCharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 1790 : m_offset(0) 1791 , m_runOffset(0) 1792 , m_atBreak(true) 1793 , m_textIterator(start, end, behavior) 1794 { 1795 while (!atEnd() && !m_textIterator.length()) 1796 m_textIterator.advance(); 1797 } 1798 1799 Position BackwardsCharacterIterator::endPosition() const 1800 { 1801 if (!m_textIterator.atEnd()) { 1802 if (m_textIterator.length() > 1) { 1803 Node* n = m_textIterator.startContainer(); 1804 return createLegacyEditingPosition(n, m_textIterator.endOffset() - m_runOffset); 1805 } 1806 ASSERT(!m_runOffset); 1807 } 1808 return m_textIterator.endPosition(); 1809 } 1810 1811 void BackwardsCharacterIterator::advance(int count) 1812 { 1813 if (count <= 0) { 1814 ASSERT(!count); 1815 return; 1816 } 1817 1818 m_atBreak = false; 1819 1820 int remaining = m_textIterator.length() - m_runOffset; 1821 if (count < remaining) { 1822 m_runOffset += count; 1823 m_offset += count; 1824 return; 1825 } 1826 1827 count -= remaining; 1828 m_offset += remaining; 1829 1830 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1831 int runLength = m_textIterator.length(); 1832 if (!runLength) { 1833 m_atBreak = true; 1834 } else { 1835 if (count < runLength) { 1836 m_runOffset = count; 1837 m_offset += count; 1838 return; 1839 } 1840 1841 count -= runLength; 1842 m_offset += runLength; 1843 } 1844 } 1845 1846 m_atBreak = true; 1847 m_runOffset = 0; 1848 } 1849 1850 // -------- 1851 1852 WordAwareIterator::WordAwareIterator(const Position& start, const Position& end) 1853 : m_didLookAhead(true) // So we consider the first chunk from the text iterator. 1854 , m_textIterator(start, end) 1855 { 1856 advance(); // Get in position over the first chunk of text. 1857 } 1858 1859 WordAwareIterator::~WordAwareIterator() 1860 { 1861 } 1862 1863 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1864 1865 void WordAwareIterator::advance() 1866 { 1867 m_buffer.clear(); 1868 1869 // If last time we did a look-ahead, start with that looked-ahead chunk now 1870 if (!m_didLookAhead) { 1871 ASSERT(!m_textIterator.atEnd()); 1872 m_textIterator.advance(); 1873 } 1874 m_didLookAhead = false; 1875 1876 // Go to next non-empty chunk. 1877 while (!m_textIterator.atEnd() && !m_textIterator.length()) 1878 m_textIterator.advance(); 1879 1880 if (m_textIterator.atEnd()) 1881 return; 1882 1883 while (1) { 1884 // If this chunk ends in whitespace we can just use it as our chunk. 1885 if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1))) 1886 return; 1887 1888 // If this is the first chunk that failed, save it in m_buffer before look ahead. 1889 if (m_buffer.isEmpty()) 1890 m_textIterator.appendTextTo(m_buffer); 1891 1892 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1893 m_textIterator.advance(); 1894 if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) { 1895 m_didLookAhead = true; 1896 return; 1897 } 1898 1899 // Start gobbling chunks until we get to a suitable stopping point 1900 m_textIterator.appendTextTo(m_buffer); 1901 } 1902 } 1903 1904 int WordAwareIterator::length() const 1905 { 1906 if (!m_buffer.isEmpty()) 1907 return m_buffer.size(); 1908 return m_textIterator.length(); 1909 } 1910 1911 String WordAwareIterator::substring(unsigned position, unsigned length) const 1912 { 1913 if (!m_buffer.isEmpty()) 1914 return String(m_buffer.data() + position, length); 1915 return m_textIterator.substring(position, length); 1916 } 1917 1918 UChar WordAwareIterator::characterAt(unsigned index) const 1919 { 1920 if (!m_buffer.isEmpty()) 1921 return m_buffer[index]; 1922 return m_textIterator.characterAt(index); 1923 } 1924 1925 // -------- 1926 1927 static const size_t minimumSearchBufferSize = 8192; 1928 1929 #if ENABLE(ASSERT) 1930 static bool searcherInUse; 1931 #endif 1932 1933 static UStringSearch* createSearcher() 1934 { 1935 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1936 // but it doesn't matter exactly what it is, since we don't perform any searches 1937 // without setting both the pattern and the text. 1938 UErrorCode status = U_ZERO_ERROR; 1939 String searchCollatorName = currentSearchLocaleID() + String("@collation=search"); 1940 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status); 1941 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1942 return searcher; 1943 } 1944 1945 static UStringSearch* searcher() 1946 { 1947 static UStringSearch* searcher = createSearcher(); 1948 return searcher; 1949 } 1950 1951 static inline void lockSearcher() 1952 { 1953 #if ENABLE(ASSERT) 1954 ASSERT(!searcherInUse); 1955 searcherInUse = true; 1956 #endif 1957 } 1958 1959 static inline void unlockSearcher() 1960 { 1961 #if ENABLE(ASSERT) 1962 ASSERT(searcherInUse); 1963 searcherInUse = false; 1964 #endif 1965 } 1966 1967 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1968 : m_options(options) 1969 , m_prefixLength(0) 1970 , m_numberOfCharactersJustAppended(0) 1971 , m_atBreak(true) 1972 , m_needsMoreContext(options & AtWordStarts) 1973 , m_targetRequiresKanaWorkaround(containsKanaLetters(target)) 1974 { 1975 ASSERT(!target.isEmpty()); 1976 target.appendTo(m_target); 1977 1978 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1979 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1980 // to add tailoring on top of the locale-specific tailoring as of this writing. 1981 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); 1982 1983 size_t targetLength = m_target.size(); 1984 m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize)); 1985 m_overlap = m_buffer.capacity() / 4; 1986 1987 if ((m_options & AtWordStarts) && targetLength) { 1988 UChar32 targetFirstCharacter; 1989 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); 1990 // Characters in the separator category never really occur at the beginning of a word, 1991 // so if the target begins with such a character, we just ignore the AtWordStart option. 1992 if (isSeparator(targetFirstCharacter)) { 1993 m_options &= ~AtWordStarts; 1994 m_needsMoreContext = false; 1995 } 1996 } 1997 1998 // Grab the single global searcher. 1999 // If we ever have a reason to do more than once search buffer at once, we'll have 2000 // to move to multiple searchers. 2001 lockSearcher(); 2002 2003 UStringSearch* searcher = blink::searcher(); 2004 UCollator* collator = usearch_getCollator(searcher); 2005 2006 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 2007 if (ucol_getStrength(collator) != strength) { 2008 ucol_setStrength(collator, strength); 2009 usearch_reset(searcher); 2010 } 2011 2012 UErrorCode status = U_ZERO_ERROR; 2013 usearch_setPattern(searcher, m_target.data(), targetLength, &status); 2014 ASSERT(status == U_ZERO_ERROR); 2015 2016 // The kana workaround requires a normalized copy of the target string. 2017 if (m_targetRequiresKanaWorkaround) 2018 normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget); 2019 } 2020 2021 inline SearchBuffer::~SearchBuffer() 2022 { 2023 // Leave the static object pointing to a valid string. 2024 UErrorCode status = U_ZERO_ERROR; 2025 usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status); 2026 ASSERT(status == U_ZERO_ERROR); 2027 2028 unlockSearcher(); 2029 } 2030 2031 template<typename CharType> 2032 inline void SearchBuffer::append(const CharType* characters, size_t length) 2033 { 2034 ASSERT(length); 2035 2036 if (m_atBreak) { 2037 m_buffer.shrink(0); 2038 m_prefixLength = 0; 2039 m_atBreak = false; 2040 } else if (m_buffer.size() == m_buffer.capacity()) { 2041 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 2042 m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap); 2043 m_buffer.shrink(m_overlap); 2044 } 2045 2046 size_t oldLength = m_buffer.size(); 2047 size_t usableLength = std::min(m_buffer.capacity() - oldLength, length); 2048 ASSERT(usableLength); 2049 m_buffer.resize(oldLength + usableLength); 2050 UChar* destination = m_buffer.data() + oldLength; 2051 StringImpl::copyChars(destination, characters, usableLength); 2052 foldQuoteMarksAndSoftHyphens(destination, usableLength); 2053 m_numberOfCharactersJustAppended = usableLength; 2054 } 2055 2056 inline bool SearchBuffer::needsMoreContext() const 2057 { 2058 return m_needsMoreContext; 2059 } 2060 2061 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 2062 { 2063 ASSERT(m_needsMoreContext); 2064 ASSERT(m_prefixLength == m_buffer.size()); 2065 2066 if (!length) 2067 return; 2068 2069 m_atBreak = false; 2070 2071 size_t wordBoundaryContextStart = length; 2072 if (wordBoundaryContextStart) { 2073 U16_BACK_1(characters, 0, wordBoundaryContextStart); 2074 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 2075 } 2076 2077 size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 2078 m_buffer.prepend(characters + length - usableLength, usableLength); 2079 m_prefixLength += usableLength; 2080 2081 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 2082 m_needsMoreContext = false; 2083 } 2084 2085 inline bool SearchBuffer::atBreak() const 2086 { 2087 return m_atBreak; 2088 } 2089 2090 inline void SearchBuffer::reachedBreak() 2091 { 2092 m_atBreak = true; 2093 } 2094 2095 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 2096 { 2097 // This function implements the kana workaround. If usearch treats 2098 // it as a match, but we do not want to, then it's a "bad match". 2099 if (!m_targetRequiresKanaWorkaround) 2100 return false; 2101 2102 // Normalize into a match buffer. We reuse a single buffer rather than 2103 // creating a new one each time. 2104 normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch); 2105 2106 return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size()); 2107 } 2108 2109 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 2110 { 2111 ASSERT(m_options & AtWordStarts); 2112 2113 if (!start) 2114 return true; 2115 2116 int size = m_buffer.size(); 2117 int offset = start; 2118 UChar32 firstCharacter; 2119 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 2120 2121 if (m_options & TreatMedialCapitalAsWordStart) { 2122 UChar32 previousCharacter; 2123 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 2124 2125 if (isSeparator(firstCharacter)) { 2126 // The start of a separator run is a word start (".org" in "webkit.org"). 2127 if (!isSeparator(previousCharacter)) 2128 return true; 2129 } else if (isASCIIUpper(firstCharacter)) { 2130 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 2131 if (!isASCIIUpper(previousCharacter)) 2132 return true; 2133 // The last character of an uppercase run followed by a non-separator, non-digit 2134 // is a word start ("Request" in "XMLHTTPRequest"). 2135 offset = start; 2136 U16_FWD_1(m_buffer.data(), offset, size); 2137 UChar32 nextCharacter = 0; 2138 if (offset < size) 2139 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 2140 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 2141 return true; 2142 } else if (isASCIIDigit(firstCharacter)) { 2143 // The start of a digit run is a word start ("2" in "WebKit2"). 2144 if (!isASCIIDigit(previousCharacter)) 2145 return true; 2146 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 2147 // The start of a non-separator, non-uppercase, non-digit run is a word start, 2148 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 2149 return true; 2150 } 2151 } 2152 2153 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes 2154 // a word, so treat the position before any CJK character as a word start. 2155 if (Character::isCJKIdeographOrSymbol(firstCharacter)) 2156 return true; 2157 2158 size_t wordBreakSearchStart = start + length; 2159 while (wordBreakSearchStart > start) 2160 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 2161 return wordBreakSearchStart == start; 2162 } 2163 2164 inline size_t SearchBuffer::search(size_t& start) 2165 { 2166 size_t size = m_buffer.size(); 2167 if (m_atBreak) { 2168 if (!size) 2169 return 0; 2170 } else { 2171 if (size != m_buffer.capacity()) 2172 return 0; 2173 } 2174 2175 UStringSearch* searcher = blink::searcher(); 2176 2177 UErrorCode status = U_ZERO_ERROR; 2178 usearch_setText(searcher, m_buffer.data(), size, &status); 2179 ASSERT(status == U_ZERO_ERROR); 2180 2181 usearch_setOffset(searcher, m_prefixLength, &status); 2182 ASSERT(status == U_ZERO_ERROR); 2183 2184 int matchStart = usearch_next(searcher, &status); 2185 ASSERT(status == U_ZERO_ERROR); 2186 2187 nextMatch: 2188 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 2189 ASSERT(matchStart == USEARCH_DONE); 2190 return 0; 2191 } 2192 2193 // Matches that start in the overlap area are only tentative. 2194 // The same match may appear later, matching more characters, 2195 // possibly including a combining character that's not yet in the buffer. 2196 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2197 size_t overlap = m_overlap; 2198 if (m_options & AtWordStarts) { 2199 // Ensure that there is sufficient context before matchStart the next time around for 2200 // determining if it is at a word boundary. 2201 int wordBoundaryContextStart = matchStart; 2202 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2203 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 2204 overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); 2205 } 2206 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2207 m_prefixLength -= std::min(m_prefixLength, size - overlap); 2208 m_buffer.shrink(overlap); 2209 return 0; 2210 } 2211 2212 size_t matchedLength = usearch_getMatchedLength(searcher); 2213 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); 2214 2215 // If this match is "bad", move on to the next match. 2216 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 2217 matchStart = usearch_next(searcher, &status); 2218 ASSERT(status == U_ZERO_ERROR); 2219 goto nextMatch; 2220 } 2221 2222 size_t newSize = size - (matchStart + 1); 2223 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2224 m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1); 2225 m_buffer.shrink(newSize); 2226 2227 start = size - matchStart; 2228 return matchedLength; 2229 } 2230 2231 // -------- 2232 2233 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 2234 { 2235 int length = 0; 2236 TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter; 2237 if (forSelectionPreservation) 2238 behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions; 2239 for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance()) 2240 length += it.length(); 2241 2242 return length; 2243 } 2244 2245 int TextIterator::rangeLength(const Position& start, const Position& end, bool forSelectionPreservation) 2246 { 2247 int length = 0; 2248 TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter; 2249 if (forSelectionPreservation) 2250 behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions; 2251 for (TextIterator it(start, end, behaviorFlags); !it.atEnd(); it.advance()) 2252 length += it.length(); 2253 2254 return length; 2255 } 2256 2257 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2258 { 2259 CharacterIterator entireRangeIterator(entireRange); 2260 Position start; 2261 Position end; 2262 calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end); 2263 return Range::create(entireRange->ownerDocument(), start, end); 2264 } 2265 2266 void TextIterator::subrange(Position& start, Position& end, int characterOffset, int characterCount) 2267 { 2268 CharacterIterator entireRangeIterator(start, end); 2269 calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end); 2270 } 2271 2272 // -------- 2273 2274 static String createPlainText(TextIterator& it) 2275 { 2276 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192 2277 static const unsigned initialCapacity = 1 << 15; 2278 2279 unsigned bufferLength = 0; 2280 StringBuilder builder; 2281 builder.reserveCapacity(initialCapacity); 2282 2283 for (; !it.atEnd(); it.advance()) { 2284 it.appendTextToStringBuilder(builder); 2285 bufferLength += it.length(); 2286 } 2287 2288 if (!bufferLength) 2289 return emptyString(); 2290 2291 return builder.toString(); 2292 } 2293 2294 String plainText(const Range* r, TextIteratorBehaviorFlags behavior) 2295 { 2296 TextIterator it(r, behavior); 2297 return createPlainText(it); 2298 } 2299 2300 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 2301 { 2302 TextIterator it(start, end, behavior); 2303 return createPlainText(it); 2304 } 2305 2306 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2307 { 2308 RefPtrWillBeRawPtr<Range> result = range->cloneRange(); 2309 result->collapse(!forward); 2310 return result.release(); 2311 } 2312 2313 // Check if there's any unpaird surrogate code point. 2314 // Non-character code points are not checked. 2315 static bool isValidUTF16(const String& s) 2316 { 2317 if (s.is8Bit()) 2318 return true; 2319 const UChar* ustr = s.characters16(); 2320 size_t length = s.length(); 2321 size_t position = 0; 2322 while (position < length) { 2323 UChar32 character; 2324 U16_NEXT(ustr, position, length, character); 2325 if (U_IS_SURROGATE(character)) 2326 return false; 2327 } 2328 return true; 2329 } 2330 2331 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2332 { 2333 matchStart = 0; 2334 size_t matchLength = 0; 2335 2336 if (!isValidUTF16(target)) 2337 return 0; 2338 2339 SearchBuffer buffer(target, options); 2340 2341 if (buffer.needsMoreContext()) { 2342 RefPtrWillBeRawPtr<Range> beforeStartRange = it.ownerDocument()->createRange(); 2343 beforeStartRange->setEnd(it.startContainer(), it.startOffset(), IGNORE_EXCEPTION); 2344 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2345 Vector<UChar, 1024> characters; 2346 backwardsIterator.prependTextTo(characters); 2347 buffer.prependContext(characters.data(), characters.size()); 2348 if (!buffer.needsMoreContext()) 2349 break; 2350 } 2351 } 2352 2353 while (!it.atEnd()) { 2354 it.appendTextTo(buffer); 2355 it.advance(buffer.numberOfCharactersJustAppended()); 2356 tryAgain: 2357 size_t matchStartOffset; 2358 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2359 // Note that we found a match, and where we found it. 2360 size_t lastCharacterInBufferOffset = it.characterOffset(); 2361 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2362 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2363 matchLength = newMatchLength; 2364 // If searching forward, stop on the first match. 2365 // If searching backward, don't stop, so we end up with the last match. 2366 if (!(options & Backwards)) 2367 break; 2368 goto tryAgain; 2369 } 2370 if (it.atBreak() && !buffer.atBreak()) { 2371 buffer.reachedBreak(); 2372 goto tryAgain; 2373 } 2374 } 2375 2376 return matchLength; 2377 } 2378 2379 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots; 2380 2381 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2382 { 2383 // First, find the text. 2384 size_t matchStart; 2385 size_t matchLength; 2386 { 2387 CharacterIterator findIterator(range, iteratorFlagsForFindPlainText); 2388 matchLength = findPlainTextInternal(findIterator, target, options, matchStart); 2389 if (!matchLength) 2390 return collapsedToBoundary(range, !(options & Backwards)); 2391 } 2392 2393 // Then, find the document position of the start and the end of the text. 2394 CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText); 2395 Position resultStart; 2396 Position resultEnd; 2397 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd); 2398 return Range::create(range->ownerDocument(), resultStart, resultEnd); 2399 } 2400 2401 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd) 2402 { 2403 resultStart.clear(); 2404 resultEnd.clear(); 2405 // CharacterIterator requires renderers to be up-to-date. 2406 if (!inputStart.inDocument()) 2407 return; 2408 ASSERT(inputStart.document() == inputEnd.document()); 2409 2410 // FIXME: Reduce the code duplication with above (but how?). 2411 size_t matchStart; 2412 size_t matchLength; 2413 { 2414 CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText); 2415 matchLength = findPlainTextInternal(findIterator, target, options, matchStart); 2416 if (!matchLength) { 2417 const Position& collapseTo = options & Backwards ? inputStart : inputEnd; 2418 resultStart = collapseTo; 2419 resultEnd = collapseTo; 2420 return; 2421 } 2422 } 2423 2424 CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText); 2425 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd); 2426 } 2427 2428 } 2429