1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/TextIterator.h" 29 30 #include "bindings/v8/ExceptionStatePlaceholder.h" 31 #include "core/HTMLNames.h" 32 #include "core/dom/Document.h" 33 #include "core/dom/NodeTraversal.h" 34 #include "core/dom/shadow/ShadowRoot.h" 35 #include "core/editing/VisiblePosition.h" 36 #include "core/editing/VisibleUnits.h" 37 #include "core/editing/htmlediting.h" 38 #include "core/html/HTMLElement.h" 39 #include "core/html/HTMLTextFormControlElement.h" 40 #include "core/rendering/InlineTextBox.h" 41 #include "core/rendering/RenderImage.h" 42 #include "core/rendering/RenderTableCell.h" 43 #include "core/rendering/RenderTableRow.h" 44 #include "core/rendering/RenderTextControl.h" 45 #include "core/rendering/RenderTextFragment.h" 46 #include "platform/fonts/Character.h" 47 #include "platform/fonts/Font.h" 48 #include "platform/text/TextBoundaries.h" 49 #include "platform/text/TextBreakIteratorInternalICU.h" 50 #include "platform/text/UnicodeUtilities.h" 51 #include "wtf/text/CString.h" 52 #include "wtf/text/StringBuilder.h" 53 #include "wtf/unicode/CharacterNames.h" 54 #include <unicode/usearch.h> 55 #include <unicode/utf16.h> 56 57 using namespace WTF::Unicode; 58 59 namespace WebCore { 60 61 using namespace HTMLNames; 62 63 // Buffer that knows how to compare with a search target. 64 // Keeps enough of the previous text to be able to search in the future, but no more. 65 // Non-breaking spaces are always equal to normal spaces. 66 // Case folding is also done if the CaseInsensitive option is specified. 67 // Matches are further filtered if the AtWordStarts option is specified, although some 68 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 69 class SearchBuffer { 70 WTF_MAKE_NONCOPYABLE(SearchBuffer); 71 public: 72 SearchBuffer(const String& target, FindOptions); 73 ~SearchBuffer(); 74 75 // Returns number of characters appended; guaranteed to be in the range [1, length]. 76 template<typename CharType> 77 void append(const CharType*, size_t length); 78 size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; } 79 80 bool needsMoreContext() const; 81 void prependContext(const UChar*, size_t length); 82 void reachedBreak(); 83 84 // Result is the size in characters of what was found. 85 // And <startOffset> is the number of characters back to the start of what was found. 86 size_t search(size_t& startOffset); 87 bool atBreak() const; 88 89 private: 90 bool isBadMatch(const UChar*, size_t length) const; 91 bool isWordStartMatch(size_t start, size_t length) const; 92 93 Vector<UChar> m_target; 94 FindOptions m_options; 95 96 Vector<UChar> m_buffer; 97 size_t m_overlap; 98 size_t m_prefixLength; 99 size_t m_numberOfCharactersJustAppended; 100 bool m_atBreak; 101 bool m_needsMoreContext; 102 103 bool m_targetRequiresKanaWorkaround; 104 Vector<UChar> m_normalizedTarget; 105 mutable Vector<UChar> m_normalizedMatch; 106 }; 107 108 // -------- 109 110 static const unsigned bitsInWord = sizeof(unsigned) * 8; 111 static const unsigned bitInWordMask = bitsInWord - 1; 112 113 BitStack::BitStack() 114 : m_size(0) 115 { 116 } 117 118 BitStack::~BitStack() 119 { 120 } 121 122 void BitStack::push(bool bit) 123 { 124 unsigned index = m_size / bitsInWord; 125 unsigned shift = m_size & bitInWordMask; 126 if (!shift && index == m_words.size()) { 127 m_words.grow(index + 1); 128 m_words[index] = 0; 129 } 130 unsigned& word = m_words[index]; 131 unsigned mask = 1U << shift; 132 if (bit) 133 word |= mask; 134 else 135 word &= ~mask; 136 ++m_size; 137 } 138 139 void BitStack::pop() 140 { 141 if (m_size) 142 --m_size; 143 } 144 145 bool BitStack::top() const 146 { 147 if (!m_size) 148 return false; 149 unsigned shift = (m_size - 1) & bitInWordMask; 150 return m_words.last() & (1U << shift); 151 } 152 153 unsigned BitStack::size() const 154 { 155 return m_size; 156 } 157 158 // -------- 159 160 #if ASSERT_ENABLED 161 162 static unsigned depthCrossingShadowBoundaries(Node* node) 163 { 164 unsigned depth = 0; 165 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 166 ++depth; 167 return depth; 168 } 169 170 #endif 171 172 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 173 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 174 { 175 if (!rangeEndContainer) 176 return 0; 177 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 178 if (Node* next = rangeEndContainer->traverseToChildAt(rangeEndOffset)) 179 return next; 180 } 181 for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) { 182 if (Node* next = node->nextSibling()) 183 return next; 184 } 185 return 0; 186 } 187 188 // -------- 189 190 static inline bool fullyClipsContents(Node* node) 191 { 192 RenderObject* renderer = node->renderer(); 193 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 194 return false; 195 return toRenderBox(renderer)->size().isEmpty(); 196 } 197 198 static inline bool ignoresContainerClip(Node* node) 199 { 200 RenderObject* renderer = node->renderer(); 201 if (!renderer || renderer->isText()) 202 return false; 203 return renderer->style()->hasOutOfFlowPosition(); 204 } 205 206 static void pushFullyClippedState(BitStack& stack, Node* node) 207 { 208 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 209 210 // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364> 211 // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if 212 // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches 213 // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics 214 // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox). 215 // 216 // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering 217 // primitives, not with DOM primitives. 218 219 // Push true if this node full clips its contents, or if a parent already has fully 220 // clipped and this is not a node that ignores its container's clip. 221 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 222 } 223 224 static void setUpFullyClippedStack(BitStack& stack, Node* node) 225 { 226 // Put the nodes in a vector so we can iterate in reverse order. 227 Vector<Node*, 100> ancestry; 228 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 229 ancestry.append(parent); 230 231 // Call pushFullyClippedState on each node starting with the earliest ancestor. 232 size_t size = ancestry.size(); 233 for (size_t i = 0; i < size; ++i) 234 pushFullyClippedState(stack, ancestry[size - i - 1]); 235 pushFullyClippedState(stack, node); 236 237 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 238 } 239 240 // -------- 241 242 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior) 243 : m_startContainer(nullptr) 244 , m_startOffset(0) 245 , m_endContainer(nullptr) 246 , m_endOffset(0) 247 , m_positionNode(nullptr) 248 , m_textLength(0) 249 , m_needsAnotherNewline(false) 250 , m_textBox(0) 251 , m_remainingTextBox(0) 252 , m_firstLetterText(0) 253 , m_lastTextNode(nullptr) 254 , m_lastTextNodeEndedWithCollapsedSpace(false) 255 , m_lastCharacter(0) 256 , m_sortedTextBoxesPosition(0) 257 , m_hasEmitted(false) 258 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 259 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 260 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 261 , m_handledFirstLetter(false) 262 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 263 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 264 , m_shouldStop(false) 265 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 266 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots) 267 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter) 268 { 269 if (range) 270 initialize(range->startPosition(), range->endPosition()); 271 } 272 273 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 274 : m_startContainer(nullptr) 275 , m_startOffset(0) 276 , m_endContainer(nullptr) 277 , m_endOffset(0) 278 , m_positionNode(nullptr) 279 , m_textLength(0) 280 , m_needsAnotherNewline(false) 281 , m_textBox(0) 282 , m_remainingTextBox(0) 283 , m_firstLetterText(0) 284 , m_lastTextNode(nullptr) 285 , m_lastTextNodeEndedWithCollapsedSpace(false) 286 , m_lastCharacter(0) 287 , m_sortedTextBoxesPosition(0) 288 , m_hasEmitted(false) 289 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 290 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 291 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 292 , m_handledFirstLetter(false) 293 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 294 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 295 , m_shouldStop(false) 296 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 297 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots) 298 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter) 299 { 300 initialize(start, end); 301 } 302 303 void TextIterator::initialize(const Position& start, const Position& end) 304 { 305 ASSERT(comparePositions(start, end) <= 0); 306 307 // Get and validate |start| and |end|. 308 Node* startContainer = start.containerNode(); 309 if (!startContainer) 310 return; 311 int startOffset = start.computeOffsetInContainerNode(); 312 Node* endContainer = end.containerNode(); 313 if (!endContainer) 314 return; 315 int endOffset = end.computeOffsetInContainerNode(); 316 317 // Remember the range - this does not change. 318 m_startContainer = startContainer; 319 m_startOffset = startOffset; 320 m_endContainer = endContainer; 321 m_endOffset = endOffset; 322 323 // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from 324 // the common ancestor tree scope. 325 const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope()); 326 ASSERT(commonAncestorTreeScope); 327 m_shadowDepth = 0; 328 for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope()) 329 ++m_shadowDepth; 330 331 // Set up the current node for processing. 332 if (startContainer->offsetInCharacters()) 333 m_node = startContainer; 334 else if (Node* child = startContainer->traverseToChildAt(startOffset)) 335 m_node = child; 336 else if (!startOffset) 337 m_node = startContainer; 338 else 339 m_node = NodeTraversal::nextSkippingChildren(*startContainer); 340 341 if (!m_node) 342 return; 343 344 setUpFullyClippedStack(m_fullyClippedStack, m_node); 345 m_offset = m_node == m_startContainer ? m_startOffset : 0; 346 m_iterationProgress = HandledNone; 347 348 // Calculate first out of bounds node. 349 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 350 351 // Identify the first run. 352 advance(); 353 } 354 355 TextIterator::~TextIterator() 356 { 357 } 358 359 void TextIterator::advance() 360 { 361 if (m_shouldStop) 362 return; 363 364 // reset the run information 365 m_positionNode = nullptr; 366 m_textLength = 0; 367 368 // handle remembered node that needed a newline after the text node's newline 369 if (m_needsAnotherNewline) { 370 // Emit the extra newline, and position it *inside* m_node, after m_node's 371 // contents, in case it's a block, in the same way that we position the first 372 // newline. The range for the emitted newline should start where the line 373 // break begins. 374 // FIXME: It would be cleaner if we emitted two newlines during the last 375 // iteration, instead of using m_needsAnotherNewline. 376 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get(); 377 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 378 m_needsAnotherNewline = false; 379 return; 380 } 381 382 if (!m_textBox && m_remainingTextBox) { 383 m_textBox = m_remainingTextBox; 384 m_remainingTextBox = 0; 385 m_firstLetterText = 0; 386 m_offset = 0; 387 } 388 // handle remembered text box 389 if (m_textBox) { 390 handleTextBox(); 391 if (m_positionNode) 392 return; 393 } 394 395 while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) { 396 if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) 397 m_shouldStop = true; 398 399 // if the range ends at offset 0 of an element, represent the 400 // position, but not the content, of that element e.g. if the 401 // node is a blockflow element, emit a newline that 402 // precedes the element 403 if (m_node == m_endContainer && !m_endOffset) { 404 representNodeOffsetZero(); 405 m_node = nullptr; 406 return; 407 } 408 409 RenderObject* renderer = m_node->renderer(); 410 if (!renderer) { 411 if (m_node->isShadowRoot()) { 412 // A shadow root doesn't have a renderer, but we want to visit children anyway. 413 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress; 414 } else { 415 m_iterationProgress = HandledChildren; 416 } 417 } else { 418 // Enter author shadow roots, from youngest, if any and if necessary. 419 if (m_iterationProgress < HandledAuthorShadowRoots) { 420 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) { 421 ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot(); 422 ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot); 423 m_node = youngestShadowRoot; 424 m_iterationProgress = HandledNone; 425 ++m_shadowDepth; 426 pushFullyClippedState(m_fullyClippedStack, m_node); 427 continue; 428 } 429 430 m_iterationProgress = HandledAuthorShadowRoots; 431 } 432 433 // Enter user-agent shadow root, if necessary. 434 if (m_iterationProgress < HandledUserAgentShadowRoot) { 435 if (m_entersTextControls && renderer->isTextControl()) { 436 ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot(); 437 ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 438 m_node = userAgentShadowRoot; 439 m_iterationProgress = HandledNone; 440 ++m_shadowDepth; 441 pushFullyClippedState(m_fullyClippedStack, m_node); 442 continue; 443 } 444 m_iterationProgress = HandledUserAgentShadowRoot; 445 } 446 447 // Handle the current node according to its type. 448 if (m_iterationProgress < HandledNode) { 449 bool handledNode = false; 450 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE? 451 handledNode = handleTextNode(); 452 } else if (renderer && (renderer->isImage() || renderer->isWidget() 453 || (m_node && m_node->isElementNode() 454 && (toElement(m_node)->isFormControlElement() 455 || isHTMLLegendElement(toElement(*m_node)) 456 || isHTMLMeterElement(toElement(*m_node)) 457 || isHTMLProgressElement(toElement(*m_node)))))) { 458 handledNode = handleReplacedElement(); 459 } else { 460 handledNode = handleNonTextNode(); 461 } 462 if (handledNode) 463 m_iterationProgress = HandledNode; 464 if (m_positionNode) 465 return; 466 } 467 } 468 469 // Find a new current node to handle in depth-first manner, 470 // calling exitNode() as we come back thru a parent node. 471 // 472 // 1. Iterate over child nodes, if we haven't done yet. 473 Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0; 474 m_offset = 0; 475 if (!next) { 476 // 2. If we've already iterated children or they are not available, go to the next sibling node. 477 next = m_node->nextSibling(); 478 if (!next) { 479 // 3. If we are at the last child, go up the node tree until we find a next sibling. 480 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode; 481 Node* parentNode = m_node->parentNode(); 482 while (!next && parentNode) { 483 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 484 return; 485 bool haveRenderer = m_node->renderer(); 486 m_node = parentNode; 487 m_fullyClippedStack.pop(); 488 parentNode = m_node->parentNode(); 489 if (haveRenderer) 490 exitNode(); 491 if (m_positionNode) { 492 m_iterationProgress = HandledChildren; 493 return; 494 } 495 next = m_node->nextSibling(); 496 } 497 498 if (!next && !parentNode && m_shadowDepth > 0) { 499 // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next 500 // sibling shadow root, if any. 501 ShadowRoot* shadowRoot = toShadowRoot(m_node); 502 if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 503 ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot(); 504 if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 505 m_fullyClippedStack.pop(); 506 m_node = nextShadowRoot; 507 m_iterationProgress = HandledNone; 508 // m_shadowDepth is unchanged since we exit from a shadow root and enter another. 509 pushFullyClippedState(m_fullyClippedStack, m_node); 510 } else { 511 // We are the last shadow root; exit from here and go back to where we were. 512 m_node = shadowRoot->host(); 513 m_iterationProgress = HandledAuthorShadowRoots; 514 --m_shadowDepth; 515 m_fullyClippedStack.pop(); 516 } 517 } else { 518 // If we are in a user-agent shadow root, then go back to the host. 519 ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 520 m_node = shadowRoot->host(); 521 m_iterationProgress = HandledUserAgentShadowRoot; 522 --m_shadowDepth; 523 m_fullyClippedStack.pop(); 524 } 525 m_handledFirstLetter = false; 526 m_firstLetterText = 0; 527 continue; 528 } 529 } 530 m_fullyClippedStack.pop(); 531 } 532 533 // set the new current node 534 m_node = next; 535 if (m_node) 536 pushFullyClippedState(m_fullyClippedStack, m_node); 537 m_iterationProgress = HandledNone; 538 m_handledFirstLetter = false; 539 m_firstLetterText = 0; 540 541 // how would this ever be? 542 if (m_positionNode) 543 return; 544 } 545 } 546 547 UChar TextIterator::characterAt(unsigned index) const 548 { 549 ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length())); 550 if (!(index < static_cast<unsigned>(length()))) 551 return 0; 552 553 if (m_singleCharacterBuffer) { 554 ASSERT(!index); 555 ASSERT(length() == 1); 556 return m_singleCharacterBuffer; 557 } 558 559 return string()[startOffset() + index]; 560 } 561 562 String TextIterator::substring(unsigned position, unsigned length) const 563 { 564 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length())); 565 ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length())); 566 if (!length) 567 return emptyString(); 568 if (m_singleCharacterBuffer) { 569 ASSERT(!position); 570 ASSERT(length == 1); 571 return String(&m_singleCharacterBuffer, 1); 572 } 573 return string().substring(startOffset() + position, length); 574 } 575 576 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const 577 { 578 unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength); 579 if (!lengthToAppend) 580 return; 581 if (m_singleCharacterBuffer) { 582 ASSERT(!position); 583 builder.append(m_singleCharacterBuffer); 584 } else { 585 builder.append(string(), startOffset() + position, lengthToAppend); 586 } 587 } 588 589 bool TextIterator::handleTextNode() 590 { 591 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 592 return false; 593 594 RenderText* renderer = toRenderText(m_node->renderer()); 595 596 m_lastTextNode = m_node; 597 String str = renderer->text(); 598 599 // handle pre-formatted text 600 if (!renderer->style()->collapseWhiteSpace()) { 601 int runStart = m_offset; 602 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 603 emitCharacter(' ', m_node, 0, runStart, runStart); 604 return false; 605 } 606 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) { 607 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 608 if (m_firstLetterText) { 609 String firstLetter = m_firstLetterText->text(); 610 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 611 m_firstLetterText = 0; 612 m_textBox = 0; 613 return false; 614 } 615 } 616 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 617 return false; 618 int strLength = str.length(); 619 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; 620 int runEnd = std::min(strLength, end); 621 622 if (runStart >= runEnd) 623 return true; 624 625 emitText(m_node, runStart, runEnd); 626 return true; 627 } 628 629 if (renderer->firstTextBox()) 630 m_textBox = renderer->firstTextBox(); 631 632 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset; 633 if (shouldHandleFirstLetter) 634 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 635 636 if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) { 637 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 638 return false; 639 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 640 return true; 641 } 642 643 if (m_firstLetterText) 644 renderer = m_firstLetterText; 645 646 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 647 if (renderer->containsReversedText()) { 648 m_sortedTextBoxes.clear(); 649 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 650 m_sortedTextBoxes.append(textBox); 651 } 652 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 653 m_sortedTextBoxesPosition = 0; 654 m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]; 655 } 656 657 handleTextBox(); 658 return true; 659 } 660 661 void TextIterator::handleTextBox() 662 { 663 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); 664 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 665 m_textBox = 0; 666 return; 667 } 668 String str = renderer->text(); 669 unsigned start = m_offset; 670 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX; 671 while (m_textBox) { 672 unsigned textBoxStart = m_textBox->start(); 673 unsigned runStart = std::max(textBoxStart, start); 674 675 // Check for collapsed space at the start of this run. 676 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 677 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 678 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 679 if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) { 680 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 681 unsigned spaceRunStart = runStart - 1; 682 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 683 --spaceRunStart; 684 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 685 } else { 686 emitCharacter(' ', m_node, 0, runStart, runStart); 687 } 688 return; 689 } 690 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 691 unsigned runEnd = std::min(textBoxEnd, end); 692 693 // Determine what the next text box will be, but don't advance yet 694 InlineTextBox* nextTextBox = 0; 695 if (renderer->containsReversedText()) { 696 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 697 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 698 } else { 699 nextTextBox = m_textBox->nextTextBox(); 700 } 701 ASSERT(!nextTextBox || nextTextBox->renderer() == renderer); 702 703 if (runStart < runEnd) { 704 // Handle either a single newline character (which becomes a space), 705 // or a run of characters that does not include a newline. 706 // This effectively translates newlines to spaces without copying the text. 707 if (str[runStart] == '\n') { 708 emitCharacter(' ', m_node, 0, runStart, runStart + 1); 709 m_offset = runStart + 1; 710 } else { 711 size_t subrunEnd = str.find('\n', runStart); 712 if (subrunEnd == kNotFound || subrunEnd > runEnd) 713 subrunEnd = runEnd; 714 715 m_offset = subrunEnd; 716 emitText(m_node, renderer, runStart, subrunEnd); 717 } 718 719 // If we are doing a subrun that doesn't go to the end of the text box, 720 // come back again to finish handling this text box; don't advance to the next one. 721 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 722 return; 723 724 // Advance and return 725 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 726 if (nextRunStart > runEnd) 727 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 728 m_textBox = nextTextBox; 729 if (renderer->containsReversedText()) 730 ++m_sortedTextBoxesPosition; 731 return; 732 } 733 // Advance and continue 734 m_textBox = nextTextBox; 735 if (renderer->containsReversedText()) 736 ++m_sortedTextBoxesPosition; 737 } 738 if (!m_textBox && m_remainingTextBox) { 739 m_textBox = m_remainingTextBox; 740 m_remainingTextBox = 0; 741 m_firstLetterText = 0; 742 m_offset = 0; 743 handleTextBox(); 744 } 745 } 746 747 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter) 748 { 749 if (!firstLetter) 750 return 0; 751 752 // FIXME: Should this check descendent objects? 753 for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) { 754 if (current->isText()) 755 return toRenderText(current); 756 } 757 return 0; 758 } 759 760 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 761 { 762 if (renderer->firstLetter()) { 763 RenderBoxModelObject* r = renderer->firstLetter(); 764 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 765 return; 766 if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) { 767 m_handledFirstLetter = true; 768 m_remainingTextBox = m_textBox; 769 m_textBox = firstLetter->firstTextBox(); 770 m_sortedTextBoxes.clear(); 771 m_firstLetterText = firstLetter; 772 } 773 } 774 m_handledFirstLetter = true; 775 } 776 777 bool TextIterator::handleReplacedElement() 778 { 779 if (m_fullyClippedStack.top()) 780 return false; 781 782 RenderObject* renderer = m_node->renderer(); 783 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 784 return false; 785 786 if (m_emitsObjectReplacementCharacter) { 787 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); 788 return true; 789 } 790 791 if (m_lastTextNodeEndedWithCollapsedSpace) { 792 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 793 return false; 794 } 795 796 if (m_entersTextControls && renderer->isTextControl()) { 797 // The shadow tree should be already visited. 798 return true; 799 } 800 801 m_hasEmitted = true; 802 803 if (m_emitsCharactersBetweenAllVisiblePositions) { 804 // We want replaced elements to behave like punctuation for boundary 805 // finding, and to simply take up space for the selection preservation 806 // code in moveParagraphs, so we use a comma. 807 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 808 return true; 809 } 810 811 m_positionNode = m_node->parentNode(); 812 m_positionOffsetBaseNode = m_node; 813 m_positionStartOffset = 0; 814 m_positionEndOffset = 1; 815 m_singleCharacterBuffer = 0; 816 817 if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) { 818 m_text = toRenderImage(renderer)->altText(); 819 if (!m_text.isEmpty()) { 820 m_textLength = m_text.length(); 821 m_lastCharacter = m_text[m_textLength - 1]; 822 return true; 823 } 824 } 825 826 m_textLength = 0; 827 m_lastCharacter = 0; 828 829 return true; 830 } 831 832 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 833 { 834 if (renderer->style()->visibility() == VISIBLE) 835 return true; 836 if (renderer->isTextFragment()) { 837 RenderTextFragment* fragment = toRenderTextFragment(renderer); 838 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 839 return true; 840 } 841 return false; 842 } 843 844 static bool shouldEmitTabBeforeNode(Node* node) 845 { 846 RenderObject* r = node->renderer(); 847 848 // Table cells are delimited by tabs. 849 if (!r || !isTableCell(node)) 850 return false; 851 852 // Want a tab before every cell other than the first one 853 RenderTableCell* rc = toRenderTableCell(r); 854 RenderTable* t = rc->table(); 855 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 856 } 857 858 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) 859 { 860 RenderObject* renderer = node->renderer(); 861 862 if (renderer ? !renderer->isBR() : !isHTMLBRElement(node)) 863 return false; 864 return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost())); 865 } 866 867 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node) 868 { 869 // Block flow (versus inline flow) is represented by having 870 // a newline both before and after the element. 871 RenderObject* r = node.renderer(); 872 if (!r) { 873 return (node.hasTagName(blockquoteTag) 874 || node.hasTagName(ddTag) 875 || node.hasTagName(divTag) 876 || node.hasTagName(dlTag) 877 || node.hasTagName(dtTag) 878 || node.hasTagName(h1Tag) 879 || node.hasTagName(h2Tag) 880 || node.hasTagName(h3Tag) 881 || node.hasTagName(h4Tag) 882 || node.hasTagName(h5Tag) 883 || node.hasTagName(h6Tag) 884 || node.hasTagName(hrTag) 885 || node.hasTagName(liTag) 886 || node.hasTagName(listingTag) 887 || node.hasTagName(olTag) 888 || node.hasTagName(pTag) 889 || node.hasTagName(preTag) 890 || node.hasTagName(trTag) 891 || node.hasTagName(ulTag)); 892 } 893 894 // Need to make an exception for table cells, because they are blocks, but we 895 // want them tab-delimited rather than having newlines before and after. 896 if (isTableCell(&node)) 897 return false; 898 899 // Need to make an exception for table row elements, because they are neither 900 // "inline" or "RenderBlock", but we want newlines for them. 901 if (r->isTableRow()) { 902 RenderTable* t = toRenderTableRow(r)->table(); 903 if (t && !t->isInline()) 904 return true; 905 } 906 907 return !r->isInline() && r->isRenderBlock() 908 && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText(); 909 } 910 911 static bool shouldEmitNewlineAfterNode(Node& node) 912 { 913 // FIXME: It should be better but slower to create a VisiblePosition here. 914 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 915 return false; 916 // Check if this is the very last renderer in the document. 917 // If so, then we should not emit a newline. 918 Node* next = &node; 919 while ((next = NodeTraversal::nextSkippingChildren(*next))) { 920 if (next->renderer()) 921 return true; 922 } 923 return false; 924 } 925 926 static bool shouldEmitNewlineBeforeNode(Node& node) 927 { 928 return shouldEmitNewlinesBeforeAndAfterNode(node); 929 } 930 931 static bool shouldEmitExtraNewlineForNode(Node* node) 932 { 933 // When there is a significant collapsed bottom margin, emit an extra 934 // newline for a more realistic result. We end up getting the right 935 // result even without margin collapsing. For example: <div><p>text</p></div> 936 // will work right even if both the <div> and the <p> have bottom margins. 937 RenderObject* r = node->renderer(); 938 if (!r || !r->isBox()) 939 return false; 940 941 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 942 // not to do this at all 943 if (node->hasTagName(h1Tag) 944 || node->hasTagName(h2Tag) 945 || node->hasTagName(h3Tag) 946 || node->hasTagName(h4Tag) 947 || node->hasTagName(h5Tag) 948 || node->hasTagName(h6Tag) 949 || node->hasTagName(pTag)) { 950 RenderStyle* style = r->style(); 951 if (style) { 952 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 953 int fontSize = style->fontDescription().computedPixelSize(); 954 if (bottomMargin * 2 >= fontSize) 955 return true; 956 } 957 } 958 959 return false; 960 } 961 962 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 963 { 964 const String& text = renderer->text(); 965 int length = text.length(); 966 for (int i = textEnd; i < length; ++i) { 967 if (!renderer->style()->isCollapsibleWhiteSpace(text[i])) 968 return i - textEnd; 969 } 970 971 return length - textEnd; 972 } 973 974 static int maxOffsetIncludingCollapsedSpaces(Node* node) 975 { 976 int offset = caretMaxOffset(node); 977 978 if (node->renderer() && node->renderer()->isText()) 979 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 980 981 return offset; 982 } 983 984 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 985 bool TextIterator::shouldRepresentNodeOffsetZero() 986 { 987 if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTable(m_node)) 988 return true; 989 990 // Leave element positioned flush with start of a paragraph 991 // (e.g. do not insert tab before a table cell at the start of a paragraph) 992 if (m_lastCharacter == '\n') 993 return false; 994 995 // Otherwise, show the position if we have emitted any characters 996 if (m_hasEmitted) 997 return true; 998 999 // We've not emitted anything yet. Generally, there is no need for any positioning then. 1000 // The only exception is when the element is visually not in the same line as 1001 // the start of the range (e.g. the range starts at the end of the previous paragraph). 1002 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 1003 // make quicker checks to possibly avoid that. Another check that we could make is 1004 // is whether the inline vs block flow changed since the previous visible element. 1005 // I think we're already in a special enough case that that won't be needed, tho. 1006 1007 // No character needed if this is the first node in the range. 1008 if (m_node == m_startContainer) 1009 return false; 1010 1011 // If we are outside the start container's subtree, assume we need to emit. 1012 // FIXME: m_startContainer could be an inline block 1013 if (!m_node->isDescendantOf(m_startContainer)) 1014 return true; 1015 1016 // If we started as m_startContainer offset 0 and the current node is a descendant of 1017 // the start container, we already had enough context to correctly decide whether to 1018 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 1019 // so don't second guess that now. 1020 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 1021 // immaterial since we likely would have already emitted something by now. 1022 if (!m_startOffset) 1023 return false; 1024 1025 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 1026 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 1027 // we would create VisiblePositions on every call to this function without this check. 1028 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE 1029 || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node))) 1030 return false; 1031 1032 // The startPos.isNotNull() check is needed because the start could be before the body, 1033 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 1034 // The currPos.isNotNull() check is needed because positions in non-HTML content 1035 // (like SVG) do not have visible positions, and we don't want to emit for them either. 1036 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 1037 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 1038 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 1039 } 1040 1041 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 1042 { 1043 return isRenderedTable(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 1044 } 1045 1046 void TextIterator::representNodeOffsetZero() 1047 { 1048 // Emit a character to show the positioning of m_node. 1049 1050 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 1051 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 1052 // on m_node to see if it necessitates emitting a character first and will early return 1053 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 1054 if (shouldEmitTabBeforeNode(m_node)) { 1055 if (shouldRepresentNodeOffsetZero()) 1056 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 1057 } else if (shouldEmitNewlineBeforeNode(*m_node)) { 1058 if (shouldRepresentNodeOffsetZero()) 1059 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 1060 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 1061 if (shouldRepresentNodeOffsetZero()) 1062 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); 1063 } 1064 } 1065 1066 bool TextIterator::handleNonTextNode() 1067 { 1068 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText)) 1069 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 1070 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 1071 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); 1072 else 1073 representNodeOffsetZero(); 1074 1075 return true; 1076 } 1077 1078 void TextIterator::exitNode() 1079 { 1080 // prevent emitting a newline when exiting a collapsed block at beginning of the range 1081 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 1082 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 1083 // therefore look like a blank line. 1084 if (!m_hasEmitted) 1085 return; 1086 1087 // Emit with a position *inside* m_node, after m_node's contents, in 1088 // case it is a block, because the run should start where the 1089 // emitted character is positioned visually. 1090 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get(); 1091 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 1092 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 1093 // TextIterator in _web_attributedStringFromRange. 1094 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 1095 if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) { 1096 // use extra newline to represent margin bottom, as needed 1097 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 1098 1099 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 1100 // contain a VisiblePosition when doing selection preservation. 1101 if (m_lastCharacter != '\n') { 1102 // insert a newline with a position following this block's contents. 1103 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1104 // remember whether to later add a newline for the current node 1105 ASSERT(!m_needsAnotherNewline); 1106 m_needsAnotherNewline = addNewline; 1107 } else if (addNewline) { 1108 // insert a newline with a position following this block's contents. 1109 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1110 } 1111 } 1112 1113 // If nothing was emitted, see if we need to emit a space. 1114 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 1115 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); 1116 } 1117 1118 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 1119 { 1120 m_hasEmitted = true; 1121 1122 // remember information with which to construct the TextIterator::range() 1123 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 1124 m_positionNode = textNode; 1125 m_positionOffsetBaseNode = offsetBaseNode; 1126 m_positionStartOffset = textStartOffset; 1127 m_positionEndOffset = textEndOffset; 1128 1129 // remember information with which to construct the TextIterator::characters() and length() 1130 m_singleCharacterBuffer = c; 1131 ASSERT(m_singleCharacterBuffer); 1132 m_textLength = 1; 1133 1134 // remember some iteration state 1135 m_lastTextNodeEndedWithCollapsedSpace = false; 1136 m_lastCharacter = c; 1137 } 1138 1139 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) 1140 { 1141 RenderText* renderer = toRenderText(renderObject); 1142 m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text(); 1143 ASSERT(!m_text.isEmpty()); 1144 ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length())); 1145 ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length())); 1146 ASSERT(textStartOffset <= textEndOffset); 1147 1148 m_positionNode = textNode; 1149 m_positionOffsetBaseNode = nullptr; 1150 m_positionStartOffset = textStartOffset; 1151 m_positionEndOffset = textEndOffset; 1152 m_singleCharacterBuffer = 0; 1153 m_textLength = textEndOffset - textStartOffset; 1154 m_lastCharacter = m_text[textEndOffset - 1]; 1155 1156 m_lastTextNodeEndedWithCollapsedSpace = false; 1157 m_hasEmitted = true; 1158 } 1159 1160 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) 1161 { 1162 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); 1163 } 1164 1165 PassRefPtrWillBeRawPtr<Range> TextIterator::range() const 1166 { 1167 // use the current run information, if we have it 1168 if (m_positionNode) { 1169 if (m_positionOffsetBaseNode) { 1170 int index = m_positionOffsetBaseNode->nodeIndex(); 1171 m_positionStartOffset += index; 1172 m_positionEndOffset += index; 1173 m_positionOffsetBaseNode = nullptr; 1174 } 1175 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1176 } 1177 1178 // otherwise, return the end of the overall range we were given 1179 if (m_endContainer) 1180 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1181 1182 return nullptr; 1183 } 1184 1185 Node* TextIterator::node() const 1186 { 1187 RefPtrWillBeRawPtr<Range> textRange = range(); 1188 if (!textRange) 1189 return 0; 1190 1191 Node* node = textRange->startContainer(); 1192 if (!node) 1193 return 0; 1194 if (node->offsetInCharacters()) 1195 return node; 1196 1197 return node->traverseToChildAt(textRange->startOffset()); 1198 } 1199 1200 // -------- 1201 1202 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior) 1203 : m_node(nullptr) 1204 , m_offset(0) 1205 , m_handledNode(false) 1206 , m_handledChildren(false) 1207 , m_startNode(nullptr) 1208 , m_startOffset(0) 1209 , m_endNode(nullptr) 1210 , m_endOffset(0) 1211 , m_positionNode(nullptr) 1212 , m_positionStartOffset(0) 1213 , m_positionEndOffset(0) 1214 , m_textOffset(0) 1215 , m_textLength(0) 1216 , m_lastTextNode(nullptr) 1217 , m_lastCharacter(0) 1218 , m_singleCharacterBuffer(0) 1219 , m_havePassedStartNode(false) 1220 , m_shouldHandleFirstLetter(false) 1221 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1222 , m_shouldStop(false) 1223 , m_emitsOriginalText(false) 1224 { 1225 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1226 1227 if (!r) 1228 return; 1229 1230 Node* startNode = r->startContainer(); 1231 if (!startNode) 1232 return; 1233 Node* endNode = r->endContainer(); 1234 int startOffset = r->startOffset(); 1235 int endOffset = r->endOffset(); 1236 1237 if (!startNode->offsetInCharacters() && startOffset >= 0) { 1238 // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior 1239 // instead of calling countChildren() to avoid traversing the children twice. 1240 if (Node* childAtOffset = startNode->traverseToChildAt(startOffset)) { 1241 startNode = childAtOffset; 1242 startOffset = 0; 1243 } 1244 } 1245 if (!endNode->offsetInCharacters() && endOffset > 0) { 1246 // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior 1247 // instead of calling countChildren() to avoid traversing the children twice. 1248 if (Node* childAtOffset = endNode->traverseToChildAt(endOffset - 1)) { 1249 endNode = childAtOffset; 1250 endOffset = lastOffsetInNode(endNode); 1251 } 1252 } 1253 1254 m_node = endNode; 1255 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1256 m_offset = endOffset; 1257 m_handledNode = false; 1258 m_handledChildren = !endOffset; 1259 1260 m_startNode = startNode; 1261 m_startOffset = startOffset; 1262 m_endNode = endNode; 1263 m_endOffset = endOffset; 1264 1265 #ifndef NDEBUG 1266 // Need this just because of the assert. 1267 m_positionNode = endNode; 1268 #endif 1269 1270 m_lastTextNode = nullptr; 1271 m_lastCharacter = '\n'; 1272 1273 m_havePassedStartNode = false; 1274 1275 advance(); 1276 } 1277 1278 void SimplifiedBackwardsTextIterator::advance() 1279 { 1280 ASSERT(m_positionNode); 1281 1282 if (m_shouldStop) 1283 return; 1284 1285 if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) { 1286 m_shouldStop = true; 1287 return; 1288 } 1289 1290 m_positionNode = nullptr; 1291 m_textLength = 0; 1292 1293 while (m_node && !m_havePassedStartNode) { 1294 // Don't handle node if we start iterating at [node, 0]. 1295 if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) { 1296 RenderObject* renderer = m_node->renderer(); 1297 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1298 // FIXME: What about CDATA_SECTION_NODE? 1299 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1300 m_handledNode = handleTextNode(); 1301 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1302 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1303 m_handledNode = handleReplacedElement(); 1304 } else { 1305 m_handledNode = handleNonTextNode(); 1306 } 1307 if (m_positionNode) 1308 return; 1309 } 1310 1311 if (!m_handledChildren && m_node->hasChildren()) { 1312 m_node = m_node->lastChild(); 1313 pushFullyClippedState(m_fullyClippedStack, m_node); 1314 } else { 1315 // Exit empty containers as we pass over them or containers 1316 // where [container, 0] is where we started iterating. 1317 if (!m_handledNode 1318 && canHaveChildrenForEditing(m_node) 1319 && m_node->parentNode() 1320 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1321 exitNode(); 1322 if (m_positionNode) { 1323 m_handledNode = true; 1324 m_handledChildren = true; 1325 return; 1326 } 1327 } 1328 1329 // Exit all other containers. 1330 while (!m_node->previousSibling()) { 1331 if (!advanceRespectingRange(m_node->parentOrShadowHostNode())) 1332 break; 1333 m_fullyClippedStack.pop(); 1334 exitNode(); 1335 if (m_positionNode) { 1336 m_handledNode = true; 1337 m_handledChildren = true; 1338 return; 1339 } 1340 } 1341 1342 m_fullyClippedStack.pop(); 1343 if (advanceRespectingRange(m_node->previousSibling())) 1344 pushFullyClippedState(m_fullyClippedStack, m_node); 1345 else 1346 m_node = nullptr; 1347 } 1348 1349 // For the purpose of word boundary detection, 1350 // we should iterate all visible text and trailing (collapsed) whitespaces. 1351 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1352 m_handledNode = false; 1353 m_handledChildren = false; 1354 1355 if (m_positionNode) 1356 return; 1357 } 1358 } 1359 1360 bool SimplifiedBackwardsTextIterator::handleTextNode() 1361 { 1362 m_lastTextNode = m_node; 1363 1364 int startOffset; 1365 int offsetInNode; 1366 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode); 1367 if (!renderer) 1368 return true; 1369 1370 String text = renderer->text(); 1371 if (!renderer->firstTextBox() && text.length() > 0) 1372 return true; 1373 1374 m_positionEndOffset = m_offset; 1375 m_offset = startOffset + offsetInNode; 1376 m_positionNode = m_node; 1377 m_positionStartOffset = m_offset; 1378 1379 ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length())); 1380 ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); 1381 ASSERT(m_positionStartOffset <= m_positionEndOffset); 1382 1383 m_textLength = m_positionEndOffset - m_positionStartOffset; 1384 m_textOffset = m_positionStartOffset - offsetInNode; 1385 m_textContainer = text; 1386 m_singleCharacterBuffer = 0; 1387 RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length()); 1388 1389 m_lastCharacter = text[m_positionEndOffset - 1]; 1390 1391 return !m_shouldHandleFirstLetter; 1392 } 1393 1394 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) 1395 { 1396 RenderText* renderer = toRenderText(m_node->renderer()); 1397 startOffset = (m_node == m_startNode) ? m_startOffset : 0; 1398 1399 if (!renderer->isTextFragment()) { 1400 offsetInNode = 0; 1401 return renderer; 1402 } 1403 1404 RenderTextFragment* fragment = toRenderTextFragment(renderer); 1405 int offsetAfterFirstLetter = fragment->start(); 1406 if (startOffset >= offsetAfterFirstLetter) { 1407 ASSERT(!m_shouldHandleFirstLetter); 1408 offsetInNode = offsetAfterFirstLetter; 1409 return renderer; 1410 } 1411 1412 if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) { 1413 m_shouldHandleFirstLetter = true; 1414 offsetInNode = offsetAfterFirstLetter; 1415 return renderer; 1416 } 1417 1418 m_shouldHandleFirstLetter = false; 1419 offsetInNode = 0; 1420 RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter()); 1421 1422 m_offset = firstLetterRenderer->caretMaxOffset(); 1423 m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset); 1424 1425 return firstLetterRenderer; 1426 } 1427 1428 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1429 { 1430 unsigned index = m_node->nodeIndex(); 1431 // We want replaced elements to behave like punctuation for boundary 1432 // finding, and to simply take up space for the selection preservation 1433 // code in moveParagraphs, so we use a comma. Unconditionally emit 1434 // here because this iterator is only used for boundary finding. 1435 emitCharacter(',', m_node->parentNode(), index, index + 1); 1436 return true; 1437 } 1438 1439 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1440 { 1441 // We can use a linefeed in place of a tab because this simple iterator is only used to 1442 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1443 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1444 unsigned index = m_node->nodeIndex(); 1445 // The start of this emitted range is wrong. Ensuring correctness would require 1446 // VisiblePositions and so would be slow. previousBoundary expects this. 1447 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1448 } 1449 return true; 1450 } 1451 1452 void SimplifiedBackwardsTextIterator::exitNode() 1453 { 1454 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1455 // The start of this emitted range is wrong. Ensuring correctness would require 1456 // VisiblePositions and so would be slow. previousBoundary expects this. 1457 emitCharacter('\n', m_node, 0, 0); 1458 } 1459 } 1460 1461 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1462 { 1463 m_singleCharacterBuffer = c; 1464 m_positionNode = node; 1465 m_positionStartOffset = startOffset; 1466 m_positionEndOffset = endOffset; 1467 m_textOffset = 0; 1468 m_textLength = 1; 1469 m_lastCharacter = c; 1470 } 1471 1472 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1473 { 1474 if (!next) 1475 return false; 1476 m_havePassedStartNode |= m_node == m_startNode; 1477 if (m_havePassedStartNode) 1478 return false; 1479 m_node = next; 1480 return true; 1481 } 1482 1483 PassRefPtrWillBeRawPtr<Range> SimplifiedBackwardsTextIterator::range() const 1484 { 1485 if (m_positionNode) 1486 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1487 1488 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); 1489 } 1490 1491 // -------- 1492 1493 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior) 1494 : m_offset(0) 1495 , m_runOffset(0) 1496 , m_atBreak(true) 1497 , m_textIterator(range, behavior) 1498 { 1499 initialize(); 1500 } 1501 1502 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior) 1503 : m_offset(0) 1504 , m_runOffset(0) 1505 , m_atBreak(true) 1506 , m_textIterator(start, end, behavior) 1507 { 1508 initialize(); 1509 } 1510 1511 void CharacterIterator::initialize() 1512 { 1513 while (!atEnd() && !m_textIterator.length()) 1514 m_textIterator.advance(); 1515 } 1516 1517 PassRefPtrWillBeRawPtr<Range> CharacterIterator::range() const 1518 { 1519 RefPtrWillBeRawPtr<Range> r = m_textIterator.range(); 1520 if (!m_textIterator.atEnd()) { 1521 if (m_textIterator.length() <= 1) { 1522 ASSERT(!m_runOffset); 1523 } else { 1524 Node* n = r->startContainer(); 1525 ASSERT(n == r->endContainer()); 1526 int offset = r->startOffset() + m_runOffset; 1527 r->setStart(n, offset, ASSERT_NO_EXCEPTION); 1528 r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION); 1529 } 1530 } 1531 return r.release(); 1532 } 1533 1534 void CharacterIterator::advance(int count) 1535 { 1536 if (count <= 0) { 1537 ASSERT(!count); 1538 return; 1539 } 1540 1541 m_atBreak = false; 1542 1543 // easy if there is enough left in the current m_textIterator run 1544 int remaining = m_textIterator.length() - m_runOffset; 1545 if (count < remaining) { 1546 m_runOffset += count; 1547 m_offset += count; 1548 return; 1549 } 1550 1551 // exhaust the current m_textIterator run 1552 count -= remaining; 1553 m_offset += remaining; 1554 1555 // move to a subsequent m_textIterator run 1556 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1557 int runLength = m_textIterator.length(); 1558 if (!runLength) { 1559 m_atBreak = true; 1560 } else { 1561 // see whether this is m_textIterator to use 1562 if (count < runLength) { 1563 m_runOffset = count; 1564 m_offset += count; 1565 return; 1566 } 1567 1568 // exhaust this m_textIterator run 1569 count -= runLength; 1570 m_offset += runLength; 1571 } 1572 } 1573 1574 // ran to the end of the m_textIterator... no more runs left 1575 m_atBreak = true; 1576 m_runOffset = 0; 1577 } 1578 1579 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition) 1580 { 1581 it.advance(offset); 1582 RefPtrWillBeRawPtr<Range> start = it.range(); 1583 startPosition = start->startPosition(); 1584 1585 if (length > 1) 1586 it.advance(length - 1); 1587 RefPtrWillBeRawPtr<Range> end = it.range(); 1588 endPosition = end->endPosition(); 1589 } 1590 1591 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior) 1592 : m_offset(0) 1593 , m_runOffset(0) 1594 , m_atBreak(true) 1595 , m_textIterator(range, behavior) 1596 { 1597 while (!atEnd() && !m_textIterator.length()) 1598 m_textIterator.advance(); 1599 } 1600 1601 PassRefPtrWillBeRawPtr<Range> BackwardsCharacterIterator::range() const 1602 { 1603 RefPtrWillBeRawPtr<Range> r = m_textIterator.range(); 1604 if (!m_textIterator.atEnd()) { 1605 if (m_textIterator.length() <= 1) { 1606 ASSERT(!m_runOffset); 1607 } else { 1608 Node* n = r->startContainer(); 1609 ASSERT(n == r->endContainer()); 1610 int offset = r->endOffset() - m_runOffset; 1611 r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION); 1612 r->setEnd(n, offset, ASSERT_NO_EXCEPTION); 1613 } 1614 } 1615 return r.release(); 1616 } 1617 1618 void BackwardsCharacterIterator::advance(int count) 1619 { 1620 if (count <= 0) { 1621 ASSERT(!count); 1622 return; 1623 } 1624 1625 m_atBreak = false; 1626 1627 int remaining = m_textIterator.length() - m_runOffset; 1628 if (count < remaining) { 1629 m_runOffset += count; 1630 m_offset += count; 1631 return; 1632 } 1633 1634 count -= remaining; 1635 m_offset += remaining; 1636 1637 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1638 int runLength = m_textIterator.length(); 1639 if (!runLength) { 1640 m_atBreak = true; 1641 } else { 1642 if (count < runLength) { 1643 m_runOffset = count; 1644 m_offset += count; 1645 return; 1646 } 1647 1648 count -= runLength; 1649 m_offset += runLength; 1650 } 1651 } 1652 1653 m_atBreak = true; 1654 m_runOffset = 0; 1655 } 1656 1657 // -------- 1658 1659 WordAwareIterator::WordAwareIterator(const Range* range) 1660 : m_didLookAhead(true) // So we consider the first chunk from the text iterator. 1661 , m_textIterator(range) 1662 { 1663 advance(); // Get in position over the first chunk of text. 1664 } 1665 1666 WordAwareIterator::~WordAwareIterator() 1667 { 1668 } 1669 1670 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1671 1672 void WordAwareIterator::advance() 1673 { 1674 m_buffer.clear(); 1675 1676 // If last time we did a look-ahead, start with that looked-ahead chunk now 1677 if (!m_didLookAhead) { 1678 ASSERT(!m_textIterator.atEnd()); 1679 m_textIterator.advance(); 1680 } 1681 m_didLookAhead = false; 1682 1683 // Go to next non-empty chunk. 1684 while (!m_textIterator.atEnd() && !m_textIterator.length()) 1685 m_textIterator.advance(); 1686 1687 m_range = m_textIterator.range(); 1688 1689 if (m_textIterator.atEnd()) 1690 return; 1691 1692 while (1) { 1693 // If this chunk ends in whitespace we can just use it as our chunk. 1694 if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1))) 1695 return; 1696 1697 // If this is the first chunk that failed, save it in m_buffer before look ahead. 1698 if (m_buffer.isEmpty()) 1699 m_textIterator.appendTextTo(m_buffer); 1700 1701 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1702 m_textIterator.advance(); 1703 if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) { 1704 m_didLookAhead = true; 1705 return; 1706 } 1707 1708 // Start gobbling chunks until we get to a suitable stopping point 1709 m_textIterator.appendTextTo(m_buffer); 1710 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION); 1711 } 1712 } 1713 1714 int WordAwareIterator::length() const 1715 { 1716 if (!m_buffer.isEmpty()) 1717 return m_buffer.size(); 1718 return m_textIterator.length(); 1719 } 1720 1721 String WordAwareIterator::substring(unsigned position, unsigned length) const 1722 { 1723 if (!m_buffer.isEmpty()) 1724 return String(m_buffer.data() + position, length); 1725 return m_textIterator.substring(position, length); 1726 } 1727 1728 UChar WordAwareIterator::characterAt(unsigned index) const 1729 { 1730 if (!m_buffer.isEmpty()) 1731 return m_buffer[index]; 1732 return m_textIterator.characterAt(index); 1733 } 1734 1735 // -------- 1736 1737 static const size_t minimumSearchBufferSize = 8192; 1738 1739 #ifndef NDEBUG 1740 static bool searcherInUse; 1741 #endif 1742 1743 static UStringSearch* createSearcher() 1744 { 1745 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1746 // but it doesn't matter exactly what it is, since we don't perform any searches 1747 // without setting both the pattern and the text. 1748 UErrorCode status = U_ZERO_ERROR; 1749 String searchCollatorName = currentSearchLocaleID() + String("@collation=search"); 1750 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status); 1751 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1752 return searcher; 1753 } 1754 1755 static UStringSearch* searcher() 1756 { 1757 static UStringSearch* searcher = createSearcher(); 1758 return searcher; 1759 } 1760 1761 static inline void lockSearcher() 1762 { 1763 #ifndef NDEBUG 1764 ASSERT(!searcherInUse); 1765 searcherInUse = true; 1766 #endif 1767 } 1768 1769 static inline void unlockSearcher() 1770 { 1771 #ifndef NDEBUG 1772 ASSERT(searcherInUse); 1773 searcherInUse = false; 1774 #endif 1775 } 1776 1777 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1778 : m_options(options) 1779 , m_prefixLength(0) 1780 , m_numberOfCharactersJustAppended(0) 1781 , m_atBreak(true) 1782 , m_needsMoreContext(options & AtWordStarts) 1783 , m_targetRequiresKanaWorkaround(containsKanaLetters(target)) 1784 { 1785 ASSERT(!target.isEmpty()); 1786 target.appendTo(m_target); 1787 1788 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1789 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1790 // to add tailoring on top of the locale-specific tailoring as of this writing. 1791 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); 1792 1793 size_t targetLength = m_target.size(); 1794 m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize)); 1795 m_overlap = m_buffer.capacity() / 4; 1796 1797 if ((m_options & AtWordStarts) && targetLength) { 1798 UChar32 targetFirstCharacter; 1799 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); 1800 // Characters in the separator category never really occur at the beginning of a word, 1801 // so if the target begins with such a character, we just ignore the AtWordStart option. 1802 if (isSeparator(targetFirstCharacter)) { 1803 m_options &= ~AtWordStarts; 1804 m_needsMoreContext = false; 1805 } 1806 } 1807 1808 // Grab the single global searcher. 1809 // If we ever have a reason to do more than once search buffer at once, we'll have 1810 // to move to multiple searchers. 1811 lockSearcher(); 1812 1813 UStringSearch* searcher = WebCore::searcher(); 1814 UCollator* collator = usearch_getCollator(searcher); 1815 1816 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 1817 if (ucol_getStrength(collator) != strength) { 1818 ucol_setStrength(collator, strength); 1819 usearch_reset(searcher); 1820 } 1821 1822 UErrorCode status = U_ZERO_ERROR; 1823 usearch_setPattern(searcher, m_target.data(), targetLength, &status); 1824 ASSERT(status == U_ZERO_ERROR); 1825 1826 // The kana workaround requires a normalized copy of the target string. 1827 if (m_targetRequiresKanaWorkaround) 1828 normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget); 1829 } 1830 1831 inline SearchBuffer::~SearchBuffer() 1832 { 1833 // Leave the static object pointing to a valid string. 1834 UErrorCode status = U_ZERO_ERROR; 1835 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1836 ASSERT(status == U_ZERO_ERROR); 1837 1838 unlockSearcher(); 1839 } 1840 1841 template<typename CharType> 1842 inline void SearchBuffer::append(const CharType* characters, size_t length) 1843 { 1844 ASSERT(length); 1845 1846 if (m_atBreak) { 1847 m_buffer.shrink(0); 1848 m_prefixLength = 0; 1849 m_atBreak = false; 1850 } else if (m_buffer.size() == m_buffer.capacity()) { 1851 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 1852 m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap); 1853 m_buffer.shrink(m_overlap); 1854 } 1855 1856 size_t oldLength = m_buffer.size(); 1857 size_t usableLength = std::min(m_buffer.capacity() - oldLength, length); 1858 ASSERT(usableLength); 1859 m_buffer.resize(oldLength + usableLength); 1860 UChar* destination = m_buffer.data() + oldLength; 1861 StringImpl::copyChars(destination, characters, usableLength); 1862 foldQuoteMarksAndSoftHyphens(destination, usableLength); 1863 m_numberOfCharactersJustAppended = usableLength; 1864 } 1865 1866 inline bool SearchBuffer::needsMoreContext() const 1867 { 1868 return m_needsMoreContext; 1869 } 1870 1871 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 1872 { 1873 ASSERT(m_needsMoreContext); 1874 ASSERT(m_prefixLength == m_buffer.size()); 1875 1876 if (!length) 1877 return; 1878 1879 m_atBreak = false; 1880 1881 size_t wordBoundaryContextStart = length; 1882 if (wordBoundaryContextStart) { 1883 U16_BACK_1(characters, 0, wordBoundaryContextStart); 1884 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 1885 } 1886 1887 size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 1888 m_buffer.prepend(characters + length - usableLength, usableLength); 1889 m_prefixLength += usableLength; 1890 1891 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 1892 m_needsMoreContext = false; 1893 } 1894 1895 inline bool SearchBuffer::atBreak() const 1896 { 1897 return m_atBreak; 1898 } 1899 1900 inline void SearchBuffer::reachedBreak() 1901 { 1902 m_atBreak = true; 1903 } 1904 1905 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 1906 { 1907 // This function implements the kana workaround. If usearch treats 1908 // it as a match, but we do not want to, then it's a "bad match". 1909 if (!m_targetRequiresKanaWorkaround) 1910 return false; 1911 1912 // Normalize into a match buffer. We reuse a single buffer rather than 1913 // creating a new one each time. 1914 normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch); 1915 1916 return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size()); 1917 } 1918 1919 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 1920 { 1921 ASSERT(m_options & AtWordStarts); 1922 1923 if (!start) 1924 return true; 1925 1926 int size = m_buffer.size(); 1927 int offset = start; 1928 UChar32 firstCharacter; 1929 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 1930 1931 if (m_options & TreatMedialCapitalAsWordStart) { 1932 UChar32 previousCharacter; 1933 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 1934 1935 if (isSeparator(firstCharacter)) { 1936 // The start of a separator run is a word start (".org" in "webkit.org"). 1937 if (!isSeparator(previousCharacter)) 1938 return true; 1939 } else if (isASCIIUpper(firstCharacter)) { 1940 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 1941 if (!isASCIIUpper(previousCharacter)) 1942 return true; 1943 // The last character of an uppercase run followed by a non-separator, non-digit 1944 // is a word start ("Request" in "XMLHTTPRequest"). 1945 offset = start; 1946 U16_FWD_1(m_buffer.data(), offset, size); 1947 UChar32 nextCharacter = 0; 1948 if (offset < size) 1949 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 1950 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 1951 return true; 1952 } else if (isASCIIDigit(firstCharacter)) { 1953 // The start of a digit run is a word start ("2" in "WebKit2"). 1954 if (!isASCIIDigit(previousCharacter)) 1955 return true; 1956 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 1957 // The start of a non-separator, non-uppercase, non-digit run is a word start, 1958 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 1959 return true; 1960 } 1961 } 1962 1963 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes 1964 // a word, so treat the position before any CJK character as a word start. 1965 if (Character::isCJKIdeographOrSymbol(firstCharacter)) 1966 return true; 1967 1968 size_t wordBreakSearchStart = start + length; 1969 while (wordBreakSearchStart > start) 1970 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 1971 return wordBreakSearchStart == start; 1972 } 1973 1974 inline size_t SearchBuffer::search(size_t& start) 1975 { 1976 size_t size = m_buffer.size(); 1977 if (m_atBreak) { 1978 if (!size) 1979 return 0; 1980 } else { 1981 if (size != m_buffer.capacity()) 1982 return 0; 1983 } 1984 1985 UStringSearch* searcher = WebCore::searcher(); 1986 1987 UErrorCode status = U_ZERO_ERROR; 1988 usearch_setText(searcher, m_buffer.data(), size, &status); 1989 ASSERT(status == U_ZERO_ERROR); 1990 1991 usearch_setOffset(searcher, m_prefixLength, &status); 1992 ASSERT(status == U_ZERO_ERROR); 1993 1994 int matchStart = usearch_next(searcher, &status); 1995 ASSERT(status == U_ZERO_ERROR); 1996 1997 nextMatch: 1998 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 1999 ASSERT(matchStart == USEARCH_DONE); 2000 return 0; 2001 } 2002 2003 // Matches that start in the overlap area are only tentative. 2004 // The same match may appear later, matching more characters, 2005 // possibly including a combining character that's not yet in the buffer. 2006 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2007 size_t overlap = m_overlap; 2008 if (m_options & AtWordStarts) { 2009 // Ensure that there is sufficient context before matchStart the next time around for 2010 // determining if it is at a word boundary. 2011 int wordBoundaryContextStart = matchStart; 2012 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2013 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 2014 overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); 2015 } 2016 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2017 m_prefixLength -= std::min(m_prefixLength, size - overlap); 2018 m_buffer.shrink(overlap); 2019 return 0; 2020 } 2021 2022 size_t matchedLength = usearch_getMatchedLength(searcher); 2023 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); 2024 2025 // If this match is "bad", move on to the next match. 2026 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 2027 matchStart = usearch_next(searcher, &status); 2028 ASSERT(status == U_ZERO_ERROR); 2029 goto nextMatch; 2030 } 2031 2032 size_t newSize = size - (matchStart + 1); 2033 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2034 m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1); 2035 m_buffer.shrink(newSize); 2036 2037 start = size - matchStart; 2038 return matchedLength; 2039 } 2040 2041 // -------- 2042 2043 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 2044 { 2045 int length = 0; 2046 TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter; 2047 if (forSelectionPreservation) 2048 behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions; 2049 for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance()) 2050 length += it.length(); 2051 2052 return length; 2053 } 2054 2055 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2056 { 2057 CharacterIterator entireRangeIterator(entireRange); 2058 Position start; 2059 Position end; 2060 calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end); 2061 return Range::create(entireRange->ownerDocument(), start, end); 2062 } 2063 2064 // -------- 2065 2066 String plainText(const Range* r, TextIteratorBehaviorFlags behavior) 2067 { 2068 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192 2069 static const unsigned initialCapacity = 1 << 15; 2070 2071 unsigned bufferLength = 0; 2072 StringBuilder builder; 2073 builder.reserveCapacity(initialCapacity); 2074 2075 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2076 it.appendTextToStringBuilder(builder); 2077 bufferLength += it.length(); 2078 } 2079 2080 if (!bufferLength) 2081 return emptyString(); 2082 2083 return builder.toString(); 2084 } 2085 2086 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2087 { 2088 RefPtrWillBeRawPtr<Range> result = range->cloneRange(); 2089 result->collapse(!forward); 2090 return result.release(); 2091 } 2092 2093 // Check if there's any unpaird surrogate code point. 2094 // Non-character code points are not checked. 2095 static bool isValidUTF16(const String& s) 2096 { 2097 if (s.is8Bit()) 2098 return true; 2099 const UChar* ustr = s.characters16(); 2100 size_t length = s.length(); 2101 size_t position = 0; 2102 while (position < length) { 2103 UChar32 character; 2104 U16_NEXT(ustr, position, length, character); 2105 if (U_IS_SURROGATE(character)) 2106 return false; 2107 } 2108 return true; 2109 } 2110 2111 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2112 { 2113 matchStart = 0; 2114 size_t matchLength = 0; 2115 2116 if (!isValidUTF16(target)) 2117 return 0; 2118 2119 SearchBuffer buffer(target, options); 2120 2121 if (buffer.needsMoreContext()) { 2122 RefPtrWillBeRawPtr<Range> startRange = it.range(); 2123 RefPtrWillBeRawPtr<Range> beforeStartRange = startRange->ownerDocument().createRange(); 2124 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION); 2125 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2126 Vector<UChar, 1024> characters; 2127 backwardsIterator.prependTextTo(characters); 2128 buffer.prependContext(characters.data(), characters.size()); 2129 if (!buffer.needsMoreContext()) 2130 break; 2131 } 2132 } 2133 2134 while (!it.atEnd()) { 2135 it.appendTextTo(buffer); 2136 it.advance(buffer.numberOfCharactersJustAppended()); 2137 tryAgain: 2138 size_t matchStartOffset; 2139 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2140 // Note that we found a match, and where we found it. 2141 size_t lastCharacterInBufferOffset = it.characterOffset(); 2142 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2143 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2144 matchLength = newMatchLength; 2145 // If searching forward, stop on the first match. 2146 // If searching backward, don't stop, so we end up with the last match. 2147 if (!(options & Backwards)) 2148 break; 2149 goto tryAgain; 2150 } 2151 if (it.atBreak() && !buffer.atBreak()) { 2152 buffer.reachedBreak(); 2153 goto tryAgain; 2154 } 2155 } 2156 2157 return matchLength; 2158 } 2159 2160 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots; 2161 2162 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2163 { 2164 // CharacterIterator requires renderers to be up-to-date 2165 range->ownerDocument().updateLayout(); 2166 2167 // First, find the text. 2168 size_t matchStart; 2169 size_t matchLength; 2170 { 2171 CharacterIterator findIterator(range, iteratorFlagsForFindPlainText); 2172 matchLength = findPlainTextInternal(findIterator, target, options, matchStart); 2173 if (!matchLength) 2174 return collapsedToBoundary(range, !(options & Backwards)); 2175 } 2176 2177 // Then, find the document position of the start and the end of the text. 2178 CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText); 2179 Position resultStart; 2180 Position resultEnd; 2181 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd); 2182 return Range::create(range->ownerDocument(), resultStart, resultEnd); 2183 } 2184 2185 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd) 2186 { 2187 resultStart.clear(); 2188 resultEnd.clear(); 2189 // CharacterIterator requires renderers to be up-to-date. 2190 if (!inputStart.inDocument()) 2191 return; 2192 ASSERT(inputStart.document() == inputEnd.document()); 2193 inputStart.document()->updateLayout(); 2194 2195 // FIXME: Reduce the code duplication with above (but how?). 2196 size_t matchStart; 2197 size_t matchLength; 2198 { 2199 CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText); 2200 matchLength = findPlainTextInternal(findIterator, target, options, matchStart); 2201 if (!matchLength) { 2202 const Position& collapseTo = options & Backwards ? inputStart : inputEnd; 2203 resultStart = collapseTo; 2204 resultEnd = collapseTo; 2205 return; 2206 } 2207 } 2208 2209 CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText); 2210 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd); 2211 } 2212 2213 } 2214