1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "core/editing/TextIterator.h" 29 30 #include "HTMLNames.h" 31 #include "bindings/v8/ExceptionStatePlaceholder.h" 32 #include "core/dom/Document.h" 33 #include "core/dom/NodeTraversal.h" 34 #include "core/dom/shadow/ShadowRoot.h" 35 #include "core/editing/VisiblePosition.h" 36 #include "core/editing/VisibleUnits.h" 37 #include "core/editing/htmlediting.h" 38 #include "core/html/HTMLElement.h" 39 #include "core/html/HTMLTextFormControlElement.h" 40 #include "core/rendering/InlineTextBox.h" 41 #include "core/rendering/RenderImage.h" 42 #include "core/rendering/RenderTableCell.h" 43 #include "core/rendering/RenderTableRow.h" 44 #include "core/rendering/RenderTextControl.h" 45 #include "core/rendering/RenderTextFragment.h" 46 #include "platform/fonts/Font.h" 47 #include "platform/text/TextBoundaries.h" 48 #include "platform/text/TextBreakIteratorInternalICU.h" 49 #include "platform/text/UnicodeUtilities.h" 50 #include "wtf/text/CString.h" 51 #include "wtf/text/StringBuilder.h" 52 #include "wtf/unicode/CharacterNames.h" 53 #include <unicode/usearch.h> 54 55 using namespace WTF::Unicode; 56 using namespace std; 57 58 namespace WebCore { 59 60 using namespace HTMLNames; 61 62 // Buffer that knows how to compare with a search target. 63 // Keeps enough of the previous text to be able to search in the future, but no more. 64 // Non-breaking spaces are always equal to normal spaces. 65 // Case folding is also done if the CaseInsensitive option is specified. 66 // Matches are further filtered if the AtWordStarts option is specified, although some 67 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 68 class SearchBuffer { 69 WTF_MAKE_NONCOPYABLE(SearchBuffer); 70 public: 71 SearchBuffer(const String& target, FindOptions); 72 ~SearchBuffer(); 73 74 // Returns number of characters appended; guaranteed to be in the range [1, length]. 75 template<typename CharType> 76 void append(const CharType*, size_t length); 77 size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; } 78 79 bool needsMoreContext() const; 80 void prependContext(const UChar*, size_t length); 81 void reachedBreak(); 82 83 // Result is the size in characters of what was found. 84 // And <startOffset> is the number of characters back to the start of what was found. 85 size_t search(size_t& startOffset); 86 bool atBreak() const; 87 88 private: 89 bool isBadMatch(const UChar*, size_t length) const; 90 bool isWordStartMatch(size_t start, size_t length) const; 91 92 Vector<UChar> m_target; 93 FindOptions m_options; 94 95 Vector<UChar> m_buffer; 96 size_t m_overlap; 97 size_t m_prefixLength; 98 size_t m_numberOfCharactersJustAppended; 99 bool m_atBreak; 100 bool m_needsMoreContext; 101 102 bool m_targetRequiresKanaWorkaround; 103 Vector<UChar> m_normalizedTarget; 104 mutable Vector<UChar> m_normalizedMatch; 105 }; 106 107 // -------- 108 109 static const unsigned bitsInWord = sizeof(unsigned) * 8; 110 static const unsigned bitInWordMask = bitsInWord - 1; 111 112 BitStack::BitStack() 113 : m_size(0) 114 { 115 } 116 117 BitStack::~BitStack() 118 { 119 } 120 121 void BitStack::push(bool bit) 122 { 123 unsigned index = m_size / bitsInWord; 124 unsigned shift = m_size & bitInWordMask; 125 if (!shift && index == m_words.size()) { 126 m_words.grow(index + 1); 127 m_words[index] = 0; 128 } 129 unsigned& word = m_words[index]; 130 unsigned mask = 1U << shift; 131 if (bit) 132 word |= mask; 133 else 134 word &= ~mask; 135 ++m_size; 136 } 137 138 void BitStack::pop() 139 { 140 if (m_size) 141 --m_size; 142 } 143 144 bool BitStack::top() const 145 { 146 if (!m_size) 147 return false; 148 unsigned shift = (m_size - 1) & bitInWordMask; 149 return m_words.last() & (1U << shift); 150 } 151 152 unsigned BitStack::size() const 153 { 154 return m_size; 155 } 156 157 // -------- 158 159 #if !ASSERT_DISABLED 160 161 static unsigned depthCrossingShadowBoundaries(Node* node) 162 { 163 unsigned depth = 0; 164 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 165 ++depth; 166 return depth; 167 } 168 169 #endif 170 171 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 172 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 173 { 174 if (!rangeEndContainer) 175 return 0; 176 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 177 if (Node* next = rangeEndContainer->childNode(rangeEndOffset)) 178 return next; 179 } 180 for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) { 181 if (Node* next = node->nextSibling()) 182 return next; 183 } 184 return 0; 185 } 186 187 // -------- 188 189 static inline bool fullyClipsContents(Node* node) 190 { 191 RenderObject* renderer = node->renderer(); 192 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 193 return false; 194 return toRenderBox(renderer)->size().isEmpty(); 195 } 196 197 static inline bool ignoresContainerClip(Node* node) 198 { 199 RenderObject* renderer = node->renderer(); 200 if (!renderer || renderer->isText()) 201 return false; 202 return renderer->style()->hasOutOfFlowPosition(); 203 } 204 205 static void pushFullyClippedState(BitStack& stack, Node* node) 206 { 207 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 208 209 // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364> 210 // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if 211 // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches 212 // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics 213 // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox). 214 // 215 // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering 216 // primitives, not with DOM primitives. 217 218 // Push true if this node full clips its contents, or if a parent already has fully 219 // clipped and this is not a node that ignores its container's clip. 220 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 221 } 222 223 static void setUpFullyClippedStack(BitStack& stack, Node* node) 224 { 225 // Put the nodes in a vector so we can iterate in reverse order. 226 Vector<Node*, 100> ancestry; 227 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 228 ancestry.append(parent); 229 230 // Call pushFullyClippedState on each node starting with the earliest ancestor. 231 size_t size = ancestry.size(); 232 for (size_t i = 0; i < size; ++i) 233 pushFullyClippedState(stack, ancestry[size - i - 1]); 234 pushFullyClippedState(stack, node); 235 236 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 237 } 238 239 // -------- 240 241 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior) 242 : m_shadowDepth(0) 243 , m_startContainer(0) 244 , m_startOffset(0) 245 , m_endContainer(0) 246 , m_endOffset(0) 247 , m_positionNode(0) 248 , m_textLength(0) 249 , m_remainingTextBox(0) 250 , m_firstLetterText(0) 251 , m_sortedTextBoxesPosition(0) 252 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 253 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 254 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) 255 , m_handledFirstLetter(false) 256 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 257 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 258 , m_shouldStop(false) 259 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) 260 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots) 261 { 262 if (!range) 263 return; 264 265 // get and validate the range endpoints 266 Node* startContainer = range->startContainer(); 267 if (!startContainer) 268 return; 269 int startOffset = range->startOffset(); 270 Node* endContainer = range->endContainer(); 271 int endOffset = range->endOffset(); 272 273 // Callers should be handing us well-formed ranges. If we discover that this isn't 274 // the case, we could consider changing this assertion to an early return. 275 ASSERT(range->boundaryPointsValid()); 276 277 // remember range - this does not change 278 m_startContainer = startContainer; 279 m_startOffset = startOffset; 280 m_endContainer = endContainer; 281 m_endOffset = endOffset; 282 283 // set up the current node for processing 284 m_node = range->firstNode(); 285 if (!m_node) 286 return; 287 setUpFullyClippedStack(m_fullyClippedStack, m_node); 288 m_offset = m_node == m_startContainer ? m_startOffset : 0; 289 m_iterationProgress = HandledNone; 290 291 // calculate first out of bounds node 292 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 293 294 // initialize node processing state 295 m_needsAnotherNewline = false; 296 m_textBox = 0; 297 298 // initialize record of previous node processing 299 m_hasEmitted = false; 300 m_lastTextNode = 0; 301 m_lastTextNodeEndedWithCollapsedSpace = false; 302 m_lastCharacter = 0; 303 304 // identify the first run 305 advance(); 306 } 307 308 TextIterator::~TextIterator() 309 { 310 } 311 312 void TextIterator::advance() 313 { 314 if (m_shouldStop) 315 return; 316 317 // reset the run information 318 m_positionNode = 0; 319 m_textLength = 0; 320 321 // handle remembered node that needed a newline after the text node's newline 322 if (m_needsAnotherNewline) { 323 // Emit the extra newline, and position it *inside* m_node, after m_node's 324 // contents, in case it's a block, in the same way that we position the first 325 // newline. The range for the emitted newline should start where the line 326 // break begins. 327 // FIXME: It would be cleaner if we emitted two newlines during the last 328 // iteration, instead of using m_needsAnotherNewline. 329 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 330 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 331 m_needsAnotherNewline = false; 332 return; 333 } 334 335 if (!m_textBox && m_remainingTextBox) { 336 m_textBox = m_remainingTextBox; 337 m_remainingTextBox = 0; 338 m_firstLetterText = 0; 339 m_offset = 0; 340 } 341 // handle remembered text box 342 if (m_textBox) { 343 handleTextBox(); 344 if (m_positionNode) 345 return; 346 } 347 348 while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) { 349 if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) 350 m_shouldStop = true; 351 352 // if the range ends at offset 0 of an element, represent the 353 // position, but not the content, of that element e.g. if the 354 // node is a blockflow element, emit a newline that 355 // precedes the element 356 if (m_node == m_endContainer && !m_endOffset) { 357 representNodeOffsetZero(); 358 m_node = 0; 359 return; 360 } 361 362 RenderObject* renderer = m_node->renderer(); 363 if (!renderer) { 364 if (m_node->isShadowRoot()) { 365 // A shadow root doesn't have a renderer, but we want to visit children anyway. 366 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress; 367 } else { 368 m_iterationProgress = HandledChildren; 369 } 370 } else { 371 // Enter author shadow roots, from youngest, if any and if necessary. 372 if (m_iterationProgress < HandledAuthorShadowRoots) { 373 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) { 374 ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot(); 375 ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot); 376 m_node = youngestShadowRoot; 377 m_iterationProgress = HandledNone; 378 ++m_shadowDepth; 379 pushFullyClippedState(m_fullyClippedStack, m_node); 380 continue; 381 } 382 383 m_iterationProgress = HandledAuthorShadowRoots; 384 } 385 386 // Enter user-agent shadow root, if necessary. 387 if (m_iterationProgress < HandledUserAgentShadowRoot) { 388 if (m_entersTextControls && renderer->isTextControl()) { 389 ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot(); 390 ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 391 m_node = userAgentShadowRoot; 392 m_iterationProgress = HandledNone; 393 ++m_shadowDepth; 394 pushFullyClippedState(m_fullyClippedStack, m_node); 395 continue; 396 } 397 m_iterationProgress = HandledUserAgentShadowRoot; 398 } 399 400 // Handle the current node according to its type. 401 if (m_iterationProgress < HandledNode) { 402 bool handledNode = false; 403 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE? 404 handledNode = handleTextNode(); 405 } else if (renderer && (renderer->isImage() || renderer->isWidget() 406 || (renderer->node() && renderer->node()->isElementNode() 407 && (toElement(renderer->node())->isFormControlElement() 408 || toElement(renderer->node())->hasTagName(legendTag) 409 || toElement(renderer->node())->hasTagName(meterTag) 410 || toElement(renderer->node())->hasTagName(progressTag))))) { 411 handledNode = handleReplacedElement(); 412 } else { 413 handledNode = handleNonTextNode(); 414 } 415 if (handledNode) 416 m_iterationProgress = HandledNode; 417 if (m_positionNode) 418 return; 419 } 420 } 421 422 // Find a new current node to handle in depth-first manner, 423 // calling exitNode() as we come back thru a parent node. 424 // 425 // 1. Iterate over child nodes, if we haven't done yet. 426 Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0; 427 m_offset = 0; 428 if (!next) { 429 // 2. If we've already iterated children or they are not available, go to the next sibling node. 430 next = m_node->nextSibling(); 431 if (!next) { 432 // 3. If we are at the last child, go up the node tree until we find a next sibling. 433 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode; 434 Node* parentNode = m_node->parentNode(); 435 while (!next && parentNode) { 436 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 437 return; 438 bool haveRenderer = m_node->renderer(); 439 m_node = parentNode; 440 m_fullyClippedStack.pop(); 441 parentNode = m_node->parentNode(); 442 if (haveRenderer) 443 exitNode(); 444 if (m_positionNode) { 445 m_iterationProgress = HandledChildren; 446 return; 447 } 448 next = m_node->nextSibling(); 449 } 450 451 if (!next && !parentNode && m_shadowDepth > 0) { 452 // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next 453 // sibling shadow root, if any. 454 ShadowRoot* shadowRoot = toShadowRoot(m_node); 455 if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 456 ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot(); 457 if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) { 458 m_fullyClippedStack.pop(); 459 m_node = nextShadowRoot; 460 m_iterationProgress = HandledNone; 461 // m_shadowDepth is unchanged since we exit from a shadow root and enter another. 462 pushFullyClippedState(m_fullyClippedStack, m_node); 463 } else { 464 // We are the last shadow root; exit from here and go back to where we were. 465 m_node = shadowRoot->host(); 466 m_iterationProgress = HandledAuthorShadowRoots; 467 --m_shadowDepth; 468 m_fullyClippedStack.pop(); 469 } 470 } else { 471 // If we are in a user-agent shadow root, then go back to the host. 472 ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot); 473 m_node = shadowRoot->host(); 474 m_iterationProgress = HandledUserAgentShadowRoot; 475 --m_shadowDepth; 476 m_fullyClippedStack.pop(); 477 } 478 m_handledFirstLetter = false; 479 m_firstLetterText = 0; 480 continue; 481 } 482 } 483 m_fullyClippedStack.pop(); 484 } 485 486 // set the new current node 487 m_node = next; 488 if (m_node) 489 pushFullyClippedState(m_fullyClippedStack, m_node); 490 m_iterationProgress = HandledNone; 491 m_handledFirstLetter = false; 492 m_firstLetterText = 0; 493 494 // how would this ever be? 495 if (m_positionNode) 496 return; 497 } 498 } 499 500 UChar TextIterator::characterAt(unsigned index) const 501 { 502 ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length())); 503 if (!(index < static_cast<unsigned>(length()))) 504 return 0; 505 506 if (m_singleCharacterBuffer) { 507 ASSERT(!index); 508 ASSERT(length() == 1); 509 return m_singleCharacterBuffer; 510 } 511 512 return string()[startOffset() + index]; 513 } 514 515 String TextIterator::substring(unsigned position, unsigned length) const 516 { 517 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length())); 518 ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length())); 519 if (!length) 520 return emptyString(); 521 if (m_singleCharacterBuffer) { 522 ASSERT(!position); 523 ASSERT(length == 1); 524 return String(&m_singleCharacterBuffer, 1); 525 } 526 return string().substring(startOffset() + position, length); 527 } 528 529 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const 530 { 531 unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength); 532 if (!lengthToAppend) 533 return; 534 if (m_singleCharacterBuffer) { 535 ASSERT(!position); 536 builder.append(m_singleCharacterBuffer); 537 } else { 538 builder.append(string(), startOffset() + position, lengthToAppend); 539 } 540 } 541 542 bool TextIterator::handleTextNode() 543 { 544 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 545 return false; 546 547 RenderText* renderer = toRenderText(m_node->renderer()); 548 549 m_lastTextNode = m_node; 550 String str = renderer->text(); 551 552 // handle pre-formatted text 553 if (!renderer->style()->collapseWhiteSpace()) { 554 int runStart = m_offset; 555 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 556 emitCharacter(' ', m_node, 0, runStart, runStart); 557 return false; 558 } 559 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) { 560 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 561 if (m_firstLetterText) { 562 String firstLetter = m_firstLetterText->text(); 563 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 564 m_firstLetterText = 0; 565 m_textBox = 0; 566 return false; 567 } 568 } 569 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 570 return false; 571 int strLength = str.length(); 572 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; 573 int runEnd = min(strLength, end); 574 575 if (runStart >= runEnd) 576 return true; 577 578 emitText(m_node, runStart, runEnd); 579 return true; 580 } 581 582 if (renderer->firstTextBox()) 583 m_textBox = renderer->firstTextBox(); 584 585 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset; 586 if (shouldHandleFirstLetter) 587 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 588 589 if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) { 590 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 591 return false; 592 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 593 return true; 594 } 595 596 if (m_firstLetterText) 597 renderer = m_firstLetterText; 598 599 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 600 if (renderer->containsReversedText()) { 601 m_sortedTextBoxes.clear(); 602 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 603 m_sortedTextBoxes.append(textBox); 604 } 605 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 606 m_sortedTextBoxesPosition = 0; 607 m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]; 608 } 609 610 handleTextBox(); 611 return true; 612 } 613 614 void TextIterator::handleTextBox() 615 { 616 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); 617 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 618 m_textBox = 0; 619 return; 620 } 621 String str = renderer->text(); 622 unsigned start = m_offset; 623 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX; 624 while (m_textBox) { 625 unsigned textBoxStart = m_textBox->start(); 626 unsigned runStart = max(textBoxStart, start); 627 628 // Check for collapsed space at the start of this run. 629 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 630 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 631 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 632 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { 633 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 634 unsigned spaceRunStart = runStart - 1; 635 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 636 --spaceRunStart; 637 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 638 } else { 639 emitCharacter(' ', m_node, 0, runStart, runStart); 640 } 641 return; 642 } 643 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 644 unsigned runEnd = min(textBoxEnd, end); 645 646 // Determine what the next text box will be, but don't advance yet 647 InlineTextBox* nextTextBox = 0; 648 if (renderer->containsReversedText()) { 649 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 650 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 651 } else { 652 nextTextBox = m_textBox->nextTextBox(); 653 } 654 ASSERT(!nextTextBox || nextTextBox->renderer() == renderer); 655 656 if (runStart < runEnd) { 657 // Handle either a single newline character (which becomes a space), 658 // or a run of characters that does not include a newline. 659 // This effectively translates newlines to spaces without copying the text. 660 if (str[runStart] == '\n') { 661 emitCharacter(' ', m_node, 0, runStart, runStart + 1); 662 m_offset = runStart + 1; 663 } else { 664 size_t subrunEnd = str.find('\n', runStart); 665 if (subrunEnd == kNotFound || subrunEnd > runEnd) 666 subrunEnd = runEnd; 667 668 m_offset = subrunEnd; 669 emitText(m_node, renderer, runStart, subrunEnd); 670 } 671 672 // If we are doing a subrun that doesn't go to the end of the text box, 673 // come back again to finish handling this text box; don't advance to the next one. 674 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 675 return; 676 677 // Advance and return 678 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 679 if (nextRunStart > runEnd) 680 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 681 m_textBox = nextTextBox; 682 if (renderer->containsReversedText()) 683 ++m_sortedTextBoxesPosition; 684 return; 685 } 686 // Advance and continue 687 m_textBox = nextTextBox; 688 if (renderer->containsReversedText()) 689 ++m_sortedTextBoxesPosition; 690 } 691 if (!m_textBox && m_remainingTextBox) { 692 m_textBox = m_remainingTextBox; 693 m_remainingTextBox = 0; 694 m_firstLetterText = 0; 695 m_offset = 0; 696 handleTextBox(); 697 } 698 } 699 700 static inline RenderText* firstRenderTextInFirstLetter(RenderObject* firstLetter) 701 { 702 if (!firstLetter) 703 return 0; 704 705 // FIXME: Should this check descendent objects? 706 for (RenderObject* current = firstLetter->firstChild(); current; current = current->nextSibling()) { 707 if (current->isText()) 708 return toRenderText(current); 709 } 710 return 0; 711 } 712 713 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 714 { 715 if (renderer->firstLetter()) { 716 RenderObject* r = renderer->firstLetter(); 717 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 718 return; 719 if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) { 720 m_handledFirstLetter = true; 721 m_remainingTextBox = m_textBox; 722 m_textBox = firstLetter->firstTextBox(); 723 m_sortedTextBoxes.clear(); 724 m_firstLetterText = firstLetter; 725 } 726 } 727 m_handledFirstLetter = true; 728 } 729 730 bool TextIterator::handleReplacedElement() 731 { 732 if (m_fullyClippedStack.top()) 733 return false; 734 735 RenderObject* renderer = m_node->renderer(); 736 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 737 return false; 738 739 if (m_lastTextNodeEndedWithCollapsedSpace) { 740 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 741 return false; 742 } 743 744 if (m_entersTextControls && renderer->isTextControl()) { 745 // The shadow tree should be already visited. 746 return true; 747 } 748 749 m_hasEmitted = true; 750 751 if (m_emitsCharactersBetweenAllVisiblePositions) { 752 // We want replaced elements to behave like punctuation for boundary 753 // finding, and to simply take up space for the selection preservation 754 // code in moveParagraphs, so we use a comma. 755 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 756 return true; 757 } 758 759 m_positionNode = m_node->parentNode(); 760 m_positionOffsetBaseNode = m_node; 761 m_positionStartOffset = 0; 762 m_positionEndOffset = 1; 763 m_singleCharacterBuffer = 0; 764 765 if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) { 766 m_text = toRenderImage(renderer)->altText(); 767 if (!m_text.isEmpty()) { 768 m_textLength = m_text.length(); 769 m_lastCharacter = m_text[m_textLength - 1]; 770 return true; 771 } 772 } 773 774 m_textLength = 0; 775 m_lastCharacter = 0; 776 777 return true; 778 } 779 780 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 781 { 782 if (renderer->style()->visibility() == VISIBLE) 783 return true; 784 if (renderer->isTextFragment()) { 785 RenderTextFragment* fragment = toRenderTextFragment(renderer); 786 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 787 return true; 788 } 789 return false; 790 } 791 792 static bool shouldEmitTabBeforeNode(Node* node) 793 { 794 RenderObject* r = node->renderer(); 795 796 // Table cells are delimited by tabs. 797 if (!r || !isTableCell(node)) 798 return false; 799 800 // Want a tab before every cell other than the first one 801 RenderTableCell* rc = toRenderTableCell(r); 802 RenderTable* t = rc->table(); 803 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 804 } 805 806 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) 807 { 808 RenderObject* renderer = node->renderer(); 809 810 if (renderer ? !renderer->isBR() : !node->hasTagName(brTag)) 811 return false; 812 return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->hasTagName(inputTag)); 813 } 814 815 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node) 816 { 817 // Block flow (versus inline flow) is represented by having 818 // a newline both before and after the element. 819 RenderObject* r = node.renderer(); 820 if (!r) { 821 return (node.hasTagName(blockquoteTag) 822 || node.hasTagName(ddTag) 823 || node.hasTagName(divTag) 824 || node.hasTagName(dlTag) 825 || node.hasTagName(dtTag) 826 || node.hasTagName(h1Tag) 827 || node.hasTagName(h2Tag) 828 || node.hasTagName(h3Tag) 829 || node.hasTagName(h4Tag) 830 || node.hasTagName(h5Tag) 831 || node.hasTagName(h6Tag) 832 || node.hasTagName(hrTag) 833 || node.hasTagName(liTag) 834 || node.hasTagName(listingTag) 835 || node.hasTagName(olTag) 836 || node.hasTagName(pTag) 837 || node.hasTagName(preTag) 838 || node.hasTagName(trTag) 839 || node.hasTagName(ulTag)); 840 } 841 842 // Need to make an exception for table cells, because they are blocks, but we 843 // want them tab-delimited rather than having newlines before and after. 844 if (isTableCell(&node)) 845 return false; 846 847 // Need to make an exception for table row elements, because they are neither 848 // "inline" or "RenderBlock", but we want newlines for them. 849 if (r->isTableRow()) { 850 RenderTable* t = toRenderTableRow(r)->table(); 851 if (t && !t->isInline()) 852 return true; 853 } 854 855 return !r->isInline() && r->isRenderBlock() 856 && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText(); 857 } 858 859 static bool shouldEmitNewlineAfterNode(Node& node) 860 { 861 // FIXME: It should be better but slower to create a VisiblePosition here. 862 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 863 return false; 864 // Check if this is the very last renderer in the document. 865 // If so, then we should not emit a newline. 866 Node* next = &node; 867 while ((next = NodeTraversal::nextSkippingChildren(*next))) { 868 if (next->renderer()) 869 return true; 870 } 871 return false; 872 } 873 874 static bool shouldEmitNewlineBeforeNode(Node& node) 875 { 876 return shouldEmitNewlinesBeforeAndAfterNode(node); 877 } 878 879 static bool shouldEmitExtraNewlineForNode(Node* node) 880 { 881 // When there is a significant collapsed bottom margin, emit an extra 882 // newline for a more realistic result. We end up getting the right 883 // result even without margin collapsing. For example: <div><p>text</p></div> 884 // will work right even if both the <div> and the <p> have bottom margins. 885 RenderObject* r = node->renderer(); 886 if (!r || !r->isBox()) 887 return false; 888 889 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 890 // not to do this at all 891 if (node->hasTagName(h1Tag) 892 || node->hasTagName(h2Tag) 893 || node->hasTagName(h3Tag) 894 || node->hasTagName(h4Tag) 895 || node->hasTagName(h5Tag) 896 || node->hasTagName(h6Tag) 897 || node->hasTagName(pTag)) { 898 RenderStyle* style = r->style(); 899 if (style) { 900 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 901 int fontSize = style->fontDescription().computedPixelSize(); 902 if (bottomMargin * 2 >= fontSize) 903 return true; 904 } 905 } 906 907 return false; 908 } 909 910 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 911 { 912 const String& text = renderer->text(); 913 int length = text.length(); 914 for (int i = textEnd; i < length; ++i) { 915 if (!renderer->style()->isCollapsibleWhiteSpace(text[i])) 916 return i - textEnd; 917 } 918 919 return length - textEnd; 920 } 921 922 static int maxOffsetIncludingCollapsedSpaces(Node* node) 923 { 924 int offset = caretMaxOffset(node); 925 926 if (node->renderer() && node->renderer()->isText()) 927 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 928 929 return offset; 930 } 931 932 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 933 bool TextIterator::shouldRepresentNodeOffsetZero() 934 { 935 if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTable(m_node)) 936 return true; 937 938 // Leave element positioned flush with start of a paragraph 939 // (e.g. do not insert tab before a table cell at the start of a paragraph) 940 if (m_lastCharacter == '\n') 941 return false; 942 943 // Otherwise, show the position if we have emitted any characters 944 if (m_hasEmitted) 945 return true; 946 947 // We've not emitted anything yet. Generally, there is no need for any positioning then. 948 // The only exception is when the element is visually not in the same line as 949 // the start of the range (e.g. the range starts at the end of the previous paragraph). 950 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 951 // make quicker checks to possibly avoid that. Another check that we could make is 952 // is whether the inline vs block flow changed since the previous visible element. 953 // I think we're already in a special enough case that that won't be needed, tho. 954 955 // No character needed if this is the first node in the range. 956 if (m_node == m_startContainer) 957 return false; 958 959 // If we are outside the start container's subtree, assume we need to emit. 960 // FIXME: m_startContainer could be an inline block 961 if (!m_node->isDescendantOf(m_startContainer)) 962 return true; 963 964 // If we started as m_startContainer offset 0 and the current node is a descendant of 965 // the start container, we already had enough context to correctly decide whether to 966 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 967 // so don't second guess that now. 968 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 969 // immaterial since we likely would have already emitted something by now. 970 if (!m_startOffset) 971 return false; 972 973 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 974 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 975 // we would create VisiblePositions on every call to this function without this check. 976 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE 977 || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag))) 978 return false; 979 980 // The startPos.isNotNull() check is needed because the start could be before the body, 981 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 982 // The currPos.isNotNull() check is needed because positions in non-HTML content 983 // (like SVG) do not have visible positions, and we don't want to emit for them either. 984 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 985 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 986 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 987 } 988 989 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 990 { 991 return isRenderedTable(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 992 } 993 994 void TextIterator::representNodeOffsetZero() 995 { 996 // Emit a character to show the positioning of m_node. 997 998 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 999 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 1000 // on m_node to see if it necessitates emitting a character first and will early return 1001 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 1002 if (shouldEmitTabBeforeNode(m_node)) { 1003 if (shouldRepresentNodeOffsetZero()) 1004 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 1005 } else if (shouldEmitNewlineBeforeNode(*m_node)) { 1006 if (shouldRepresentNodeOffsetZero()) 1007 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 1008 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 1009 if (shouldRepresentNodeOffsetZero()) 1010 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); 1011 } 1012 } 1013 1014 bool TextIterator::handleNonTextNode() 1015 { 1016 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText)) 1017 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 1018 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 1019 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); 1020 else 1021 representNodeOffsetZero(); 1022 1023 return true; 1024 } 1025 1026 void TextIterator::exitNode() 1027 { 1028 // prevent emitting a newline when exiting a collapsed block at beginning of the range 1029 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 1030 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 1031 // therefore look like a blank line. 1032 if (!m_hasEmitted) 1033 return; 1034 1035 // Emit with a position *inside* m_node, after m_node's contents, in 1036 // case it is a block, because the run should start where the 1037 // emitted character is positioned visually. 1038 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 1039 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 1040 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 1041 // TextIterator in _web_attributedStringFromRange. 1042 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 1043 if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) { 1044 // use extra newline to represent margin bottom, as needed 1045 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 1046 1047 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 1048 // contain a VisiblePosition when doing selection preservation. 1049 if (m_lastCharacter != '\n') { 1050 // insert a newline with a position following this block's contents. 1051 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1052 // remember whether to later add a newline for the current node 1053 ASSERT(!m_needsAnotherNewline); 1054 m_needsAnotherNewline = addNewline; 1055 } else if (addNewline) { 1056 // insert a newline with a position following this block's contents. 1057 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 1058 } 1059 } 1060 1061 // If nothing was emitted, see if we need to emit a space. 1062 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 1063 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); 1064 } 1065 1066 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 1067 { 1068 m_hasEmitted = true; 1069 1070 // remember information with which to construct the TextIterator::range() 1071 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 1072 m_positionNode = textNode; 1073 m_positionOffsetBaseNode = offsetBaseNode; 1074 m_positionStartOffset = textStartOffset; 1075 m_positionEndOffset = textEndOffset; 1076 1077 // remember information with which to construct the TextIterator::characters() and length() 1078 m_singleCharacterBuffer = c; 1079 ASSERT(m_singleCharacterBuffer); 1080 m_textLength = 1; 1081 1082 // remember some iteration state 1083 m_lastTextNodeEndedWithCollapsedSpace = false; 1084 m_lastCharacter = c; 1085 } 1086 1087 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) 1088 { 1089 RenderText* renderer = toRenderText(renderObject); 1090 m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text(); 1091 ASSERT(!m_text.isEmpty()); 1092 ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length())); 1093 ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length())); 1094 ASSERT(textStartOffset <= textEndOffset); 1095 1096 m_positionNode = textNode; 1097 m_positionOffsetBaseNode = 0; 1098 m_positionStartOffset = textStartOffset; 1099 m_positionEndOffset = textEndOffset; 1100 m_singleCharacterBuffer = 0; 1101 m_textLength = textEndOffset - textStartOffset; 1102 m_lastCharacter = m_text[textEndOffset - 1]; 1103 1104 m_lastTextNodeEndedWithCollapsedSpace = false; 1105 m_hasEmitted = true; 1106 } 1107 1108 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) 1109 { 1110 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); 1111 } 1112 1113 PassRefPtr<Range> TextIterator::range() const 1114 { 1115 // use the current run information, if we have it 1116 if (m_positionNode) { 1117 if (m_positionOffsetBaseNode) { 1118 int index = m_positionOffsetBaseNode->nodeIndex(); 1119 m_positionStartOffset += index; 1120 m_positionEndOffset += index; 1121 m_positionOffsetBaseNode = 0; 1122 } 1123 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1124 } 1125 1126 // otherwise, return the end of the overall range we were given 1127 if (m_endContainer) 1128 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1129 1130 return 0; 1131 } 1132 1133 Node* TextIterator::node() const 1134 { 1135 RefPtr<Range> textRange = range(); 1136 if (!textRange) 1137 return 0; 1138 1139 Node* node = textRange->startContainer(); 1140 if (!node) 1141 return 0; 1142 if (node->offsetInCharacters()) 1143 return node; 1144 1145 return node->childNode(textRange->startOffset()); 1146 } 1147 1148 // -------- 1149 1150 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior) 1151 : m_node(0) 1152 , m_offset(0) 1153 , m_handledNode(false) 1154 , m_handledChildren(false) 1155 , m_startNode(0) 1156 , m_startOffset(0) 1157 , m_endNode(0) 1158 , m_endOffset(0) 1159 , m_positionNode(0) 1160 , m_positionStartOffset(0) 1161 , m_positionEndOffset(0) 1162 , m_textOffset(0) 1163 , m_textLength(0) 1164 , m_lastTextNode(0) 1165 , m_lastCharacter(0) 1166 , m_singleCharacterBuffer(0) 1167 , m_havePassedStartNode(false) 1168 , m_shouldHandleFirstLetter(false) 1169 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1170 , m_shouldStop(false) 1171 , m_emitsOriginalText(false) 1172 { 1173 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1174 1175 if (!r) 1176 return; 1177 1178 Node* startNode = r->startContainer(); 1179 if (!startNode) 1180 return; 1181 Node* endNode = r->endContainer(); 1182 int startOffset = r->startOffset(); 1183 int endOffset = r->endOffset(); 1184 1185 if (!startNode->offsetInCharacters()) { 1186 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { 1187 startNode = startNode->childNode(startOffset); 1188 startOffset = 0; 1189 } 1190 } 1191 if (!endNode->offsetInCharacters()) { 1192 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { 1193 endNode = endNode->childNode(endOffset - 1); 1194 endOffset = lastOffsetInNode(endNode); 1195 } 1196 } 1197 1198 m_node = endNode; 1199 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1200 m_offset = endOffset; 1201 m_handledNode = false; 1202 m_handledChildren = !endOffset; 1203 1204 m_startNode = startNode; 1205 m_startOffset = startOffset; 1206 m_endNode = endNode; 1207 m_endOffset = endOffset; 1208 1209 #ifndef NDEBUG 1210 // Need this just because of the assert. 1211 m_positionNode = endNode; 1212 #endif 1213 1214 m_lastTextNode = 0; 1215 m_lastCharacter = '\n'; 1216 1217 m_havePassedStartNode = false; 1218 1219 advance(); 1220 } 1221 1222 void SimplifiedBackwardsTextIterator::advance() 1223 { 1224 ASSERT(m_positionNode); 1225 1226 if (m_shouldStop) 1227 return; 1228 1229 if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) { 1230 m_shouldStop = true; 1231 return; 1232 } 1233 1234 m_positionNode = 0; 1235 m_textLength = 0; 1236 1237 while (m_node && !m_havePassedStartNode) { 1238 // Don't handle node if we start iterating at [node, 0]. 1239 if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) { 1240 RenderObject* renderer = m_node->renderer(); 1241 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1242 // FIXME: What about CDATA_SECTION_NODE? 1243 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1244 m_handledNode = handleTextNode(); 1245 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1246 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1247 m_handledNode = handleReplacedElement(); 1248 } else { 1249 m_handledNode = handleNonTextNode(); 1250 } 1251 if (m_positionNode) 1252 return; 1253 } 1254 1255 if (!m_handledChildren && m_node->hasChildNodes()) { 1256 m_node = m_node->lastChild(); 1257 pushFullyClippedState(m_fullyClippedStack, m_node); 1258 } else { 1259 // Exit empty containers as we pass over them or containers 1260 // where [container, 0] is where we started iterating. 1261 if (!m_handledNode 1262 && canHaveChildrenForEditing(m_node) 1263 && m_node->parentNode() 1264 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1265 exitNode(); 1266 if (m_positionNode) { 1267 m_handledNode = true; 1268 m_handledChildren = true; 1269 return; 1270 } 1271 } 1272 1273 // Exit all other containers. 1274 while (!m_node->previousSibling()) { 1275 if (!advanceRespectingRange(m_node->parentOrShadowHostNode())) 1276 break; 1277 m_fullyClippedStack.pop(); 1278 exitNode(); 1279 if (m_positionNode) { 1280 m_handledNode = true; 1281 m_handledChildren = true; 1282 return; 1283 } 1284 } 1285 1286 m_fullyClippedStack.pop(); 1287 if (advanceRespectingRange(m_node->previousSibling())) 1288 pushFullyClippedState(m_fullyClippedStack, m_node); 1289 else 1290 m_node = 0; 1291 } 1292 1293 // For the purpose of word boundary detection, 1294 // we should iterate all visible text and trailing (collapsed) whitespaces. 1295 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1296 m_handledNode = false; 1297 m_handledChildren = false; 1298 1299 if (m_positionNode) 1300 return; 1301 } 1302 } 1303 1304 bool SimplifiedBackwardsTextIterator::handleTextNode() 1305 { 1306 m_lastTextNode = m_node; 1307 1308 int startOffset; 1309 int offsetInNode; 1310 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode); 1311 if (!renderer) 1312 return true; 1313 1314 String text = renderer->text(); 1315 if (!renderer->firstTextBox() && text.length() > 0) 1316 return true; 1317 1318 m_positionEndOffset = m_offset; 1319 m_offset = startOffset + offsetInNode; 1320 m_positionNode = m_node; 1321 m_positionStartOffset = m_offset; 1322 1323 ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length())); 1324 ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); 1325 ASSERT(m_positionStartOffset <= m_positionEndOffset); 1326 1327 m_textLength = m_positionEndOffset - m_positionStartOffset; 1328 m_textOffset = m_positionStartOffset - offsetInNode; 1329 m_textContainer = text; 1330 m_singleCharacterBuffer = 0; 1331 RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length()); 1332 1333 m_lastCharacter = text[m_positionEndOffset - 1]; 1334 1335 return !m_shouldHandleFirstLetter; 1336 } 1337 1338 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) 1339 { 1340 RenderText* renderer = toRenderText(m_node->renderer()); 1341 startOffset = (m_node == m_startNode) ? m_startOffset : 0; 1342 1343 if (!renderer->isTextFragment()) { 1344 offsetInNode = 0; 1345 return renderer; 1346 } 1347 1348 RenderTextFragment* fragment = toRenderTextFragment(renderer); 1349 int offsetAfterFirstLetter = fragment->start(); 1350 if (startOffset >= offsetAfterFirstLetter) { 1351 ASSERT(!m_shouldHandleFirstLetter); 1352 offsetInNode = offsetAfterFirstLetter; 1353 return renderer; 1354 } 1355 1356 if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) { 1357 m_shouldHandleFirstLetter = true; 1358 offsetInNode = offsetAfterFirstLetter; 1359 return renderer; 1360 } 1361 1362 m_shouldHandleFirstLetter = false; 1363 offsetInNode = 0; 1364 return firstRenderTextInFirstLetter(fragment->firstLetter()); 1365 } 1366 1367 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1368 { 1369 unsigned index = m_node->nodeIndex(); 1370 // We want replaced elements to behave like punctuation for boundary 1371 // finding, and to simply take up space for the selection preservation 1372 // code in moveParagraphs, so we use a comma. Unconditionally emit 1373 // here because this iterator is only used for boundary finding. 1374 emitCharacter(',', m_node->parentNode(), index, index + 1); 1375 return true; 1376 } 1377 1378 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1379 { 1380 // We can use a linefeed in place of a tab because this simple iterator is only used to 1381 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1382 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1383 unsigned index = m_node->nodeIndex(); 1384 // The start of this emitted range is wrong. Ensuring correctness would require 1385 // VisiblePositions and so would be slow. previousBoundary expects this. 1386 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1387 } 1388 return true; 1389 } 1390 1391 void SimplifiedBackwardsTextIterator::exitNode() 1392 { 1393 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) { 1394 // The start of this emitted range is wrong. Ensuring correctness would require 1395 // VisiblePositions and so would be slow. previousBoundary expects this. 1396 emitCharacter('\n', m_node, 0, 0); 1397 } 1398 } 1399 1400 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1401 { 1402 m_singleCharacterBuffer = c; 1403 m_positionNode = node; 1404 m_positionStartOffset = startOffset; 1405 m_positionEndOffset = endOffset; 1406 m_textOffset = 0; 1407 m_textLength = 1; 1408 m_lastCharacter = c; 1409 } 1410 1411 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1412 { 1413 if (!next) 1414 return false; 1415 m_havePassedStartNode |= m_node == m_startNode; 1416 if (m_havePassedStartNode) 1417 return false; 1418 m_node = next; 1419 return true; 1420 } 1421 1422 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const 1423 { 1424 if (m_positionNode) 1425 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1426 1427 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); 1428 } 1429 1430 // -------- 1431 1432 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehaviorFlags behavior) 1433 : m_offset(0) 1434 , m_runOffset(0) 1435 , m_atBreak(true) 1436 , m_textIterator(r, behavior) 1437 { 1438 while (!atEnd() && !m_textIterator.length()) 1439 m_textIterator.advance(); 1440 } 1441 1442 PassRefPtr<Range> CharacterIterator::range() const 1443 { 1444 RefPtr<Range> r = m_textIterator.range(); 1445 if (!m_textIterator.atEnd()) { 1446 if (m_textIterator.length() <= 1) { 1447 ASSERT(!m_runOffset); 1448 } else { 1449 Node* n = r->startContainer(); 1450 ASSERT(n == r->endContainer()); 1451 int offset = r->startOffset() + m_runOffset; 1452 r->setStart(n, offset, ASSERT_NO_EXCEPTION); 1453 r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION); 1454 } 1455 } 1456 return r.release(); 1457 } 1458 1459 void CharacterIterator::advance(int count) 1460 { 1461 if (count <= 0) { 1462 ASSERT(!count); 1463 return; 1464 } 1465 1466 m_atBreak = false; 1467 1468 // easy if there is enough left in the current m_textIterator run 1469 int remaining = m_textIterator.length() - m_runOffset; 1470 if (count < remaining) { 1471 m_runOffset += count; 1472 m_offset += count; 1473 return; 1474 } 1475 1476 // exhaust the current m_textIterator run 1477 count -= remaining; 1478 m_offset += remaining; 1479 1480 // move to a subsequent m_textIterator run 1481 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1482 int runLength = m_textIterator.length(); 1483 if (!runLength) { 1484 m_atBreak = true; 1485 } else { 1486 // see whether this is m_textIterator to use 1487 if (count < runLength) { 1488 m_runOffset = count; 1489 m_offset += count; 1490 return; 1491 } 1492 1493 // exhaust this m_textIterator run 1494 count -= runLength; 1495 m_offset += runLength; 1496 } 1497 } 1498 1499 // ran to the end of the m_textIterator... no more runs left 1500 m_atBreak = true; 1501 m_runOffset = 0; 1502 } 1503 1504 String CharacterIterator::string(int numChars) 1505 { 1506 StringBuilder result; 1507 result.reserveCapacity(numChars); 1508 while (numChars > 0 && !atEnd()) { 1509 int runSize = min(numChars, length()); 1510 m_textIterator.appendTextToStringBuilder(result, m_runOffset, runSize); 1511 numChars -= runSize; 1512 advance(runSize); 1513 } 1514 return result.toString(); 1515 } 1516 1517 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) 1518 { 1519 it.advance(offset); 1520 RefPtr<Range> start = it.range(); 1521 1522 if (length > 1) 1523 it.advance(length - 1); 1524 RefPtr<Range> end = it.range(); 1525 1526 return Range::create(start->startContainer()->document(), 1527 start->startContainer(), start->startOffset(), 1528 end->endContainer(), end->endOffset()); 1529 } 1530 1531 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior) 1532 : m_offset(0) 1533 , m_runOffset(0) 1534 , m_atBreak(true) 1535 , m_textIterator(range, behavior) 1536 { 1537 while (!atEnd() && !m_textIterator.length()) 1538 m_textIterator.advance(); 1539 } 1540 1541 PassRefPtr<Range> BackwardsCharacterIterator::range() const 1542 { 1543 RefPtr<Range> r = m_textIterator.range(); 1544 if (!m_textIterator.atEnd()) { 1545 if (m_textIterator.length() <= 1) { 1546 ASSERT(!m_runOffset); 1547 } else { 1548 Node* n = r->startContainer(); 1549 ASSERT(n == r->endContainer()); 1550 int offset = r->endOffset() - m_runOffset; 1551 r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION); 1552 r->setEnd(n, offset, ASSERT_NO_EXCEPTION); 1553 } 1554 } 1555 return r.release(); 1556 } 1557 1558 void BackwardsCharacterIterator::advance(int count) 1559 { 1560 if (count <= 0) { 1561 ASSERT(!count); 1562 return; 1563 } 1564 1565 m_atBreak = false; 1566 1567 int remaining = m_textIterator.length() - m_runOffset; 1568 if (count < remaining) { 1569 m_runOffset += count; 1570 m_offset += count; 1571 return; 1572 } 1573 1574 count -= remaining; 1575 m_offset += remaining; 1576 1577 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1578 int runLength = m_textIterator.length(); 1579 if (!runLength) { 1580 m_atBreak = true; 1581 } else { 1582 if (count < runLength) { 1583 m_runOffset = count; 1584 m_offset += count; 1585 return; 1586 } 1587 1588 count -= runLength; 1589 m_offset += runLength; 1590 } 1591 } 1592 1593 m_atBreak = true; 1594 m_runOffset = 0; 1595 } 1596 1597 // -------- 1598 1599 WordAwareIterator::WordAwareIterator(const Range* range) 1600 : m_didLookAhead(true) // So we consider the first chunk from the text iterator. 1601 , m_textIterator(range) 1602 { 1603 advance(); // Get in position over the first chunk of text. 1604 } 1605 1606 WordAwareIterator::~WordAwareIterator() 1607 { 1608 } 1609 1610 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1611 1612 void WordAwareIterator::advance() 1613 { 1614 m_buffer.clear(); 1615 1616 // If last time we did a look-ahead, start with that looked-ahead chunk now 1617 if (!m_didLookAhead) { 1618 ASSERT(!m_textIterator.atEnd()); 1619 m_textIterator.advance(); 1620 } 1621 m_didLookAhead = false; 1622 1623 // Go to next non-empty chunk. 1624 while (!m_textIterator.atEnd() && !m_textIterator.length()) 1625 m_textIterator.advance(); 1626 1627 m_range = m_textIterator.range(); 1628 1629 if (m_textIterator.atEnd()) 1630 return; 1631 1632 while (1) { 1633 // If this chunk ends in whitespace we can just use it as our chunk. 1634 if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1))) 1635 return; 1636 1637 // If this is the first chunk that failed, save it in m_buffer before look ahead. 1638 if (m_buffer.isEmpty()) 1639 m_textIterator.appendTextTo(m_buffer); 1640 1641 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1642 m_textIterator.advance(); 1643 if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) { 1644 m_didLookAhead = true; 1645 return; 1646 } 1647 1648 // Start gobbling chunks until we get to a suitable stopping point 1649 m_textIterator.appendTextTo(m_buffer); 1650 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION); 1651 } 1652 } 1653 1654 int WordAwareIterator::length() const 1655 { 1656 if (!m_buffer.isEmpty()) 1657 return m_buffer.size(); 1658 return m_textIterator.length(); 1659 } 1660 1661 String WordAwareIterator::substring(unsigned position, unsigned length) const 1662 { 1663 if (!m_buffer.isEmpty()) 1664 return String(m_buffer.data() + position, length); 1665 return m_textIterator.substring(position, length); 1666 } 1667 1668 UChar WordAwareIterator::characterAt(unsigned index) const 1669 { 1670 if (!m_buffer.isEmpty()) 1671 return m_buffer[index]; 1672 return m_textIterator.characterAt(index); 1673 } 1674 1675 // -------- 1676 1677 static const size_t minimumSearchBufferSize = 8192; 1678 1679 #ifndef NDEBUG 1680 static bool searcherInUse; 1681 #endif 1682 1683 static UStringSearch* createSearcher() 1684 { 1685 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1686 // but it doesn't matter exactly what it is, since we don't perform any searches 1687 // without setting both the pattern and the text. 1688 UErrorCode status = U_ZERO_ERROR; 1689 String searchCollatorName = currentSearchLocaleID() + String("@collation=search"); 1690 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status); 1691 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1692 return searcher; 1693 } 1694 1695 static UStringSearch* searcher() 1696 { 1697 static UStringSearch* searcher = createSearcher(); 1698 return searcher; 1699 } 1700 1701 static inline void lockSearcher() 1702 { 1703 #ifndef NDEBUG 1704 ASSERT(!searcherInUse); 1705 searcherInUse = true; 1706 #endif 1707 } 1708 1709 static inline void unlockSearcher() 1710 { 1711 #ifndef NDEBUG 1712 ASSERT(searcherInUse); 1713 searcherInUse = false; 1714 #endif 1715 } 1716 1717 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1718 : m_options(options) 1719 , m_prefixLength(0) 1720 , m_numberOfCharactersJustAppended(0) 1721 , m_atBreak(true) 1722 , m_needsMoreContext(options & AtWordStarts) 1723 , m_targetRequiresKanaWorkaround(containsKanaLetters(target)) 1724 { 1725 ASSERT(!target.isEmpty()); 1726 target.appendTo(m_target); 1727 1728 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1729 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1730 // to add tailoring on top of the locale-specific tailoring as of this writing. 1731 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size()); 1732 1733 size_t targetLength = m_target.size(); 1734 m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize)); 1735 m_overlap = m_buffer.capacity() / 4; 1736 1737 if ((m_options & AtWordStarts) && targetLength) { 1738 UChar32 targetFirstCharacter; 1739 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter); 1740 // Characters in the separator category never really occur at the beginning of a word, 1741 // so if the target begins with such a character, we just ignore the AtWordStart option. 1742 if (isSeparator(targetFirstCharacter)) { 1743 m_options &= ~AtWordStarts; 1744 m_needsMoreContext = false; 1745 } 1746 } 1747 1748 // Grab the single global searcher. 1749 // If we ever have a reason to do more than once search buffer at once, we'll have 1750 // to move to multiple searchers. 1751 lockSearcher(); 1752 1753 UStringSearch* searcher = WebCore::searcher(); 1754 UCollator* collator = usearch_getCollator(searcher); 1755 1756 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 1757 if (ucol_getStrength(collator) != strength) { 1758 ucol_setStrength(collator, strength); 1759 usearch_reset(searcher); 1760 } 1761 1762 UErrorCode status = U_ZERO_ERROR; 1763 usearch_setPattern(searcher, m_target.data(), targetLength, &status); 1764 ASSERT(status == U_ZERO_ERROR); 1765 1766 // The kana workaround requires a normalized copy of the target string. 1767 if (m_targetRequiresKanaWorkaround) 1768 normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget); 1769 } 1770 1771 inline SearchBuffer::~SearchBuffer() 1772 { 1773 // Leave the static object pointing to a valid string. 1774 UErrorCode status = U_ZERO_ERROR; 1775 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1776 ASSERT(status == U_ZERO_ERROR); 1777 1778 unlockSearcher(); 1779 } 1780 1781 template<typename CharType> 1782 inline void SearchBuffer::append(const CharType* characters, size_t length) 1783 { 1784 ASSERT(length); 1785 1786 if (m_atBreak) { 1787 m_buffer.shrink(0); 1788 m_prefixLength = 0; 1789 m_atBreak = false; 1790 } else if (m_buffer.size() == m_buffer.capacity()) { 1791 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 1792 m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap); 1793 m_buffer.shrink(m_overlap); 1794 } 1795 1796 size_t oldLength = m_buffer.size(); 1797 size_t usableLength = min(m_buffer.capacity() - oldLength, length); 1798 ASSERT(usableLength); 1799 m_buffer.resize(oldLength + usableLength); 1800 UChar* destination = m_buffer.data() + oldLength; 1801 StringImpl::copyChars(destination, characters, usableLength); 1802 foldQuoteMarksAndSoftHyphens(destination, usableLength); 1803 m_numberOfCharactersJustAppended = usableLength; 1804 } 1805 1806 inline bool SearchBuffer::needsMoreContext() const 1807 { 1808 return m_needsMoreContext; 1809 } 1810 1811 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 1812 { 1813 ASSERT(m_needsMoreContext); 1814 ASSERT(m_prefixLength == m_buffer.size()); 1815 1816 if (!length) 1817 return; 1818 1819 m_atBreak = false; 1820 1821 size_t wordBoundaryContextStart = length; 1822 if (wordBoundaryContextStart) { 1823 U16_BACK_1(characters, 0, wordBoundaryContextStart); 1824 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 1825 } 1826 1827 size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 1828 m_buffer.prepend(characters + length - usableLength, usableLength); 1829 m_prefixLength += usableLength; 1830 1831 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 1832 m_needsMoreContext = false; 1833 } 1834 1835 inline bool SearchBuffer::atBreak() const 1836 { 1837 return m_atBreak; 1838 } 1839 1840 inline void SearchBuffer::reachedBreak() 1841 { 1842 m_atBreak = true; 1843 } 1844 1845 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 1846 { 1847 // This function implements the kana workaround. If usearch treats 1848 // it as a match, but we do not want to, then it's a "bad match". 1849 if (!m_targetRequiresKanaWorkaround) 1850 return false; 1851 1852 // Normalize into a match buffer. We reuse a single buffer rather than 1853 // creating a new one each time. 1854 normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch); 1855 1856 return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size()); 1857 } 1858 1859 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 1860 { 1861 ASSERT(m_options & AtWordStarts); 1862 1863 if (!start) 1864 return true; 1865 1866 int size = m_buffer.size(); 1867 int offset = start; 1868 UChar32 firstCharacter; 1869 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 1870 1871 if (m_options & TreatMedialCapitalAsWordStart) { 1872 UChar32 previousCharacter; 1873 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 1874 1875 if (isSeparator(firstCharacter)) { 1876 // The start of a separator run is a word start (".org" in "webkit.org"). 1877 if (!isSeparator(previousCharacter)) 1878 return true; 1879 } else if (isASCIIUpper(firstCharacter)) { 1880 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 1881 if (!isASCIIUpper(previousCharacter)) 1882 return true; 1883 // The last character of an uppercase run followed by a non-separator, non-digit 1884 // is a word start ("Request" in "XMLHTTPRequest"). 1885 offset = start; 1886 U16_FWD_1(m_buffer.data(), offset, size); 1887 UChar32 nextCharacter = 0; 1888 if (offset < size) 1889 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 1890 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 1891 return true; 1892 } else if (isASCIIDigit(firstCharacter)) { 1893 // The start of a digit run is a word start ("2" in "WebKit2"). 1894 if (!isASCIIDigit(previousCharacter)) 1895 return true; 1896 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 1897 // The start of a non-separator, non-uppercase, non-digit run is a word start, 1898 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 1899 return true; 1900 } 1901 } 1902 1903 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes 1904 // a word, so treat the position before any CJK character as a word start. 1905 if (Font::isCJKIdeographOrSymbol(firstCharacter)) 1906 return true; 1907 1908 size_t wordBreakSearchStart = start + length; 1909 while (wordBreakSearchStart > start) 1910 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 1911 return wordBreakSearchStart == start; 1912 } 1913 1914 inline size_t SearchBuffer::search(size_t& start) 1915 { 1916 size_t size = m_buffer.size(); 1917 if (m_atBreak) { 1918 if (!size) 1919 return 0; 1920 } else { 1921 if (size != m_buffer.capacity()) 1922 return 0; 1923 } 1924 1925 UStringSearch* searcher = WebCore::searcher(); 1926 1927 UErrorCode status = U_ZERO_ERROR; 1928 usearch_setText(searcher, m_buffer.data(), size, &status); 1929 ASSERT(status == U_ZERO_ERROR); 1930 1931 usearch_setOffset(searcher, m_prefixLength, &status); 1932 ASSERT(status == U_ZERO_ERROR); 1933 1934 int matchStart = usearch_next(searcher, &status); 1935 ASSERT(status == U_ZERO_ERROR); 1936 1937 nextMatch: 1938 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 1939 ASSERT(matchStart == USEARCH_DONE); 1940 return 0; 1941 } 1942 1943 // Matches that start in the overlap area are only tentative. 1944 // The same match may appear later, matching more characters, 1945 // possibly including a combining character that's not yet in the buffer. 1946 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 1947 size_t overlap = m_overlap; 1948 if (m_options & AtWordStarts) { 1949 // Ensure that there is sufficient context before matchStart the next time around for 1950 // determining if it is at a word boundary. 1951 int wordBoundaryContextStart = matchStart; 1952 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 1953 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 1954 overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart)); 1955 } 1956 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 1957 m_prefixLength -= min(m_prefixLength, size - overlap); 1958 m_buffer.shrink(overlap); 1959 return 0; 1960 } 1961 1962 size_t matchedLength = usearch_getMatchedLength(searcher); 1963 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); 1964 1965 // If this match is "bad", move on to the next match. 1966 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 1967 matchStart = usearch_next(searcher, &status); 1968 ASSERT(status == U_ZERO_ERROR); 1969 goto nextMatch; 1970 } 1971 1972 size_t newSize = size - (matchStart + 1); 1973 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 1974 m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1); 1975 m_buffer.shrink(newSize); 1976 1977 start = size - matchStart; 1978 return matchedLength; 1979 } 1980 1981 // -------- 1982 1983 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 1984 { 1985 int length = 0; 1986 for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) 1987 length += it.length(); 1988 1989 return length; 1990 } 1991 1992 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 1993 { 1994 CharacterIterator entireRangeIterator(entireRange); 1995 return characterSubrange(entireRangeIterator, characterOffset, characterCount); 1996 } 1997 1998 // -------- 1999 2000 String plainText(const Range* r, TextIteratorBehaviorFlags behavior) 2001 { 2002 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192 2003 static const unsigned initialCapacity = 1 << 15; 2004 2005 unsigned bufferLength = 0; 2006 StringBuilder builder; 2007 builder.reserveCapacity(initialCapacity); 2008 2009 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2010 it.appendTextToStringBuilder(builder); 2011 bufferLength += it.length(); 2012 } 2013 2014 if (!bufferLength) 2015 return emptyString(); 2016 2017 return builder.toString(); 2018 } 2019 2020 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2021 { 2022 RefPtr<Range> result = range->cloneRange(ASSERT_NO_EXCEPTION); 2023 result->collapse(!forward, ASSERT_NO_EXCEPTION); 2024 return result.release(); 2025 } 2026 2027 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2028 { 2029 matchStart = 0; 2030 size_t matchLength = 0; 2031 2032 SearchBuffer buffer(target, options); 2033 2034 if (buffer.needsMoreContext()) { 2035 RefPtr<Range> startRange = it.range(); 2036 RefPtr<Range> beforeStartRange = startRange->ownerDocument().createRange(); 2037 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION); 2038 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2039 Vector<UChar, 1024> characters; 2040 backwardsIterator.prependTextTo(characters); 2041 buffer.prependContext(characters.data(), characters.size()); 2042 if (!buffer.needsMoreContext()) 2043 break; 2044 } 2045 } 2046 2047 while (!it.atEnd()) { 2048 it.appendTextTo(buffer); 2049 it.advance(buffer.numberOfCharactersJustAppended()); 2050 tryAgain: 2051 size_t matchStartOffset; 2052 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2053 // Note that we found a match, and where we found it. 2054 size_t lastCharacterInBufferOffset = it.characterOffset(); 2055 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2056 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2057 matchLength = newMatchLength; 2058 // If searching forward, stop on the first match. 2059 // If searching backward, don't stop, so we end up with the last match. 2060 if (!(options & Backwards)) 2061 break; 2062 goto tryAgain; 2063 } 2064 if (it.atBreak() && !buffer.atBreak()) { 2065 buffer.reachedBreak(); 2066 goto tryAgain; 2067 } 2068 } 2069 2070 return matchLength; 2071 } 2072 2073 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2074 { 2075 // CharacterIterator requires renderers to be up-to-date 2076 range->ownerDocument().updateLayout(); 2077 2078 // First, find the text. 2079 size_t matchStart; 2080 size_t matchLength; 2081 { 2082 CharacterIterator findIterator(range, TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots); 2083 matchLength = findPlainText(findIterator, target, options, matchStart); 2084 if (!matchLength) 2085 return collapsedToBoundary(range, !(options & Backwards)); 2086 } 2087 2088 // Then, find the document position of the start and the end of the text. 2089 CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots); 2090 return characterSubrange(computeRangeIterator, matchStart, matchLength); 2091 } 2092 2093 } 2094