Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
      3  * Copyright (C) 2005 Alexey Proskuryakov.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/editing/TextIterator.h"
     29 
     30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
     31 #include "core/HTMLNames.h"
     32 #include "core/dom/Document.h"
     33 #include "core/dom/NodeTraversal.h"
     34 #include "core/dom/shadow/ShadowRoot.h"
     35 #include "core/editing/VisiblePosition.h"
     36 #include "core/editing/VisibleUnits.h"
     37 #include "core/editing/htmlediting.h"
     38 #include "core/frame/FrameView.h"
     39 #include "core/html/HTMLElement.h"
     40 #include "core/html/HTMLTextFormControlElement.h"
     41 #include "core/rendering/InlineTextBox.h"
     42 #include "core/rendering/RenderImage.h"
     43 #include "core/rendering/RenderTableCell.h"
     44 #include "core/rendering/RenderTableRow.h"
     45 #include "core/rendering/RenderTextControl.h"
     46 #include "core/rendering/RenderTextFragment.h"
     47 #include "platform/fonts/Character.h"
     48 #include "platform/fonts/Font.h"
     49 #include "platform/text/TextBoundaries.h"
     50 #include "platform/text/TextBreakIteratorInternalICU.h"
     51 #include "platform/text/UnicodeUtilities.h"
     52 #include "wtf/text/CString.h"
     53 #include "wtf/text/StringBuilder.h"
     54 #include "wtf/unicode/CharacterNames.h"
     55 #include <unicode/usearch.h>
     56 #include <unicode/utf16.h>
     57 
     58 using namespace WTF::Unicode;
     59 
     60 namespace blink {
     61 
     62 using namespace HTMLNames;
     63 
     64 // Buffer that knows how to compare with a search target.
     65 // Keeps enough of the previous text to be able to search in the future, but no more.
     66 // Non-breaking spaces are always equal to normal spaces.
     67 // Case folding is also done if the CaseInsensitive option is specified.
     68 // Matches are further filtered if the AtWordStarts option is specified, although some
     69 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
     70 class SearchBuffer {
     71     WTF_MAKE_NONCOPYABLE(SearchBuffer);
     72 public:
     73     SearchBuffer(const String& target, FindOptions);
     74     ~SearchBuffer();
     75 
     76     // Returns number of characters appended; guaranteed to be in the range [1, length].
     77     template<typename CharType>
     78     void append(const CharType*, size_t length);
     79     size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
     80 
     81     bool needsMoreContext() const;
     82     void prependContext(const UChar*, size_t length);
     83     void reachedBreak();
     84 
     85     // Result is the size in characters of what was found.
     86     // And <startOffset> is the number of characters back to the start of what was found.
     87     size_t search(size_t& startOffset);
     88     bool atBreak() const;
     89 
     90 private:
     91     bool isBadMatch(const UChar*, size_t length) const;
     92     bool isWordStartMatch(size_t start, size_t length) const;
     93 
     94     Vector<UChar> m_target;
     95     FindOptions m_options;
     96 
     97     Vector<UChar> m_buffer;
     98     size_t m_overlap;
     99     size_t m_prefixLength;
    100     size_t m_numberOfCharactersJustAppended;
    101     bool m_atBreak;
    102     bool m_needsMoreContext;
    103 
    104     bool m_targetRequiresKanaWorkaround;
    105     Vector<UChar> m_normalizedTarget;
    106     mutable Vector<UChar> m_normalizedMatch;
    107 };
    108 
    109 // --------
    110 
    111 static const unsigned bitsInWord = sizeof(unsigned) * 8;
    112 static const unsigned bitInWordMask = bitsInWord - 1;
    113 
    114 BitStack::BitStack()
    115     : m_size(0)
    116 {
    117 }
    118 
    119 BitStack::~BitStack()
    120 {
    121 }
    122 
    123 void BitStack::push(bool bit)
    124 {
    125     unsigned index = m_size / bitsInWord;
    126     unsigned shift = m_size & bitInWordMask;
    127     if (!shift && index == m_words.size()) {
    128         m_words.grow(index + 1);
    129         m_words[index] = 0;
    130     }
    131     unsigned& word = m_words[index];
    132     unsigned mask = 1U << shift;
    133     if (bit)
    134         word |= mask;
    135     else
    136         word &= ~mask;
    137     ++m_size;
    138 }
    139 
    140 void BitStack::pop()
    141 {
    142     if (m_size)
    143         --m_size;
    144 }
    145 
    146 bool BitStack::top() const
    147 {
    148     if (!m_size)
    149         return false;
    150     unsigned shift = (m_size - 1) & bitInWordMask;
    151     return m_words.last() & (1U << shift);
    152 }
    153 
    154 unsigned BitStack::size() const
    155 {
    156     return m_size;
    157 }
    158 
    159 // --------
    160 
    161 #if ENABLE(ASSERT)
    162 
    163 static unsigned depthCrossingShadowBoundaries(Node* node)
    164 {
    165     unsigned depth = 0;
    166     for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    167         ++depth;
    168     return depth;
    169 }
    170 
    171 #endif
    172 
    173 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
    174 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
    175 {
    176     if (!rangeEndContainer)
    177         return 0;
    178     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
    179         if (Node* next = NodeTraversal::childAt(*rangeEndContainer, rangeEndOffset))
    180             return next;
    181     }
    182     for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
    183         if (Node* next = node->nextSibling())
    184             return next;
    185     }
    186     return 0;
    187 }
    188 
    189 // --------
    190 
    191 static inline bool fullyClipsContents(Node* node)
    192 {
    193     RenderObject* renderer = node->renderer();
    194     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
    195         return false;
    196     return toRenderBox(renderer)->size().isEmpty();
    197 }
    198 
    199 static inline bool ignoresContainerClip(Node* node)
    200 {
    201     RenderObject* renderer = node->renderer();
    202     if (!renderer || renderer->isText())
    203         return false;
    204     return renderer->style()->hasOutOfFlowPosition();
    205 }
    206 
    207 static void pushFullyClippedState(BitStack& stack, Node* node)
    208 {
    209     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
    210 
    211     // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364>
    212     // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if
    213     // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches
    214     // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics
    215     // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox).
    216     //
    217     // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering
    218     // primitives, not with DOM primitives.
    219 
    220     // Push true if this node full clips its contents, or if a parent already has fully
    221     // clipped and this is not a node that ignores its container's clip.
    222     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
    223 }
    224 
    225 static void setUpFullyClippedStack(BitStack& stack, Node* node)
    226 {
    227     // Put the nodes in a vector so we can iterate in reverse order.
    228     WillBeHeapVector<RawPtrWillBeMember<ContainerNode>, 100> ancestry;
    229     for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    230         ancestry.append(parent);
    231 
    232     // Call pushFullyClippedState on each node starting with the earliest ancestor.
    233     size_t size = ancestry.size();
    234     for (size_t i = 0; i < size; ++i)
    235         pushFullyClippedState(stack, ancestry[size - i - 1]);
    236     pushFullyClippedState(stack, node);
    237 
    238     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
    239 }
    240 
    241 // --------
    242 
    243 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior)
    244     : m_startContainer(nullptr)
    245     , m_startOffset(0)
    246     , m_endContainer(nullptr)
    247     , m_endOffset(0)
    248     , m_positionNode(nullptr)
    249     , m_textLength(0)
    250     , m_needsAnotherNewline(false)
    251     , m_textBox(0)
    252     , m_remainingTextBox(0)
    253     , m_firstLetterText(nullptr)
    254     , m_lastTextNode(nullptr)
    255     , m_lastTextNodeEndedWithCollapsedSpace(false)
    256     , m_lastCharacter(0)
    257     , m_sortedTextBoxesPosition(0)
    258     , m_hasEmitted(false)
    259     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    260     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    261     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
    262     , m_handledFirstLetter(false)
    263     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    264     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
    265     , m_shouldStop(false)
    266     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
    267     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
    268     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
    269 {
    270     if (range)
    271         initialize(range->startPosition(), range->endPosition());
    272 }
    273 
    274 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
    275     : m_startContainer(nullptr)
    276     , m_startOffset(0)
    277     , m_endContainer(nullptr)
    278     , m_endOffset(0)
    279     , m_positionNode(nullptr)
    280     , m_textLength(0)
    281     , m_needsAnotherNewline(false)
    282     , m_textBox(0)
    283     , m_remainingTextBox(0)
    284     , m_firstLetterText(nullptr)
    285     , m_lastTextNode(nullptr)
    286     , m_lastTextNodeEndedWithCollapsedSpace(false)
    287     , m_lastCharacter(0)
    288     , m_sortedTextBoxesPosition(0)
    289     , m_hasEmitted(false)
    290     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    291     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    292     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
    293     , m_handledFirstLetter(false)
    294     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    295     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
    296     , m_shouldStop(false)
    297     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
    298     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
    299     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
    300 {
    301     initialize(start, end);
    302 }
    303 
    304 void TextIterator::initialize(const Position& start, const Position& end)
    305 {
    306     ASSERT(comparePositions(start, end) <= 0);
    307 
    308     // Get and validate |start| and |end|.
    309     Node* startContainer = start.containerNode();
    310     if (!startContainer)
    311         return;
    312     int startOffset = start.computeOffsetInContainerNode();
    313     Node* endContainer = end.containerNode();
    314     if (!endContainer)
    315         return;
    316     int endOffset = end.computeOffsetInContainerNode();
    317 
    318     // Remember the range - this does not change.
    319     m_startContainer = startContainer;
    320     m_startOffset = startOffset;
    321     m_endContainer = endContainer;
    322     m_endOffset = endOffset;
    323 
    324     // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from
    325     // the common ancestor tree scope.
    326     const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope());
    327     ASSERT(commonAncestorTreeScope);
    328     m_shadowDepth = 0;
    329     for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope())
    330         ++m_shadowDepth;
    331 
    332     // Set up the current node for processing.
    333     if (startContainer->offsetInCharacters())
    334         m_node = startContainer;
    335     else if (Node* child = NodeTraversal::childAt(*startContainer, startOffset))
    336         m_node = child;
    337     else if (!startOffset)
    338         m_node = startContainer;
    339     else
    340         m_node = NodeTraversal::nextSkippingChildren(*startContainer);
    341 
    342     if (!m_node)
    343         return;
    344 
    345     m_node->document().updateLayoutIgnorePendingStylesheets();
    346 
    347     setUpFullyClippedStack(m_fullyClippedStack, m_node);
    348     m_offset = m_node == m_startContainer ? m_startOffset : 0;
    349     m_iterationProgress = HandledNone;
    350 
    351     // Calculate first out of bounds node.
    352     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
    353 
    354     // Identify the first run.
    355     advance();
    356 }
    357 
    358 TextIterator::~TextIterator()
    359 {
    360 }
    361 
    362 bool TextIterator::isInsideReplacedElement() const
    363 {
    364     if (atEnd() || length() != 1 || !m_node)
    365         return false;
    366 
    367     RenderObject* renderer = m_node->renderer();
    368     return renderer && renderer->isReplaced();
    369 }
    370 
    371 void TextIterator::advance()
    372 {
    373     if (m_shouldStop)
    374         return;
    375 
    376     ASSERT(!m_node || !m_node->document().needsRenderTreeUpdate());
    377 
    378     // reset the run information
    379     m_positionNode = nullptr;
    380     m_textLength = 0;
    381 
    382     // handle remembered node that needed a newline after the text node's newline
    383     if (m_needsAnotherNewline) {
    384         // Emit the extra newline, and position it *inside* m_node, after m_node's
    385         // contents, in case it's a block, in the same way that we position the first
    386         // newline. The range for the emitted newline should start where the line
    387         // break begins.
    388         // FIXME: It would be cleaner if we emitted two newlines during the last
    389         // iteration, instead of using m_needsAnotherNewline.
    390         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
    391         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    392         m_needsAnotherNewline = false;
    393         return;
    394     }
    395 
    396     if (!m_textBox && m_remainingTextBox) {
    397         m_textBox = m_remainingTextBox;
    398         m_remainingTextBox = 0;
    399         m_firstLetterText = nullptr;
    400         m_offset = 0;
    401     }
    402     // handle remembered text box
    403     if (m_textBox) {
    404         handleTextBox();
    405         if (m_positionNode)
    406             return;
    407     }
    408 
    409     while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) {
    410         if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
    411             m_shouldStop = true;
    412 
    413         // if the range ends at offset 0 of an element, represent the
    414         // position, but not the content, of that element e.g. if the
    415         // node is a blockflow element, emit a newline that
    416         // precedes the element
    417         if (m_node == m_endContainer && !m_endOffset) {
    418             representNodeOffsetZero();
    419             m_node = nullptr;
    420             return;
    421         }
    422 
    423         RenderObject* renderer = m_node->renderer();
    424         if (!renderer) {
    425             if (m_node->isShadowRoot()) {
    426                 // A shadow root doesn't have a renderer, but we want to visit children anyway.
    427                 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress;
    428             } else {
    429                 m_iterationProgress = HandledChildren;
    430             }
    431         } else {
    432             // Enter author shadow roots, from youngest, if any and if necessary.
    433             if (m_iterationProgress < HandledAuthorShadowRoots) {
    434                 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) {
    435                     ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot();
    436                     ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot);
    437                     m_node = youngestShadowRoot;
    438                     m_iterationProgress = HandledNone;
    439                     ++m_shadowDepth;
    440                     pushFullyClippedState(m_fullyClippedStack, m_node);
    441                     continue;
    442                 }
    443 
    444                 m_iterationProgress = HandledAuthorShadowRoots;
    445             }
    446 
    447             // Enter user-agent shadow root, if necessary.
    448             if (m_iterationProgress < HandledUserAgentShadowRoot) {
    449                 if (m_entersTextControls && renderer->isTextControl()) {
    450                     ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot();
    451                     ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
    452                     m_node = userAgentShadowRoot;
    453                     m_iterationProgress = HandledNone;
    454                     ++m_shadowDepth;
    455                     pushFullyClippedState(m_fullyClippedStack, m_node);
    456                     continue;
    457                 }
    458                 m_iterationProgress = HandledUserAgentShadowRoot;
    459             }
    460 
    461             // Handle the current node according to its type.
    462             if (m_iterationProgress < HandledNode) {
    463                 bool handledNode = false;
    464                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE?
    465                     handledNode = handleTextNode();
    466                 } else if (renderer && (renderer->isImage() || renderer->isWidget()
    467                     || (m_node && m_node->isHTMLElement()
    468                     && (isHTMLFormControlElement(toHTMLElement(*m_node))
    469                     || isHTMLLegendElement(toHTMLElement(*m_node))
    470                     || isHTMLMeterElement(toHTMLElement(*m_node))
    471                     || isHTMLProgressElement(toHTMLElement(*m_node)))))) {
    472                     handledNode = handleReplacedElement();
    473                 } else {
    474                     handledNode = handleNonTextNode();
    475                 }
    476                 if (handledNode)
    477                     m_iterationProgress = HandledNode;
    478                 if (m_positionNode)
    479                     return;
    480             }
    481         }
    482 
    483         // Find a new current node to handle in depth-first manner,
    484         // calling exitNode() as we come back thru a parent node.
    485         //
    486         // 1. Iterate over child nodes, if we haven't done yet.
    487         Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0;
    488         m_offset = 0;
    489         if (!next) {
    490             // 2. If we've already iterated children or they are not available, go to the next sibling node.
    491             next = m_node->nextSibling();
    492             if (!next) {
    493                 // 3. If we are at the last child, go up the node tree until we find a next sibling.
    494                 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode;
    495                 ContainerNode* parentNode = m_node->parentNode();
    496                 while (!next && parentNode) {
    497                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
    498                         return;
    499                     bool haveRenderer = m_node->renderer();
    500                     m_node = parentNode;
    501                     m_fullyClippedStack.pop();
    502                     parentNode = m_node->parentNode();
    503                     if (haveRenderer)
    504                         exitNode();
    505                     if (m_positionNode) {
    506                         m_iterationProgress = HandledChildren;
    507                         return;
    508                     }
    509                     next = m_node->nextSibling();
    510                 }
    511 
    512                 if (!next && !parentNode && m_shadowDepth > 0) {
    513                     // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next
    514                     // sibling shadow root, if any.
    515                     ShadowRoot* shadowRoot = toShadowRoot(m_node);
    516                     if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
    517                         ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot();
    518                         if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
    519                             m_fullyClippedStack.pop();
    520                             m_node = nextShadowRoot;
    521                             m_iterationProgress = HandledNone;
    522                             // m_shadowDepth is unchanged since we exit from a shadow root and enter another.
    523                             pushFullyClippedState(m_fullyClippedStack, m_node);
    524                         } else {
    525                             // We are the last shadow root; exit from here and go back to where we were.
    526                             m_node = shadowRoot->host();
    527                             m_iterationProgress = HandledAuthorShadowRoots;
    528                             --m_shadowDepth;
    529                             m_fullyClippedStack.pop();
    530                         }
    531                     } else {
    532                         // If we are in a user-agent shadow root, then go back to the host.
    533                         ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
    534                         m_node = shadowRoot->host();
    535                         m_iterationProgress = HandledUserAgentShadowRoot;
    536                         --m_shadowDepth;
    537                         m_fullyClippedStack.pop();
    538                     }
    539                     m_handledFirstLetter = false;
    540                     m_firstLetterText = nullptr;
    541                     continue;
    542                 }
    543             }
    544             m_fullyClippedStack.pop();
    545         }
    546 
    547         // set the new current node
    548         m_node = next;
    549         if (m_node)
    550             pushFullyClippedState(m_fullyClippedStack, m_node);
    551         m_iterationProgress = HandledNone;
    552         m_handledFirstLetter = false;
    553         m_firstLetterText = nullptr;
    554 
    555         // how would this ever be?
    556         if (m_positionNode)
    557             return;
    558     }
    559 }
    560 
    561 UChar TextIterator::characterAt(unsigned index) const
    562 {
    563     ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
    564     if (!(index < static_cast<unsigned>(length())))
    565         return 0;
    566 
    567     if (m_singleCharacterBuffer) {
    568         ASSERT(!index);
    569         ASSERT(length() == 1);
    570         return m_singleCharacterBuffer;
    571     }
    572 
    573     return string()[positionStartOffset() + index];
    574 }
    575 
    576 String TextIterator::substring(unsigned position, unsigned length) const
    577 {
    578     ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length()));
    579     ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
    580     if (!length)
    581         return emptyString();
    582     if (m_singleCharacterBuffer) {
    583         ASSERT(!position);
    584         ASSERT(length == 1);
    585         return String(&m_singleCharacterBuffer, 1);
    586     }
    587     return string().substring(positionStartOffset() + position, length);
    588 }
    589 
    590 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
    591 {
    592     unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
    593     if (!lengthToAppend)
    594         return;
    595     if (m_singleCharacterBuffer) {
    596         ASSERT(!position);
    597         builder.append(m_singleCharacterBuffer);
    598     } else {
    599         builder.append(string(), positionStartOffset() + position, lengthToAppend);
    600     }
    601 }
    602 
    603 bool TextIterator::handleTextNode()
    604 {
    605     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
    606         return false;
    607 
    608     Text* textNode = toText(m_node);
    609     RenderText* renderer = textNode->renderer();
    610 
    611     m_lastTextNode = textNode;
    612     String str = renderer->text();
    613 
    614     // handle pre-formatted text
    615     if (!renderer->style()->collapseWhiteSpace()) {
    616         int runStart = m_offset;
    617         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
    618             emitCharacter(space, textNode, 0, runStart, runStart);
    619             return false;
    620         }
    621         if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
    622             handleTextNodeFirstLetter(toRenderTextFragment(renderer));
    623             if (m_firstLetterText) {
    624                 String firstLetter = m_firstLetterText->text();
    625                 emitText(textNode, m_firstLetterText, m_offset, m_offset + firstLetter.length());
    626                 m_firstLetterText = nullptr;
    627                 m_textBox = 0;
    628                 return false;
    629             }
    630         }
    631         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    632             return false;
    633         int strLength = str.length();
    634         int end = (textNode == m_endContainer) ? m_endOffset : INT_MAX;
    635         int runEnd = std::min(strLength, end);
    636 
    637         if (runStart >= runEnd)
    638             return true;
    639 
    640         emitText(textNode, textNode->renderer(), runStart, runEnd);
    641         return true;
    642     }
    643 
    644     if (renderer->firstTextBox())
    645         m_textBox = renderer->firstTextBox();
    646 
    647     bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
    648     if (shouldHandleFirstLetter)
    649         handleTextNodeFirstLetter(toRenderTextFragment(renderer));
    650 
    651     if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
    652         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    653             return false;
    654         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
    655         return true;
    656     }
    657 
    658     if (m_firstLetterText)
    659         renderer = m_firstLetterText;
    660 
    661     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
    662     if (renderer->containsReversedText()) {
    663         m_sortedTextBoxes.clear();
    664         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
    665             m_sortedTextBoxes.append(textBox);
    666         }
    667         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
    668         m_sortedTextBoxesPosition = 0;
    669         m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
    670     }
    671 
    672     handleTextBox();
    673     return true;
    674 }
    675 
    676 void TextIterator::handleTextBox()
    677 {
    678     RenderText* renderer = m_firstLetterText ? m_firstLetterText.get() : toRenderText(m_node->renderer());
    679     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
    680         m_textBox = 0;
    681         return;
    682     }
    683     String str = renderer->text();
    684     unsigned start = m_offset;
    685     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
    686     while (m_textBox) {
    687         unsigned textBoxStart = m_textBox->start();
    688         unsigned runStart = std::max(textBoxStart, start);
    689 
    690         // Check for collapsed space at the start of this run.
    691         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
    692         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
    693             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
    694         if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
    695             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
    696                 unsigned spaceRunStart = runStart - 1;
    697                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
    698                     --spaceRunStart;
    699                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
    700             } else {
    701                 emitCharacter(space, m_node, 0, runStart, runStart);
    702             }
    703             return;
    704         }
    705         unsigned textBoxEnd = textBoxStart + m_textBox->len();
    706         unsigned runEnd = std::min(textBoxEnd, end);
    707 
    708         // Determine what the next text box will be, but don't advance yet
    709         InlineTextBox* nextTextBox = 0;
    710         if (renderer->containsReversedText()) {
    711             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
    712                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
    713         } else {
    714             nextTextBox = m_textBox->nextTextBox();
    715         }
    716         ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
    717 
    718         if (runStart < runEnd) {
    719             // Handle either a single newline character (which becomes a space),
    720             // or a run of characters that does not include a newline.
    721             // This effectively translates newlines to spaces without copying the text.
    722             if (str[runStart] == '\n') {
    723                 emitCharacter(space, m_node, 0, runStart, runStart + 1);
    724                 m_offset = runStart + 1;
    725             } else {
    726                 size_t subrunEnd = str.find('\n', runStart);
    727                 if (subrunEnd == kNotFound || subrunEnd > runEnd)
    728                     subrunEnd = runEnd;
    729 
    730                 m_offset = subrunEnd;
    731                 emitText(m_node, renderer, runStart, subrunEnd);
    732             }
    733 
    734             // If we are doing a subrun that doesn't go to the end of the text box,
    735             // come back again to finish handling this text box; don't advance to the next one.
    736             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
    737                 return;
    738 
    739             // Advance and return
    740             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
    741             if (nextRunStart > runEnd)
    742                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
    743             m_textBox = nextTextBox;
    744             if (renderer->containsReversedText())
    745                 ++m_sortedTextBoxesPosition;
    746             return;
    747         }
    748         // Advance and continue
    749         m_textBox = nextTextBox;
    750         if (renderer->containsReversedText())
    751             ++m_sortedTextBoxesPosition;
    752     }
    753     if (!m_textBox && m_remainingTextBox) {
    754         m_textBox = m_remainingTextBox;
    755         m_remainingTextBox = 0;
    756         m_firstLetterText = nullptr;
    757         m_offset = 0;
    758         handleTextBox();
    759     }
    760 }
    761 
    762 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
    763 {
    764     if (!firstLetter)
    765         return 0;
    766 
    767     // FIXME: Should this check descendent objects?
    768     for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) {
    769         if (current->isText())
    770             return toRenderText(current);
    771     }
    772     return 0;
    773 }
    774 
    775 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
    776 {
    777     if (renderer->firstLetter()) {
    778         RenderBoxModelObject* r = renderer->firstLetter();
    779         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    780             return;
    781         if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
    782             m_handledFirstLetter = true;
    783             m_remainingTextBox = m_textBox;
    784             m_textBox = firstLetter->firstTextBox();
    785             m_sortedTextBoxes.clear();
    786             m_firstLetterText = firstLetter;
    787         }
    788     }
    789     m_handledFirstLetter = true;
    790 }
    791 
    792 bool TextIterator::handleReplacedElement()
    793 {
    794     if (m_fullyClippedStack.top())
    795         return false;
    796 
    797     RenderObject* renderer = m_node->renderer();
    798     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    799         return false;
    800 
    801     if (m_emitsObjectReplacementCharacter) {
    802         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
    803         return true;
    804     }
    805 
    806     if (m_lastTextNodeEndedWithCollapsedSpace) {
    807         emitCharacter(space, m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
    808         return false;
    809     }
    810 
    811     if (m_entersTextControls && renderer->isTextControl()) {
    812         // The shadow tree should be already visited.
    813         return true;
    814     }
    815 
    816     m_hasEmitted = true;
    817 
    818     if (m_emitsCharactersBetweenAllVisiblePositions) {
    819         // We want replaced elements to behave like punctuation for boundary
    820         // finding, and to simply take up space for the selection preservation
    821         // code in moveParagraphs, so we use a comma.
    822         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
    823         return true;
    824     }
    825 
    826     m_positionNode = m_node->parentNode();
    827     m_positionOffsetBaseNode = m_node;
    828     m_positionStartOffset = 0;
    829     m_positionEndOffset = 1;
    830     m_singleCharacterBuffer = 0;
    831 
    832     if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
    833         m_text = toRenderImage(renderer)->altText();
    834         if (!m_text.isEmpty()) {
    835             m_textLength = m_text.length();
    836             m_lastCharacter = m_text[m_textLength - 1];
    837             return true;
    838         }
    839     }
    840 
    841     m_textLength = 0;
    842     m_lastCharacter = 0;
    843 
    844     return true;
    845 }
    846 
    847 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
    848 {
    849     if (renderer->style()->visibility() == VISIBLE)
    850         return true;
    851     if (renderer->isTextFragment()) {
    852         RenderTextFragment* fragment = toRenderTextFragment(renderer);
    853         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
    854             return true;
    855     }
    856     return false;
    857 }
    858 
    859 static bool shouldEmitTabBeforeNode(Node* node)
    860 {
    861     RenderObject* r = node->renderer();
    862 
    863     // Table cells are delimited by tabs.
    864     if (!r || !isTableCell(node))
    865         return false;
    866 
    867     // Want a tab before every cell other than the first one
    868     RenderTableCell* rc = toRenderTableCell(r);
    869     RenderTable* t = rc->table();
    870     return t && (t->cellBefore(rc) || t->cellAbove(rc));
    871 }
    872 
    873 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
    874 {
    875     RenderObject* renderer = node->renderer();
    876 
    877     if (renderer ? !renderer->isBR() : !isHTMLBRElement(node))
    878         return false;
    879     return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost()));
    880 }
    881 
    882 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
    883 {
    884     // Block flow (versus inline flow) is represented by having
    885     // a newline both before and after the element.
    886     RenderObject* r = node.renderer();
    887     if (!r) {
    888         return (node.hasTagName(blockquoteTag)
    889             || node.hasTagName(ddTag)
    890             || node.hasTagName(divTag)
    891             || node.hasTagName(dlTag)
    892             || node.hasTagName(dtTag)
    893             || node.hasTagName(h1Tag)
    894             || node.hasTagName(h2Tag)
    895             || node.hasTagName(h3Tag)
    896             || node.hasTagName(h4Tag)
    897             || node.hasTagName(h5Tag)
    898             || node.hasTagName(h6Tag)
    899             || node.hasTagName(hrTag)
    900             || node.hasTagName(liTag)
    901             || node.hasTagName(listingTag)
    902             || node.hasTagName(olTag)
    903             || node.hasTagName(pTag)
    904             || node.hasTagName(preTag)
    905             || node.hasTagName(trTag)
    906             || node.hasTagName(ulTag));
    907     }
    908 
    909     // Need to make an exception for option and optgroup, because we want to
    910     // keep the legacy behavior before we added renderers to them.
    911     if (isHTMLOptionElement(node) || isHTMLOptGroupElement(node))
    912         return false;
    913 
    914     // Need to make an exception for table cells, because they are blocks, but we
    915     // want them tab-delimited rather than having newlines before and after.
    916     if (isTableCell(&node))
    917         return false;
    918 
    919     // Need to make an exception for table row elements, because they are neither
    920     // "inline" or "RenderBlock", but we want newlines for them.
    921     if (r->isTableRow()) {
    922         RenderTable* t = toRenderTableRow(r)->table();
    923         if (t && !t->isInline())
    924             return true;
    925     }
    926 
    927     return !r->isInline() && r->isRenderBlock()
    928         && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
    929 }
    930 
    931 static bool shouldEmitNewlineAfterNode(Node& node)
    932 {
    933     // FIXME: It should be better but slower to create a VisiblePosition here.
    934     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
    935         return false;
    936     // Check if this is the very last renderer in the document.
    937     // If so, then we should not emit a newline.
    938     Node* next = &node;
    939     do {
    940         next = NodeTraversal::nextSkippingChildren(*next);
    941         if (next && next->renderer())
    942             return true;
    943     } while (next);
    944     return false;
    945 }
    946 
    947 static bool shouldEmitNewlineBeforeNode(Node& node)
    948 {
    949     return shouldEmitNewlinesBeforeAndAfterNode(node);
    950 }
    951 
    952 static bool shouldEmitExtraNewlineForNode(Node* node)
    953 {
    954     // When there is a significant collapsed bottom margin, emit an extra
    955     // newline for a more realistic result. We end up getting the right
    956     // result even without margin collapsing. For example: <div><p>text</p></div>
    957     // will work right even if both the <div> and the <p> have bottom margins.
    958     RenderObject* r = node->renderer();
    959     if (!r || !r->isBox())
    960         return false;
    961 
    962     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
    963     // not to do this at all
    964     if (node->hasTagName(h1Tag)
    965         || node->hasTagName(h2Tag)
    966         || node->hasTagName(h3Tag)
    967         || node->hasTagName(h4Tag)
    968         || node->hasTagName(h5Tag)
    969         || node->hasTagName(h6Tag)
    970         || node->hasTagName(pTag)) {
    971         RenderStyle* style = r->style();
    972         if (style) {
    973             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
    974             int fontSize = style->fontDescription().computedPixelSize();
    975             if (bottomMargin * 2 >= fontSize)
    976                 return true;
    977         }
    978     }
    979 
    980     return false;
    981 }
    982 
    983 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
    984 {
    985     const String& text = renderer->text();
    986     int length = text.length();
    987     for (int i = textEnd; i < length; ++i) {
    988         if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
    989             return i - textEnd;
    990     }
    991 
    992     return length - textEnd;
    993 }
    994 
    995 static int maxOffsetIncludingCollapsedSpaces(Node* node)
    996 {
    997     int offset = caretMaxOffset(node);
    998 
    999     if (node->renderer() && node->renderer()->isText())
   1000         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
   1001 
   1002     return offset;
   1003 }
   1004 
   1005 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
   1006 bool TextIterator::shouldRepresentNodeOffsetZero()
   1007 {
   1008     if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTableElement(m_node))
   1009         return true;
   1010 
   1011     // Leave element positioned flush with start of a paragraph
   1012     // (e.g. do not insert tab before a table cell at the start of a paragraph)
   1013     if (m_lastCharacter == '\n')
   1014         return false;
   1015 
   1016     // Otherwise, show the position if we have emitted any characters
   1017     if (m_hasEmitted)
   1018         return true;
   1019 
   1020     // We've not emitted anything yet. Generally, there is no need for any positioning then.
   1021     // The only exception is when the element is visually not in the same line as
   1022     // the start of the range (e.g. the range starts at the end of the previous paragraph).
   1023     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
   1024     // make quicker checks to possibly avoid that. Another check that we could make is
   1025     // is whether the inline vs block flow changed since the previous visible element.
   1026     // I think we're already in a special enough case that that won't be needed, tho.
   1027 
   1028     // No character needed if this is the first node in the range.
   1029     if (m_node == m_startContainer)
   1030         return false;
   1031 
   1032     // If we are outside the start container's subtree, assume we need to emit.
   1033     // FIXME: m_startContainer could be an inline block
   1034     if (!m_node->isDescendantOf(m_startContainer))
   1035         return true;
   1036 
   1037     // If we started as m_startContainer offset 0 and the current node is a descendant of
   1038     // the start container, we already had enough context to correctly decide whether to
   1039     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
   1040     // so don't second guess that now.
   1041     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
   1042     // immaterial since we likely would have already emitted something by now.
   1043     if (!m_startOffset)
   1044         return false;
   1045 
   1046     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
   1047     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
   1048     // we would create VisiblePositions on every call to this function without this check.
   1049     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
   1050         || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node)))
   1051         return false;
   1052 
   1053     // The startPos.isNotNull() check is needed because the start could be before the body,
   1054     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
   1055     // The currPos.isNotNull() check is needed because positions in non-HTML content
   1056     // (like SVG) do not have visible positions, and we don't want to emit for them either.
   1057     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
   1058     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
   1059     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
   1060 }
   1061 
   1062 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
   1063 {
   1064     return isRenderedTableElement(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
   1065 }
   1066 
   1067 void TextIterator::representNodeOffsetZero()
   1068 {
   1069     // Emit a character to show the positioning of m_node.
   1070 
   1071     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
   1072     // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
   1073     // on m_node to see if it necessitates emitting a character first and will early return
   1074     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
   1075     if (shouldEmitTabBeforeNode(m_node)) {
   1076         if (shouldRepresentNodeOffsetZero())
   1077             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
   1078     } else if (shouldEmitNewlineBeforeNode(*m_node)) {
   1079         if (shouldRepresentNodeOffsetZero())
   1080             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
   1081     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
   1082         if (shouldRepresentNodeOffsetZero())
   1083             emitCharacter(space, m_node->parentNode(), m_node, 0, 0);
   1084     }
   1085 }
   1086 
   1087 bool TextIterator::handleNonTextNode()
   1088 {
   1089     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
   1090         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
   1091     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
   1092         emitCharacter(space, m_node->parentNode(), m_node, 0, 1);
   1093     else
   1094         representNodeOffsetZero();
   1095 
   1096     return true;
   1097 }
   1098 
   1099 void TextIterator::flushPositionOffsets() const
   1100 {
   1101     if (!m_positionOffsetBaseNode)
   1102         return;
   1103     int index = m_positionOffsetBaseNode->nodeIndex();
   1104     m_positionStartOffset += index;
   1105     m_positionEndOffset += index;
   1106     m_positionOffsetBaseNode = nullptr;
   1107 }
   1108 
   1109 void TextIterator::exitNode()
   1110 {
   1111     // prevent emitting a newline when exiting a collapsed block at beginning of the range
   1112     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
   1113     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
   1114     // therefore look like a blank line.
   1115     if (!m_hasEmitted)
   1116         return;
   1117 
   1118     // Emit with a position *inside* m_node, after m_node's contents, in
   1119     // case it is a block, because the run should start where the
   1120     // emitted character is positioned visually.
   1121     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
   1122     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
   1123     // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
   1124     // TextIterator in _web_attributedStringFromRange.
   1125     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
   1126     if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
   1127         // use extra newline to represent margin bottom, as needed
   1128         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
   1129 
   1130         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
   1131         // contain a VisiblePosition when doing selection preservation.
   1132         if (m_lastCharacter != '\n') {
   1133             // insert a newline with a position following this block's contents.
   1134             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
   1135             // remember whether to later add a newline for the current node
   1136             ASSERT(!m_needsAnotherNewline);
   1137             m_needsAnotherNewline = addNewline;
   1138         } else if (addNewline) {
   1139             // insert a newline with a position following this block's contents.
   1140             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
   1141         }
   1142     }
   1143 
   1144     // If nothing was emitted, see if we need to emit a space.
   1145     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
   1146         emitCharacter(space, baseNode->parentNode(), baseNode, 1, 1);
   1147 }
   1148 
   1149 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
   1150 {
   1151     m_hasEmitted = true;
   1152 
   1153     // remember information with which to construct the TextIterator::range()
   1154     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
   1155     m_positionNode = textNode;
   1156     m_positionOffsetBaseNode = offsetBaseNode;
   1157     m_positionStartOffset = textStartOffset;
   1158     m_positionEndOffset = textEndOffset;
   1159 
   1160     // remember information with which to construct the TextIterator::characters() and length()
   1161     m_singleCharacterBuffer = c;
   1162     ASSERT(m_singleCharacterBuffer);
   1163     m_textLength = 1;
   1164 
   1165     // remember some iteration state
   1166     m_lastTextNodeEndedWithCollapsedSpace = false;
   1167     m_lastCharacter = c;
   1168 }
   1169 
   1170 void TextIterator::emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset)
   1171 {
   1172     m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text();
   1173     ASSERT(!m_text.isEmpty());
   1174     ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
   1175     ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
   1176     ASSERT(textStartOffset <= textEndOffset);
   1177 
   1178     m_positionNode = textNode;
   1179     m_positionOffsetBaseNode = nullptr;
   1180     m_positionStartOffset = textStartOffset;
   1181     m_positionEndOffset = textEndOffset;
   1182     m_singleCharacterBuffer = 0;
   1183     m_textLength = textEndOffset - textStartOffset;
   1184     m_lastCharacter = m_text[textEndOffset - 1];
   1185 
   1186     m_lastTextNodeEndedWithCollapsedSpace = false;
   1187     m_hasEmitted = true;
   1188 }
   1189 
   1190 PassRefPtrWillBeRawPtr<Range> TextIterator::createRange() const
   1191 {
   1192     // use the current run information, if we have it
   1193     if (m_positionNode) {
   1194         flushPositionOffsets();
   1195         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1196     }
   1197 
   1198     // otherwise, return the end of the overall range we were given
   1199     if (m_endContainer)
   1200         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
   1201 
   1202     return nullptr;
   1203 }
   1204 
   1205 Document* TextIterator::ownerDocument() const
   1206 {
   1207     if (m_positionNode)
   1208         return &m_positionNode->document();
   1209     if (m_endContainer)
   1210         return &m_endContainer->document();
   1211     return 0;
   1212 }
   1213 
   1214 Node* TextIterator::node() const
   1215 {
   1216     if (m_positionNode || m_endContainer) {
   1217         Node* node = startContainer();
   1218         if (node->offsetInCharacters())
   1219             return node;
   1220         return NodeTraversal::childAt(*node, startOffset());
   1221     }
   1222     return 0;
   1223 }
   1224 
   1225 int TextIterator::startOffset() const
   1226 {
   1227     if (m_positionNode) {
   1228         flushPositionOffsets();
   1229         return m_positionStartOffset;
   1230     }
   1231     ASSERT(m_endContainer);
   1232     return m_endOffset;
   1233 }
   1234 
   1235 int TextIterator::endOffset() const
   1236 {
   1237     if (m_positionNode) {
   1238         flushPositionOffsets();
   1239         return m_positionEndOffset;
   1240     }
   1241     ASSERT(m_endContainer);
   1242     return m_endOffset;
   1243 }
   1244 
   1245 Node* TextIterator::startContainer() const
   1246 {
   1247     if (m_positionNode) {
   1248         return m_positionNode;
   1249     }
   1250     ASSERT(m_endContainer);
   1251     return m_endContainer;
   1252 }
   1253 
   1254 Node* TextIterator::endContainer() const
   1255 {
   1256     return startContainer();
   1257 }
   1258 
   1259 Position TextIterator::startPosition() const
   1260 {
   1261     return createLegacyEditingPosition(startContainer(), startOffset());
   1262 }
   1263 
   1264 Position TextIterator::endPosition() const
   1265 {
   1266     return createLegacyEditingPosition(endContainer(), endOffset());
   1267 }
   1268 
   1269 // --------
   1270 
   1271 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior)
   1272     : m_node(nullptr)
   1273     , m_offset(0)
   1274     , m_handledNode(false)
   1275     , m_handledChildren(false)
   1276     , m_startNode(nullptr)
   1277     , m_startOffset(0)
   1278     , m_endNode(nullptr)
   1279     , m_endOffset(0)
   1280     , m_positionNode(nullptr)
   1281     , m_positionStartOffset(0)
   1282     , m_positionEndOffset(0)
   1283     , m_textOffset(0)
   1284     , m_textLength(0)
   1285     , m_lastTextNode(nullptr)
   1286     , m_lastCharacter(0)
   1287     , m_singleCharacterBuffer(0)
   1288     , m_havePassedStartNode(false)
   1289     , m_shouldHandleFirstLetter(false)
   1290     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
   1291     , m_shouldStop(false)
   1292     , m_emitsOriginalText(false)
   1293 {
   1294     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
   1295 
   1296     if (!r)
   1297         return;
   1298 
   1299     Node* startNode = r->startContainer();
   1300     if (!startNode)
   1301         return;
   1302     Node* endNode = r->endContainer();
   1303     int startOffset = r->startOffset();
   1304     int endOffset = r->endOffset();
   1305 
   1306     init(startNode, endNode, startOffset, endOffset);
   1307 }
   1308 
   1309 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
   1310     : m_node(nullptr)
   1311     , m_offset(0)
   1312     , m_handledNode(false)
   1313     , m_handledChildren(false)
   1314     , m_startNode(nullptr)
   1315     , m_startOffset(0)
   1316     , m_endNode(nullptr)
   1317     , m_endOffset(0)
   1318     , m_positionNode(nullptr)
   1319     , m_positionStartOffset(0)
   1320     , m_positionEndOffset(0)
   1321     , m_textOffset(0)
   1322     , m_textLength(0)
   1323     , m_lastTextNode(nullptr)
   1324     , m_lastCharacter(0)
   1325     , m_singleCharacterBuffer(0)
   1326     , m_havePassedStartNode(false)
   1327     , m_shouldHandleFirstLetter(false)
   1328     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
   1329     , m_shouldStop(false)
   1330     , m_emitsOriginalText(false)
   1331 {
   1332     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
   1333 
   1334     Node* startNode = start.deprecatedNode();
   1335     if (!startNode)
   1336         return;
   1337     Node* endNode = end.deprecatedNode();
   1338     int startOffset = start.deprecatedEditingOffset();
   1339     int endOffset = end.deprecatedEditingOffset();
   1340 
   1341     init(startNode, endNode, startOffset, endOffset);
   1342 }
   1343 
   1344 void SimplifiedBackwardsTextIterator::init(Node* startNode, Node* endNode, int startOffset, int endOffset)
   1345 {
   1346     if (!startNode->offsetInCharacters() && startOffset >= 0) {
   1347         // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior
   1348         // instead of calling countChildren() to avoid traversing the children twice.
   1349         if (Node* childAtOffset = NodeTraversal::childAt(*startNode, startOffset)) {
   1350             startNode = childAtOffset;
   1351             startOffset = 0;
   1352         }
   1353     }
   1354     if (!endNode->offsetInCharacters() && endOffset > 0) {
   1355         // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior
   1356         // instead of calling countChildren() to avoid traversing the children twice.
   1357         if (Node* childAtOffset = NodeTraversal::childAt(*endNode, endOffset - 1)) {
   1358             endNode = childAtOffset;
   1359             endOffset = lastOffsetInNode(endNode);
   1360         }
   1361     }
   1362 
   1363     m_node = endNode;
   1364     setUpFullyClippedStack(m_fullyClippedStack, m_node);
   1365     m_offset = endOffset;
   1366     m_handledNode = false;
   1367     m_handledChildren = !endOffset;
   1368 
   1369     m_startNode = startNode;
   1370     m_startOffset = startOffset;
   1371     m_endNode = endNode;
   1372     m_endOffset = endOffset;
   1373 
   1374 #if ENABLE(ASSERT)
   1375     // Need this just because of the assert.
   1376     m_positionNode = endNode;
   1377 #endif
   1378 
   1379     m_lastTextNode = nullptr;
   1380     m_lastCharacter = '\n';
   1381 
   1382     m_havePassedStartNode = false;
   1383 
   1384     advance();
   1385 }
   1386 
   1387 void SimplifiedBackwardsTextIterator::advance()
   1388 {
   1389     ASSERT(m_positionNode);
   1390 
   1391     if (m_shouldStop)
   1392         return;
   1393 
   1394     if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
   1395         m_shouldStop = true;
   1396         return;
   1397     }
   1398 
   1399     m_positionNode = nullptr;
   1400     m_textLength = 0;
   1401 
   1402     while (m_node && !m_havePassedStartNode) {
   1403         // Don't handle node if we start iterating at [node, 0].
   1404         if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) {
   1405             RenderObject* renderer = m_node->renderer();
   1406             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
   1407                 // FIXME: What about CDATA_SECTION_NODE?
   1408                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1409                     m_handledNode = handleTextNode();
   1410             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
   1411                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1412                     m_handledNode = handleReplacedElement();
   1413             } else {
   1414                 m_handledNode = handleNonTextNode();
   1415             }
   1416             if (m_positionNode)
   1417                 return;
   1418         }
   1419 
   1420         if (!m_handledChildren && m_node->hasChildren()) {
   1421             m_node = m_node->lastChild();
   1422             pushFullyClippedState(m_fullyClippedStack, m_node);
   1423         } else {
   1424             // Exit empty containers as we pass over them or containers
   1425             // where [container, 0] is where we started iterating.
   1426             if (!m_handledNode
   1427                 && canHaveChildrenForEditing(m_node)
   1428                 && m_node->parentNode()
   1429                 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
   1430                 exitNode();
   1431                 if (m_positionNode) {
   1432                     m_handledNode = true;
   1433                     m_handledChildren = true;
   1434                     return;
   1435                 }
   1436             }
   1437 
   1438             // Exit all other containers.
   1439             while (!m_node->previousSibling()) {
   1440                 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
   1441                     break;
   1442                 m_fullyClippedStack.pop();
   1443                 exitNode();
   1444                 if (m_positionNode) {
   1445                     m_handledNode = true;
   1446                     m_handledChildren = true;
   1447                     return;
   1448                 }
   1449             }
   1450 
   1451             m_fullyClippedStack.pop();
   1452             if (advanceRespectingRange(m_node->previousSibling()))
   1453                 pushFullyClippedState(m_fullyClippedStack, m_node);
   1454             else
   1455                 m_node = nullptr;
   1456         }
   1457 
   1458         // For the purpose of word boundary detection,
   1459         // we should iterate all visible text and trailing (collapsed) whitespaces.
   1460         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
   1461         m_handledNode = false;
   1462         m_handledChildren = false;
   1463 
   1464         if (m_positionNode)
   1465             return;
   1466     }
   1467 }
   1468 
   1469 bool SimplifiedBackwardsTextIterator::handleTextNode()
   1470 {
   1471     m_lastTextNode = toText(m_node);
   1472 
   1473     int startOffset;
   1474     int offsetInNode;
   1475     RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
   1476     if (!renderer)
   1477         return true;
   1478 
   1479     String text = renderer->text();
   1480     if (!renderer->firstTextBox() && text.length() > 0)
   1481         return true;
   1482 
   1483     m_positionEndOffset = m_offset;
   1484     m_offset = startOffset + offsetInNode;
   1485     m_positionNode = m_node;
   1486     m_positionStartOffset = m_offset;
   1487 
   1488     ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
   1489     ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
   1490     ASSERT(m_positionStartOffset <= m_positionEndOffset);
   1491 
   1492     m_textLength = m_positionEndOffset - m_positionStartOffset;
   1493     m_textOffset = m_positionStartOffset - offsetInNode;
   1494     m_textContainer = text;
   1495     m_singleCharacterBuffer = 0;
   1496     RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
   1497 
   1498     m_lastCharacter = text[m_positionEndOffset - 1];
   1499 
   1500     return !m_shouldHandleFirstLetter;
   1501 }
   1502 
   1503 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
   1504 {
   1505     RenderText* renderer = toRenderText(m_node->renderer());
   1506     startOffset = (m_node == m_startNode) ? m_startOffset : 0;
   1507 
   1508     if (!renderer->isTextFragment()) {
   1509         offsetInNode = 0;
   1510         return renderer;
   1511     }
   1512 
   1513     RenderTextFragment* fragment = toRenderTextFragment(renderer);
   1514     int offsetAfterFirstLetter = fragment->start();
   1515     if (startOffset >= offsetAfterFirstLetter) {
   1516         ASSERT(!m_shouldHandleFirstLetter);
   1517         offsetInNode = offsetAfterFirstLetter;
   1518         return renderer;
   1519     }
   1520 
   1521     if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
   1522         m_shouldHandleFirstLetter = true;
   1523         offsetInNode = offsetAfterFirstLetter;
   1524         return renderer;
   1525     }
   1526 
   1527     m_shouldHandleFirstLetter = false;
   1528     offsetInNode = 0;
   1529     RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter());
   1530 
   1531     m_offset = firstLetterRenderer->caretMaxOffset();
   1532     m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset);
   1533 
   1534     return firstLetterRenderer;
   1535 }
   1536 
   1537 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
   1538 {
   1539     unsigned index = m_node->nodeIndex();
   1540     // We want replaced elements to behave like punctuation for boundary
   1541     // finding, and to simply take up space for the selection preservation
   1542     // code in moveParagraphs, so we use a comma. Unconditionally emit
   1543     // here because this iterator is only used for boundary finding.
   1544     emitCharacter(',', m_node->parentNode(), index, index + 1);
   1545     return true;
   1546 }
   1547 
   1548 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
   1549 {
   1550     // We can use a linefeed in place of a tab because this simple iterator is only used to
   1551     // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
   1552     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
   1553         unsigned index = m_node->nodeIndex();
   1554         // The start of this emitted range is wrong. Ensuring correctness would require
   1555         // VisiblePositions and so would be slow. previousBoundary expects this.
   1556         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
   1557     }
   1558     return true;
   1559 }
   1560 
   1561 void SimplifiedBackwardsTextIterator::exitNode()
   1562 {
   1563     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
   1564         // The start of this emitted range is wrong. Ensuring correctness would require
   1565         // VisiblePositions and so would be slow. previousBoundary expects this.
   1566         emitCharacter('\n', m_node, 0, 0);
   1567     }
   1568 }
   1569 
   1570 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
   1571 {
   1572     m_singleCharacterBuffer = c;
   1573     m_positionNode = node;
   1574     m_positionStartOffset = startOffset;
   1575     m_positionEndOffset = endOffset;
   1576     m_textOffset = 0;
   1577     m_textLength = 1;
   1578     m_lastCharacter = c;
   1579 }
   1580 
   1581 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
   1582 {
   1583     if (!next)
   1584         return false;
   1585     m_havePassedStartNode |= m_node == m_startNode;
   1586     if (m_havePassedStartNode)
   1587         return false;
   1588     m_node = next;
   1589     return true;
   1590 }
   1591 
   1592 Node* SimplifiedBackwardsTextIterator::startContainer() const
   1593 {
   1594     if (m_positionNode)
   1595         return m_positionNode;
   1596     return m_startNode;
   1597 }
   1598 
   1599 int SimplifiedBackwardsTextIterator::endOffset() const
   1600 {
   1601     if (m_positionNode)
   1602         return m_positionEndOffset;
   1603     return m_startOffset;
   1604 }
   1605 
   1606 Position SimplifiedBackwardsTextIterator::startPosition() const
   1607 {
   1608     if (m_positionNode)
   1609         return createLegacyEditingPosition(m_positionNode, m_positionStartOffset);
   1610     return createLegacyEditingPosition(m_startNode, m_startOffset);
   1611 }
   1612 
   1613 Position SimplifiedBackwardsTextIterator::endPosition() const
   1614 {
   1615     if (m_positionNode)
   1616         return createLegacyEditingPosition(m_positionNode, m_positionEndOffset);
   1617     return createLegacyEditingPosition(m_startNode, m_startOffset);
   1618 }
   1619 
   1620 // --------
   1621 
   1622 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
   1623     : m_offset(0)
   1624     , m_runOffset(0)
   1625     , m_atBreak(true)
   1626     , m_textIterator(range, behavior)
   1627 {
   1628     initialize();
   1629 }
   1630 
   1631 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
   1632     : m_offset(0)
   1633     , m_runOffset(0)
   1634     , m_atBreak(true)
   1635     , m_textIterator(start, end, behavior)
   1636 {
   1637     initialize();
   1638 }
   1639 
   1640 void CharacterIterator::initialize()
   1641 {
   1642     while (!atEnd() && !m_textIterator.length())
   1643         m_textIterator.advance();
   1644 }
   1645 
   1646 PassRefPtrWillBeRawPtr<Range> CharacterIterator::createRange() const
   1647 {
   1648     RefPtrWillBeRawPtr<Range> r = m_textIterator.createRange();
   1649     if (!m_textIterator.atEnd()) {
   1650         if (m_textIterator.length() <= 1) {
   1651             ASSERT(!m_runOffset);
   1652         } else {
   1653             Node* n = r->startContainer();
   1654             ASSERT(n == r->endContainer());
   1655             int offset = r->startOffset() + m_runOffset;
   1656             r->setStart(n, offset, ASSERT_NO_EXCEPTION);
   1657             r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
   1658         }
   1659     }
   1660     return r.release();
   1661 }
   1662 
   1663 Document* CharacterIterator::ownerDocument() const
   1664 {
   1665     return m_textIterator.ownerDocument();
   1666 }
   1667 
   1668 Node* CharacterIterator::startContainer() const
   1669 {
   1670     return m_textIterator.startContainer();
   1671 }
   1672 
   1673 Node* CharacterIterator::endContainer() const
   1674 {
   1675     return m_textIterator.endContainer();
   1676 }
   1677 
   1678 int CharacterIterator::startOffset() const
   1679 {
   1680     if (!m_textIterator.atEnd()) {
   1681         if (m_textIterator.length() > 1)
   1682             return m_textIterator.startOffset() + m_runOffset;
   1683         ASSERT(!m_runOffset);
   1684     }
   1685     return m_textIterator.startOffset();
   1686 }
   1687 
   1688 int CharacterIterator::endOffset() const
   1689 {
   1690     if (!m_textIterator.atEnd()) {
   1691         if (m_textIterator.length() > 1)
   1692             return m_textIterator.startOffset() + m_runOffset + 1;
   1693         ASSERT(!m_runOffset);
   1694     }
   1695     return m_textIterator.endOffset();
   1696 }
   1697 
   1698 Position CharacterIterator::startPosition() const
   1699 {
   1700     if (!m_textIterator.atEnd()) {
   1701         if (m_textIterator.length() > 1) {
   1702             Node* n = m_textIterator.startContainer();
   1703             int offset = m_textIterator.startOffset() + m_runOffset;
   1704             return createLegacyEditingPosition(n, offset);
   1705         }
   1706         ASSERT(!m_runOffset);
   1707     }
   1708     return m_textIterator.startPosition();
   1709 }
   1710 
   1711 Position CharacterIterator::endPosition() const
   1712 {
   1713     if (!m_textIterator.atEnd()) {
   1714         if (m_textIterator.length() > 1) {
   1715             Node* n = m_textIterator.startContainer();
   1716             int offset = m_textIterator.startOffset() + m_runOffset;
   1717             return createLegacyEditingPosition(n, offset + 1);
   1718         }
   1719         ASSERT(!m_runOffset);
   1720     }
   1721     return m_textIterator.endPosition();
   1722 }
   1723 
   1724 void CharacterIterator::advance(int count)
   1725 {
   1726     if (count <= 0) {
   1727         ASSERT(!count);
   1728         return;
   1729     }
   1730 
   1731     m_atBreak = false;
   1732 
   1733     // easy if there is enough left in the current m_textIterator run
   1734     int remaining = m_textIterator.length() - m_runOffset;
   1735     if (count < remaining) {
   1736         m_runOffset += count;
   1737         m_offset += count;
   1738         return;
   1739     }
   1740 
   1741     // exhaust the current m_textIterator run
   1742     count -= remaining;
   1743     m_offset += remaining;
   1744 
   1745     // move to a subsequent m_textIterator run
   1746     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1747         int runLength = m_textIterator.length();
   1748         if (!runLength) {
   1749             m_atBreak = true;
   1750         } else {
   1751             // see whether this is m_textIterator to use
   1752             if (count < runLength) {
   1753                 m_runOffset = count;
   1754                 m_offset += count;
   1755                 return;
   1756             }
   1757 
   1758             // exhaust this m_textIterator run
   1759             count -= runLength;
   1760             m_offset += runLength;
   1761         }
   1762     }
   1763 
   1764     // ran to the end of the m_textIterator... no more runs left
   1765     m_atBreak = true;
   1766     m_runOffset = 0;
   1767 }
   1768 
   1769 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition)
   1770 {
   1771     it.advance(offset);
   1772     startPosition = it.startPosition();
   1773 
   1774     if (length > 1)
   1775         it.advance(length - 1);
   1776     endPosition = it.endPosition();
   1777 }
   1778 
   1779 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
   1780     : m_offset(0)
   1781     , m_runOffset(0)
   1782     , m_atBreak(true)
   1783     , m_textIterator(range, behavior)
   1784 {
   1785     while (!atEnd() && !m_textIterator.length())
   1786         m_textIterator.advance();
   1787 }
   1788 
   1789 BackwardsCharacterIterator::BackwardsCharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
   1790     : m_offset(0)
   1791     , m_runOffset(0)
   1792     , m_atBreak(true)
   1793     , m_textIterator(start, end, behavior)
   1794 {
   1795     while (!atEnd() && !m_textIterator.length())
   1796         m_textIterator.advance();
   1797 }
   1798 
   1799 Position BackwardsCharacterIterator::endPosition() const
   1800 {
   1801     if (!m_textIterator.atEnd()) {
   1802         if (m_textIterator.length() > 1) {
   1803             Node* n = m_textIterator.startContainer();
   1804             return createLegacyEditingPosition(n, m_textIterator.endOffset() - m_runOffset);
   1805         }
   1806         ASSERT(!m_runOffset);
   1807     }
   1808     return m_textIterator.endPosition();
   1809 }
   1810 
   1811 void BackwardsCharacterIterator::advance(int count)
   1812 {
   1813     if (count <= 0) {
   1814         ASSERT(!count);
   1815         return;
   1816     }
   1817 
   1818     m_atBreak = false;
   1819 
   1820     int remaining = m_textIterator.length() - m_runOffset;
   1821     if (count < remaining) {
   1822         m_runOffset += count;
   1823         m_offset += count;
   1824         return;
   1825     }
   1826 
   1827     count -= remaining;
   1828     m_offset += remaining;
   1829 
   1830     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1831         int runLength = m_textIterator.length();
   1832         if (!runLength) {
   1833             m_atBreak = true;
   1834         } else {
   1835             if (count < runLength) {
   1836                 m_runOffset = count;
   1837                 m_offset += count;
   1838                 return;
   1839             }
   1840 
   1841             count -= runLength;
   1842             m_offset += runLength;
   1843         }
   1844     }
   1845 
   1846     m_atBreak = true;
   1847     m_runOffset = 0;
   1848 }
   1849 
   1850 // --------
   1851 
   1852 WordAwareIterator::WordAwareIterator(const Position& start, const Position& end)
   1853     : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
   1854     , m_textIterator(start, end)
   1855 {
   1856     advance(); // Get in position over the first chunk of text.
   1857 }
   1858 
   1859 WordAwareIterator::~WordAwareIterator()
   1860 {
   1861 }
   1862 
   1863 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
   1864 
   1865 void WordAwareIterator::advance()
   1866 {
   1867     m_buffer.clear();
   1868 
   1869     // If last time we did a look-ahead, start with that looked-ahead chunk now
   1870     if (!m_didLookAhead) {
   1871         ASSERT(!m_textIterator.atEnd());
   1872         m_textIterator.advance();
   1873     }
   1874     m_didLookAhead = false;
   1875 
   1876     // Go to next non-empty chunk.
   1877     while (!m_textIterator.atEnd() && !m_textIterator.length())
   1878         m_textIterator.advance();
   1879 
   1880     if (m_textIterator.atEnd())
   1881         return;
   1882 
   1883     while (1) {
   1884         // If this chunk ends in whitespace we can just use it as our chunk.
   1885         if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
   1886             return;
   1887 
   1888         // If this is the first chunk that failed, save it in m_buffer before look ahead.
   1889         if (m_buffer.isEmpty())
   1890             m_textIterator.appendTextTo(m_buffer);
   1891 
   1892         // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
   1893         m_textIterator.advance();
   1894         if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
   1895             m_didLookAhead = true;
   1896             return;
   1897         }
   1898 
   1899         // Start gobbling chunks until we get to a suitable stopping point
   1900         m_textIterator.appendTextTo(m_buffer);
   1901     }
   1902 }
   1903 
   1904 int WordAwareIterator::length() const
   1905 {
   1906     if (!m_buffer.isEmpty())
   1907         return m_buffer.size();
   1908     return m_textIterator.length();
   1909 }
   1910 
   1911 String WordAwareIterator::substring(unsigned position, unsigned length) const
   1912 {
   1913     if (!m_buffer.isEmpty())
   1914         return String(m_buffer.data() + position, length);
   1915     return m_textIterator.substring(position, length);
   1916 }
   1917 
   1918 UChar WordAwareIterator::characterAt(unsigned index) const
   1919 {
   1920     if (!m_buffer.isEmpty())
   1921         return m_buffer[index];
   1922     return m_textIterator.characterAt(index);
   1923 }
   1924 
   1925 // --------
   1926 
   1927 static const size_t minimumSearchBufferSize = 8192;
   1928 
   1929 #if ENABLE(ASSERT)
   1930 static bool searcherInUse;
   1931 #endif
   1932 
   1933 static UStringSearch* createSearcher()
   1934 {
   1935     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
   1936     // but it doesn't matter exactly what it is, since we don't perform any searches
   1937     // without setting both the pattern and the text.
   1938     UErrorCode status = U_ZERO_ERROR;
   1939     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
   1940     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
   1941     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
   1942     return searcher;
   1943 }
   1944 
   1945 static UStringSearch* searcher()
   1946 {
   1947     static UStringSearch* searcher = createSearcher();
   1948     return searcher;
   1949 }
   1950 
   1951 static inline void lockSearcher()
   1952 {
   1953 #if ENABLE(ASSERT)
   1954     ASSERT(!searcherInUse);
   1955     searcherInUse = true;
   1956 #endif
   1957 }
   1958 
   1959 static inline void unlockSearcher()
   1960 {
   1961 #if ENABLE(ASSERT)
   1962     ASSERT(searcherInUse);
   1963     searcherInUse = false;
   1964 #endif
   1965 }
   1966 
   1967 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
   1968     : m_options(options)
   1969     , m_prefixLength(0)
   1970     , m_numberOfCharactersJustAppended(0)
   1971     , m_atBreak(true)
   1972     , m_needsMoreContext(options & AtWordStarts)
   1973     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
   1974 {
   1975     ASSERT(!target.isEmpty());
   1976     target.appendTo(m_target);
   1977 
   1978     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
   1979     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
   1980     // to add tailoring on top of the locale-specific tailoring as of this writing.
   1981     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
   1982 
   1983     size_t targetLength = m_target.size();
   1984     m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
   1985     m_overlap = m_buffer.capacity() / 4;
   1986 
   1987     if ((m_options & AtWordStarts) && targetLength) {
   1988         UChar32 targetFirstCharacter;
   1989         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
   1990         // Characters in the separator category never really occur at the beginning of a word,
   1991         // so if the target begins with such a character, we just ignore the AtWordStart option.
   1992         if (isSeparator(targetFirstCharacter)) {
   1993             m_options &= ~AtWordStarts;
   1994             m_needsMoreContext = false;
   1995         }
   1996     }
   1997 
   1998     // Grab the single global searcher.
   1999     // If we ever have a reason to do more than once search buffer at once, we'll have
   2000     // to move to multiple searchers.
   2001     lockSearcher();
   2002 
   2003     UStringSearch* searcher = blink::searcher();
   2004     UCollator* collator = usearch_getCollator(searcher);
   2005 
   2006     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
   2007     if (ucol_getStrength(collator) != strength) {
   2008         ucol_setStrength(collator, strength);
   2009         usearch_reset(searcher);
   2010     }
   2011 
   2012     UErrorCode status = U_ZERO_ERROR;
   2013     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
   2014     ASSERT(status == U_ZERO_ERROR);
   2015 
   2016     // The kana workaround requires a normalized copy of the target string.
   2017     if (m_targetRequiresKanaWorkaround)
   2018         normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
   2019 }
   2020 
   2021 inline SearchBuffer::~SearchBuffer()
   2022 {
   2023     // Leave the static object pointing to a valid string.
   2024     UErrorCode status = U_ZERO_ERROR;
   2025     usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
   2026     ASSERT(status == U_ZERO_ERROR);
   2027 
   2028     unlockSearcher();
   2029 }
   2030 
   2031 template<typename CharType>
   2032 inline void SearchBuffer::append(const CharType* characters, size_t length)
   2033 {
   2034     ASSERT(length);
   2035 
   2036     if (m_atBreak) {
   2037         m_buffer.shrink(0);
   2038         m_prefixLength = 0;
   2039         m_atBreak = false;
   2040     } else if (m_buffer.size() == m_buffer.capacity()) {
   2041         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
   2042         m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
   2043         m_buffer.shrink(m_overlap);
   2044     }
   2045 
   2046     size_t oldLength = m_buffer.size();
   2047     size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
   2048     ASSERT(usableLength);
   2049     m_buffer.resize(oldLength + usableLength);
   2050     UChar* destination = m_buffer.data() + oldLength;
   2051     StringImpl::copyChars(destination, characters, usableLength);
   2052     foldQuoteMarksAndSoftHyphens(destination, usableLength);
   2053     m_numberOfCharactersJustAppended = usableLength;
   2054 }
   2055 
   2056 inline bool SearchBuffer::needsMoreContext() const
   2057 {
   2058     return m_needsMoreContext;
   2059 }
   2060 
   2061 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
   2062 {
   2063     ASSERT(m_needsMoreContext);
   2064     ASSERT(m_prefixLength == m_buffer.size());
   2065 
   2066     if (!length)
   2067         return;
   2068 
   2069     m_atBreak = false;
   2070 
   2071     size_t wordBoundaryContextStart = length;
   2072     if (wordBoundaryContextStart) {
   2073         U16_BACK_1(characters, 0, wordBoundaryContextStart);
   2074         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
   2075     }
   2076 
   2077     size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
   2078     m_buffer.prepend(characters + length - usableLength, usableLength);
   2079     m_prefixLength += usableLength;
   2080 
   2081     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
   2082         m_needsMoreContext = false;
   2083 }
   2084 
   2085 inline bool SearchBuffer::atBreak() const
   2086 {
   2087     return m_atBreak;
   2088 }
   2089 
   2090 inline void SearchBuffer::reachedBreak()
   2091 {
   2092     m_atBreak = true;
   2093 }
   2094 
   2095 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
   2096 {
   2097     // This function implements the kana workaround. If usearch treats
   2098     // it as a match, but we do not want to, then it's a "bad match".
   2099     if (!m_targetRequiresKanaWorkaround)
   2100         return false;
   2101 
   2102     // Normalize into a match buffer. We reuse a single buffer rather than
   2103     // creating a new one each time.
   2104     normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);
   2105 
   2106     return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
   2107 }
   2108 
   2109 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
   2110 {
   2111     ASSERT(m_options & AtWordStarts);
   2112 
   2113     if (!start)
   2114         return true;
   2115 
   2116     int size = m_buffer.size();
   2117     int offset = start;
   2118     UChar32 firstCharacter;
   2119     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
   2120 
   2121     if (m_options & TreatMedialCapitalAsWordStart) {
   2122         UChar32 previousCharacter;
   2123         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
   2124 
   2125         if (isSeparator(firstCharacter)) {
   2126             // The start of a separator run is a word start (".org" in "webkit.org").
   2127             if (!isSeparator(previousCharacter))
   2128                 return true;
   2129         } else if (isASCIIUpper(firstCharacter)) {
   2130             // The start of an uppercase run is a word start ("Kit" in "WebKit").
   2131             if (!isASCIIUpper(previousCharacter))
   2132                 return true;
   2133             // The last character of an uppercase run followed by a non-separator, non-digit
   2134             // is a word start ("Request" in "XMLHTTPRequest").
   2135             offset = start;
   2136             U16_FWD_1(m_buffer.data(), offset, size);
   2137             UChar32 nextCharacter = 0;
   2138             if (offset < size)
   2139                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
   2140             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
   2141                 return true;
   2142         } else if (isASCIIDigit(firstCharacter)) {
   2143             // The start of a digit run is a word start ("2" in "WebKit2").
   2144             if (!isASCIIDigit(previousCharacter))
   2145                 return true;
   2146         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
   2147             // The start of a non-separator, non-uppercase, non-digit run is a word start,
   2148             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
   2149             return true;
   2150         }
   2151     }
   2152 
   2153     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
   2154     // a word, so treat the position before any CJK character as a word start.
   2155     if (Character::isCJKIdeographOrSymbol(firstCharacter))
   2156         return true;
   2157 
   2158     size_t wordBreakSearchStart = start + length;
   2159     while (wordBreakSearchStart > start)
   2160         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
   2161     return wordBreakSearchStart == start;
   2162 }
   2163 
   2164 inline size_t SearchBuffer::search(size_t& start)
   2165 {
   2166     size_t size = m_buffer.size();
   2167     if (m_atBreak) {
   2168         if (!size)
   2169             return 0;
   2170     } else {
   2171         if (size != m_buffer.capacity())
   2172             return 0;
   2173     }
   2174 
   2175     UStringSearch* searcher = blink::searcher();
   2176 
   2177     UErrorCode status = U_ZERO_ERROR;
   2178     usearch_setText(searcher, m_buffer.data(), size, &status);
   2179     ASSERT(status == U_ZERO_ERROR);
   2180 
   2181     usearch_setOffset(searcher, m_prefixLength, &status);
   2182     ASSERT(status == U_ZERO_ERROR);
   2183 
   2184     int matchStart = usearch_next(searcher, &status);
   2185     ASSERT(status == U_ZERO_ERROR);
   2186 
   2187 nextMatch:
   2188     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
   2189         ASSERT(matchStart == USEARCH_DONE);
   2190         return 0;
   2191     }
   2192 
   2193     // Matches that start in the overlap area are only tentative.
   2194     // The same match may appear later, matching more characters,
   2195     // possibly including a combining character that's not yet in the buffer.
   2196     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
   2197         size_t overlap = m_overlap;
   2198         if (m_options & AtWordStarts) {
   2199             // Ensure that there is sufficient context before matchStart the next time around for
   2200             // determining if it is at a word boundary.
   2201             int wordBoundaryContextStart = matchStart;
   2202             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
   2203             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
   2204             overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
   2205         }
   2206         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
   2207         m_prefixLength -= std::min(m_prefixLength, size - overlap);
   2208         m_buffer.shrink(overlap);
   2209         return 0;
   2210     }
   2211 
   2212     size_t matchedLength = usearch_getMatchedLength(searcher);
   2213     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
   2214 
   2215     // If this match is "bad", move on to the next match.
   2216     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
   2217         matchStart = usearch_next(searcher, &status);
   2218         ASSERT(status == U_ZERO_ERROR);
   2219         goto nextMatch;
   2220     }
   2221 
   2222     size_t newSize = size - (matchStart + 1);
   2223     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
   2224     m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
   2225     m_buffer.shrink(newSize);
   2226 
   2227     start = size - matchStart;
   2228     return matchedLength;
   2229 }
   2230 
   2231 // --------
   2232 
   2233 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
   2234 {
   2235     int length = 0;
   2236     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
   2237     if (forSelectionPreservation)
   2238         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
   2239     for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance())
   2240         length += it.length();
   2241 
   2242     return length;
   2243 }
   2244 
   2245 int TextIterator::rangeLength(const Position& start, const Position& end, bool forSelectionPreservation)
   2246 {
   2247     int length = 0;
   2248     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
   2249     if (forSelectionPreservation)
   2250         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
   2251     for (TextIterator it(start, end, behaviorFlags); !it.atEnd(); it.advance())
   2252         length += it.length();
   2253 
   2254     return length;
   2255 }
   2256 
   2257 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
   2258 {
   2259     CharacterIterator entireRangeIterator(entireRange);
   2260     Position start;
   2261     Position end;
   2262     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
   2263     return Range::create(entireRange->ownerDocument(), start, end);
   2264 }
   2265 
   2266 void TextIterator::subrange(Position& start, Position& end, int characterOffset, int characterCount)
   2267 {
   2268     CharacterIterator entireRangeIterator(start, end);
   2269     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
   2270 }
   2271 
   2272 // --------
   2273 
   2274 static String createPlainText(TextIterator& it)
   2275 {
   2276     // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
   2277     static const unsigned initialCapacity = 1 << 15;
   2278 
   2279     unsigned bufferLength = 0;
   2280     StringBuilder builder;
   2281     builder.reserveCapacity(initialCapacity);
   2282 
   2283     for (; !it.atEnd(); it.advance()) {
   2284         it.appendTextToStringBuilder(builder);
   2285         bufferLength += it.length();
   2286     }
   2287 
   2288     if (!bufferLength)
   2289         return emptyString();
   2290 
   2291     return builder.toString();
   2292 }
   2293 
   2294 String plainText(const Range* r, TextIteratorBehaviorFlags behavior)
   2295 {
   2296     TextIterator it(r, behavior);
   2297     return createPlainText(it);
   2298 }
   2299 
   2300 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
   2301 {
   2302     TextIterator it(start, end, behavior);
   2303     return createPlainText(it);
   2304 }
   2305 
   2306 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward)
   2307 {
   2308     RefPtrWillBeRawPtr<Range> result = range->cloneRange();
   2309     result->collapse(!forward);
   2310     return result.release();
   2311 }
   2312 
   2313 // Check if there's any unpaird surrogate code point.
   2314 // Non-character code points are not checked.
   2315 static bool isValidUTF16(const String& s)
   2316 {
   2317     if (s.is8Bit())
   2318         return true;
   2319     const UChar* ustr = s.characters16();
   2320     size_t length = s.length();
   2321     size_t position = 0;
   2322     while (position < length) {
   2323         UChar32 character;
   2324         U16_NEXT(ustr, position, length, character);
   2325         if (U_IS_SURROGATE(character))
   2326             return false;
   2327     }
   2328     return true;
   2329 }
   2330 
   2331 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
   2332 {
   2333     matchStart = 0;
   2334     size_t matchLength = 0;
   2335 
   2336     if (!isValidUTF16(target))
   2337         return 0;
   2338 
   2339     SearchBuffer buffer(target, options);
   2340 
   2341     if (buffer.needsMoreContext()) {
   2342         RefPtrWillBeRawPtr<Range> beforeStartRange = it.ownerDocument()->createRange();
   2343         beforeStartRange->setEnd(it.startContainer(), it.startOffset(), IGNORE_EXCEPTION);
   2344         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
   2345             Vector<UChar, 1024> characters;
   2346             backwardsIterator.prependTextTo(characters);
   2347             buffer.prependContext(characters.data(), characters.size());
   2348             if (!buffer.needsMoreContext())
   2349                 break;
   2350         }
   2351     }
   2352 
   2353     while (!it.atEnd()) {
   2354         it.appendTextTo(buffer);
   2355         it.advance(buffer.numberOfCharactersJustAppended());
   2356 tryAgain:
   2357         size_t matchStartOffset;
   2358         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
   2359             // Note that we found a match, and where we found it.
   2360             size_t lastCharacterInBufferOffset = it.characterOffset();
   2361             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
   2362             matchStart = lastCharacterInBufferOffset - matchStartOffset;
   2363             matchLength = newMatchLength;
   2364             // If searching forward, stop on the first match.
   2365             // If searching backward, don't stop, so we end up with the last match.
   2366             if (!(options & Backwards))
   2367                 break;
   2368             goto tryAgain;
   2369         }
   2370         if (it.atBreak() && !buffer.atBreak()) {
   2371             buffer.reachedBreak();
   2372             goto tryAgain;
   2373         }
   2374     }
   2375 
   2376     return matchLength;
   2377 }
   2378 
   2379 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots;
   2380 
   2381 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
   2382 {
   2383     // First, find the text.
   2384     size_t matchStart;
   2385     size_t matchLength;
   2386     {
   2387         CharacterIterator findIterator(range, iteratorFlagsForFindPlainText);
   2388         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
   2389         if (!matchLength)
   2390             return collapsedToBoundary(range, !(options & Backwards));
   2391     }
   2392 
   2393     // Then, find the document position of the start and the end of the text.
   2394     CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText);
   2395     Position resultStart;
   2396     Position resultEnd;
   2397     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
   2398     return Range::create(range->ownerDocument(), resultStart, resultEnd);
   2399 }
   2400 
   2401 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd)
   2402 {
   2403     resultStart.clear();
   2404     resultEnd.clear();
   2405     // CharacterIterator requires renderers to be up-to-date.
   2406     if (!inputStart.inDocument())
   2407         return;
   2408     ASSERT(inputStart.document() == inputEnd.document());
   2409 
   2410     // FIXME: Reduce the code duplication with above (but how?).
   2411     size_t matchStart;
   2412     size_t matchLength;
   2413     {
   2414         CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
   2415         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
   2416         if (!matchLength) {
   2417             const Position& collapseTo = options & Backwards ? inputStart : inputEnd;
   2418             resultStart = collapseTo;
   2419             resultEnd = collapseTo;
   2420             return;
   2421         }
   2422     }
   2423 
   2424     CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
   2425     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
   2426 }
   2427 
   2428 }
   2429