Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
      3  * Copyright (C) 2005 Alexey Proskuryakov.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/editing/TextIterator.h"
     29 
     30 #include "bindings/v8/ExceptionStatePlaceholder.h"
     31 #include "core/HTMLNames.h"
     32 #include "core/dom/Document.h"
     33 #include "core/dom/NodeTraversal.h"
     34 #include "core/dom/shadow/ShadowRoot.h"
     35 #include "core/editing/VisiblePosition.h"
     36 #include "core/editing/VisibleUnits.h"
     37 #include "core/editing/htmlediting.h"
     38 #include "core/html/HTMLElement.h"
     39 #include "core/html/HTMLTextFormControlElement.h"
     40 #include "core/rendering/InlineTextBox.h"
     41 #include "core/rendering/RenderImage.h"
     42 #include "core/rendering/RenderTableCell.h"
     43 #include "core/rendering/RenderTableRow.h"
     44 #include "core/rendering/RenderTextControl.h"
     45 #include "core/rendering/RenderTextFragment.h"
     46 #include "platform/fonts/Character.h"
     47 #include "platform/fonts/Font.h"
     48 #include "platform/text/TextBoundaries.h"
     49 #include "platform/text/TextBreakIteratorInternalICU.h"
     50 #include "platform/text/UnicodeUtilities.h"
     51 #include "wtf/text/CString.h"
     52 #include "wtf/text/StringBuilder.h"
     53 #include "wtf/unicode/CharacterNames.h"
     54 #include <unicode/usearch.h>
     55 #include <unicode/utf16.h>
     56 
     57 using namespace WTF::Unicode;
     58 
     59 namespace WebCore {
     60 
     61 using namespace HTMLNames;
     62 
     63 // Buffer that knows how to compare with a search target.
     64 // Keeps enough of the previous text to be able to search in the future, but no more.
     65 // Non-breaking spaces are always equal to normal spaces.
     66 // Case folding is also done if the CaseInsensitive option is specified.
     67 // Matches are further filtered if the AtWordStarts option is specified, although some
     68 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
     69 class SearchBuffer {
     70     WTF_MAKE_NONCOPYABLE(SearchBuffer);
     71 public:
     72     SearchBuffer(const String& target, FindOptions);
     73     ~SearchBuffer();
     74 
     75     // Returns number of characters appended; guaranteed to be in the range [1, length].
     76     template<typename CharType>
     77     void append(const CharType*, size_t length);
     78     size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
     79 
     80     bool needsMoreContext() const;
     81     void prependContext(const UChar*, size_t length);
     82     void reachedBreak();
     83 
     84     // Result is the size in characters of what was found.
     85     // And <startOffset> is the number of characters back to the start of what was found.
     86     size_t search(size_t& startOffset);
     87     bool atBreak() const;
     88 
     89 private:
     90     bool isBadMatch(const UChar*, size_t length) const;
     91     bool isWordStartMatch(size_t start, size_t length) const;
     92 
     93     Vector<UChar> m_target;
     94     FindOptions m_options;
     95 
     96     Vector<UChar> m_buffer;
     97     size_t m_overlap;
     98     size_t m_prefixLength;
     99     size_t m_numberOfCharactersJustAppended;
    100     bool m_atBreak;
    101     bool m_needsMoreContext;
    102 
    103     bool m_targetRequiresKanaWorkaround;
    104     Vector<UChar> m_normalizedTarget;
    105     mutable Vector<UChar> m_normalizedMatch;
    106 };
    107 
    108 // --------
    109 
    110 static const unsigned bitsInWord = sizeof(unsigned) * 8;
    111 static const unsigned bitInWordMask = bitsInWord - 1;
    112 
    113 BitStack::BitStack()
    114     : m_size(0)
    115 {
    116 }
    117 
    118 BitStack::~BitStack()
    119 {
    120 }
    121 
    122 void BitStack::push(bool bit)
    123 {
    124     unsigned index = m_size / bitsInWord;
    125     unsigned shift = m_size & bitInWordMask;
    126     if (!shift && index == m_words.size()) {
    127         m_words.grow(index + 1);
    128         m_words[index] = 0;
    129     }
    130     unsigned& word = m_words[index];
    131     unsigned mask = 1U << shift;
    132     if (bit)
    133         word |= mask;
    134     else
    135         word &= ~mask;
    136     ++m_size;
    137 }
    138 
    139 void BitStack::pop()
    140 {
    141     if (m_size)
    142         --m_size;
    143 }
    144 
    145 bool BitStack::top() const
    146 {
    147     if (!m_size)
    148         return false;
    149     unsigned shift = (m_size - 1) & bitInWordMask;
    150     return m_words.last() & (1U << shift);
    151 }
    152 
    153 unsigned BitStack::size() const
    154 {
    155     return m_size;
    156 }
    157 
    158 // --------
    159 
    160 #if ASSERT_ENABLED
    161 
    162 static unsigned depthCrossingShadowBoundaries(Node* node)
    163 {
    164     unsigned depth = 0;
    165     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    166         ++depth;
    167     return depth;
    168 }
    169 
    170 #endif
    171 
    172 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
    173 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
    174 {
    175     if (!rangeEndContainer)
    176         return 0;
    177     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
    178         if (Node* next = rangeEndContainer->traverseToChildAt(rangeEndOffset))
    179             return next;
    180     }
    181     for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
    182         if (Node* next = node->nextSibling())
    183             return next;
    184     }
    185     return 0;
    186 }
    187 
    188 // --------
    189 
    190 static inline bool fullyClipsContents(Node* node)
    191 {
    192     RenderObject* renderer = node->renderer();
    193     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
    194         return false;
    195     return toRenderBox(renderer)->size().isEmpty();
    196 }
    197 
    198 static inline bool ignoresContainerClip(Node* node)
    199 {
    200     RenderObject* renderer = node->renderer();
    201     if (!renderer || renderer->isText())
    202         return false;
    203     return renderer->style()->hasOutOfFlowPosition();
    204 }
    205 
    206 static void pushFullyClippedState(BitStack& stack, Node* node)
    207 {
    208     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
    209 
    210     // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364>
    211     // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if
    212     // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches
    213     // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics
    214     // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox).
    215     //
    216     // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering
    217     // primitives, not with DOM primitives.
    218 
    219     // Push true if this node full clips its contents, or if a parent already has fully
    220     // clipped and this is not a node that ignores its container's clip.
    221     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
    222 }
    223 
    224 static void setUpFullyClippedStack(BitStack& stack, Node* node)
    225 {
    226     // Put the nodes in a vector so we can iterate in reverse order.
    227     Vector<Node*, 100> ancestry;
    228     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    229         ancestry.append(parent);
    230 
    231     // Call pushFullyClippedState on each node starting with the earliest ancestor.
    232     size_t size = ancestry.size();
    233     for (size_t i = 0; i < size; ++i)
    234         pushFullyClippedState(stack, ancestry[size - i - 1]);
    235     pushFullyClippedState(stack, node);
    236 
    237     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
    238 }
    239 
    240 // --------
    241 
    242 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior)
    243     : m_startContainer(nullptr)
    244     , m_startOffset(0)
    245     , m_endContainer(nullptr)
    246     , m_endOffset(0)
    247     , m_positionNode(nullptr)
    248     , m_textLength(0)
    249     , m_needsAnotherNewline(false)
    250     , m_textBox(0)
    251     , m_remainingTextBox(0)
    252     , m_firstLetterText(0)
    253     , m_lastTextNode(nullptr)
    254     , m_lastTextNodeEndedWithCollapsedSpace(false)
    255     , m_lastCharacter(0)
    256     , m_sortedTextBoxesPosition(0)
    257     , m_hasEmitted(false)
    258     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    259     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    260     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
    261     , m_handledFirstLetter(false)
    262     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    263     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
    264     , m_shouldStop(false)
    265     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
    266     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
    267     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
    268 {
    269     if (range)
    270         initialize(range->startPosition(), range->endPosition());
    271 }
    272 
    273 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
    274     : m_startContainer(nullptr)
    275     , m_startOffset(0)
    276     , m_endContainer(nullptr)
    277     , m_endOffset(0)
    278     , m_positionNode(nullptr)
    279     , m_textLength(0)
    280     , m_needsAnotherNewline(false)
    281     , m_textBox(0)
    282     , m_remainingTextBox(0)
    283     , m_firstLetterText(0)
    284     , m_lastTextNode(nullptr)
    285     , m_lastTextNodeEndedWithCollapsedSpace(false)
    286     , m_lastCharacter(0)
    287     , m_sortedTextBoxesPosition(0)
    288     , m_hasEmitted(false)
    289     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    290     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    291     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
    292     , m_handledFirstLetter(false)
    293     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    294     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
    295     , m_shouldStop(false)
    296     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
    297     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
    298     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
    299 {
    300     initialize(start, end);
    301 }
    302 
    303 void TextIterator::initialize(const Position& start, const Position& end)
    304 {
    305     ASSERT(comparePositions(start, end) <= 0);
    306 
    307     // Get and validate |start| and |end|.
    308     Node* startContainer = start.containerNode();
    309     if (!startContainer)
    310         return;
    311     int startOffset = start.computeOffsetInContainerNode();
    312     Node* endContainer = end.containerNode();
    313     if (!endContainer)
    314         return;
    315     int endOffset = end.computeOffsetInContainerNode();
    316 
    317     // Remember the range - this does not change.
    318     m_startContainer = startContainer;
    319     m_startOffset = startOffset;
    320     m_endContainer = endContainer;
    321     m_endOffset = endOffset;
    322 
    323     // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from
    324     // the common ancestor tree scope.
    325     const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope());
    326     ASSERT(commonAncestorTreeScope);
    327     m_shadowDepth = 0;
    328     for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope())
    329         ++m_shadowDepth;
    330 
    331     // Set up the current node for processing.
    332     if (startContainer->offsetInCharacters())
    333         m_node = startContainer;
    334     else if (Node* child = startContainer->traverseToChildAt(startOffset))
    335         m_node = child;
    336     else if (!startOffset)
    337         m_node = startContainer;
    338     else
    339         m_node = NodeTraversal::nextSkippingChildren(*startContainer);
    340 
    341     if (!m_node)
    342         return;
    343 
    344     setUpFullyClippedStack(m_fullyClippedStack, m_node);
    345     m_offset = m_node == m_startContainer ? m_startOffset : 0;
    346     m_iterationProgress = HandledNone;
    347 
    348     // Calculate first out of bounds node.
    349     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
    350 
    351     // Identify the first run.
    352     advance();
    353 }
    354 
    355 TextIterator::~TextIterator()
    356 {
    357 }
    358 
    359 void TextIterator::advance()
    360 {
    361     if (m_shouldStop)
    362         return;
    363 
    364     // reset the run information
    365     m_positionNode = nullptr;
    366     m_textLength = 0;
    367 
    368     // handle remembered node that needed a newline after the text node's newline
    369     if (m_needsAnotherNewline) {
    370         // Emit the extra newline, and position it *inside* m_node, after m_node's
    371         // contents, in case it's a block, in the same way that we position the first
    372         // newline. The range for the emitted newline should start where the line
    373         // break begins.
    374         // FIXME: It would be cleaner if we emitted two newlines during the last
    375         // iteration, instead of using m_needsAnotherNewline.
    376         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
    377         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    378         m_needsAnotherNewline = false;
    379         return;
    380     }
    381 
    382     if (!m_textBox && m_remainingTextBox) {
    383         m_textBox = m_remainingTextBox;
    384         m_remainingTextBox = 0;
    385         m_firstLetterText = 0;
    386         m_offset = 0;
    387     }
    388     // handle remembered text box
    389     if (m_textBox) {
    390         handleTextBox();
    391         if (m_positionNode)
    392             return;
    393     }
    394 
    395     while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) {
    396         if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
    397             m_shouldStop = true;
    398 
    399         // if the range ends at offset 0 of an element, represent the
    400         // position, but not the content, of that element e.g. if the
    401         // node is a blockflow element, emit a newline that
    402         // precedes the element
    403         if (m_node == m_endContainer && !m_endOffset) {
    404             representNodeOffsetZero();
    405             m_node = nullptr;
    406             return;
    407         }
    408 
    409         RenderObject* renderer = m_node->renderer();
    410         if (!renderer) {
    411             if (m_node->isShadowRoot()) {
    412                 // A shadow root doesn't have a renderer, but we want to visit children anyway.
    413                 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress;
    414             } else {
    415                 m_iterationProgress = HandledChildren;
    416             }
    417         } else {
    418             // Enter author shadow roots, from youngest, if any and if necessary.
    419             if (m_iterationProgress < HandledAuthorShadowRoots) {
    420                 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) {
    421                     ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot();
    422                     ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot);
    423                     m_node = youngestShadowRoot;
    424                     m_iterationProgress = HandledNone;
    425                     ++m_shadowDepth;
    426                     pushFullyClippedState(m_fullyClippedStack, m_node);
    427                     continue;
    428                 }
    429 
    430                 m_iterationProgress = HandledAuthorShadowRoots;
    431             }
    432 
    433             // Enter user-agent shadow root, if necessary.
    434             if (m_iterationProgress < HandledUserAgentShadowRoot) {
    435                 if (m_entersTextControls && renderer->isTextControl()) {
    436                     ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot();
    437                     ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
    438                     m_node = userAgentShadowRoot;
    439                     m_iterationProgress = HandledNone;
    440                     ++m_shadowDepth;
    441                     pushFullyClippedState(m_fullyClippedStack, m_node);
    442                     continue;
    443                 }
    444                 m_iterationProgress = HandledUserAgentShadowRoot;
    445             }
    446 
    447             // Handle the current node according to its type.
    448             if (m_iterationProgress < HandledNode) {
    449                 bool handledNode = false;
    450                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE?
    451                     handledNode = handleTextNode();
    452                 } else if (renderer && (renderer->isImage() || renderer->isWidget()
    453                     || (m_node && m_node->isElementNode()
    454                     && (toElement(m_node)->isFormControlElement()
    455                     || isHTMLLegendElement(toElement(*m_node))
    456                     || isHTMLMeterElement(toElement(*m_node))
    457                     || isHTMLProgressElement(toElement(*m_node)))))) {
    458                     handledNode = handleReplacedElement();
    459                 } else {
    460                     handledNode = handleNonTextNode();
    461                 }
    462                 if (handledNode)
    463                     m_iterationProgress = HandledNode;
    464                 if (m_positionNode)
    465                     return;
    466             }
    467         }
    468 
    469         // Find a new current node to handle in depth-first manner,
    470         // calling exitNode() as we come back thru a parent node.
    471         //
    472         // 1. Iterate over child nodes, if we haven't done yet.
    473         Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0;
    474         m_offset = 0;
    475         if (!next) {
    476             // 2. If we've already iterated children or they are not available, go to the next sibling node.
    477             next = m_node->nextSibling();
    478             if (!next) {
    479                 // 3. If we are at the last child, go up the node tree until we find a next sibling.
    480                 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode;
    481                 Node* parentNode = m_node->parentNode();
    482                 while (!next && parentNode) {
    483                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
    484                         return;
    485                     bool haveRenderer = m_node->renderer();
    486                     m_node = parentNode;
    487                     m_fullyClippedStack.pop();
    488                     parentNode = m_node->parentNode();
    489                     if (haveRenderer)
    490                         exitNode();
    491                     if (m_positionNode) {
    492                         m_iterationProgress = HandledChildren;
    493                         return;
    494                     }
    495                     next = m_node->nextSibling();
    496                 }
    497 
    498                 if (!next && !parentNode && m_shadowDepth > 0) {
    499                     // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next
    500                     // sibling shadow root, if any.
    501                     ShadowRoot* shadowRoot = toShadowRoot(m_node);
    502                     if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
    503                         ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot();
    504                         if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
    505                             m_fullyClippedStack.pop();
    506                             m_node = nextShadowRoot;
    507                             m_iterationProgress = HandledNone;
    508                             // m_shadowDepth is unchanged since we exit from a shadow root and enter another.
    509                             pushFullyClippedState(m_fullyClippedStack, m_node);
    510                         } else {
    511                             // We are the last shadow root; exit from here and go back to where we were.
    512                             m_node = shadowRoot->host();
    513                             m_iterationProgress = HandledAuthorShadowRoots;
    514                             --m_shadowDepth;
    515                             m_fullyClippedStack.pop();
    516                         }
    517                     } else {
    518                         // If we are in a user-agent shadow root, then go back to the host.
    519                         ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
    520                         m_node = shadowRoot->host();
    521                         m_iterationProgress = HandledUserAgentShadowRoot;
    522                         --m_shadowDepth;
    523                         m_fullyClippedStack.pop();
    524                     }
    525                     m_handledFirstLetter = false;
    526                     m_firstLetterText = 0;
    527                     continue;
    528                 }
    529             }
    530             m_fullyClippedStack.pop();
    531         }
    532 
    533         // set the new current node
    534         m_node = next;
    535         if (m_node)
    536             pushFullyClippedState(m_fullyClippedStack, m_node);
    537         m_iterationProgress = HandledNone;
    538         m_handledFirstLetter = false;
    539         m_firstLetterText = 0;
    540 
    541         // how would this ever be?
    542         if (m_positionNode)
    543             return;
    544     }
    545 }
    546 
    547 UChar TextIterator::characterAt(unsigned index) const
    548 {
    549     ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
    550     if (!(index < static_cast<unsigned>(length())))
    551         return 0;
    552 
    553     if (m_singleCharacterBuffer) {
    554         ASSERT(!index);
    555         ASSERT(length() == 1);
    556         return m_singleCharacterBuffer;
    557     }
    558 
    559     return string()[startOffset() + index];
    560 }
    561 
    562 String TextIterator::substring(unsigned position, unsigned length) const
    563 {
    564     ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length()));
    565     ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
    566     if (!length)
    567         return emptyString();
    568     if (m_singleCharacterBuffer) {
    569         ASSERT(!position);
    570         ASSERT(length == 1);
    571         return String(&m_singleCharacterBuffer, 1);
    572     }
    573     return string().substring(startOffset() + position, length);
    574 }
    575 
    576 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
    577 {
    578     unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
    579     if (!lengthToAppend)
    580         return;
    581     if (m_singleCharacterBuffer) {
    582         ASSERT(!position);
    583         builder.append(m_singleCharacterBuffer);
    584     } else {
    585         builder.append(string(), startOffset() + position, lengthToAppend);
    586     }
    587 }
    588 
    589 bool TextIterator::handleTextNode()
    590 {
    591     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
    592         return false;
    593 
    594     RenderText* renderer = toRenderText(m_node->renderer());
    595 
    596     m_lastTextNode = m_node;
    597     String str = renderer->text();
    598 
    599     // handle pre-formatted text
    600     if (!renderer->style()->collapseWhiteSpace()) {
    601         int runStart = m_offset;
    602         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
    603             emitCharacter(' ', m_node, 0, runStart, runStart);
    604             return false;
    605         }
    606         if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
    607             handleTextNodeFirstLetter(toRenderTextFragment(renderer));
    608             if (m_firstLetterText) {
    609                 String firstLetter = m_firstLetterText->text();
    610                 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
    611                 m_firstLetterText = 0;
    612                 m_textBox = 0;
    613                 return false;
    614             }
    615         }
    616         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    617             return false;
    618         int strLength = str.length();
    619         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
    620         int runEnd = std::min(strLength, end);
    621 
    622         if (runStart >= runEnd)
    623             return true;
    624 
    625         emitText(m_node, runStart, runEnd);
    626         return true;
    627     }
    628 
    629     if (renderer->firstTextBox())
    630         m_textBox = renderer->firstTextBox();
    631 
    632     bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
    633     if (shouldHandleFirstLetter)
    634         handleTextNodeFirstLetter(toRenderTextFragment(renderer));
    635 
    636     if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
    637         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    638             return false;
    639         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
    640         return true;
    641     }
    642 
    643     if (m_firstLetterText)
    644         renderer = m_firstLetterText;
    645 
    646     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
    647     if (renderer->containsReversedText()) {
    648         m_sortedTextBoxes.clear();
    649         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
    650             m_sortedTextBoxes.append(textBox);
    651         }
    652         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
    653         m_sortedTextBoxesPosition = 0;
    654         m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
    655     }
    656 
    657     handleTextBox();
    658     return true;
    659 }
    660 
    661 void TextIterator::handleTextBox()
    662 {
    663     RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
    664     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
    665         m_textBox = 0;
    666         return;
    667     }
    668     String str = renderer->text();
    669     unsigned start = m_offset;
    670     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
    671     while (m_textBox) {
    672         unsigned textBoxStart = m_textBox->start();
    673         unsigned runStart = std::max(textBoxStart, start);
    674 
    675         // Check for collapsed space at the start of this run.
    676         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
    677         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
    678             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
    679         if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
    680             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
    681                 unsigned spaceRunStart = runStart - 1;
    682                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
    683                     --spaceRunStart;
    684                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
    685             } else {
    686                 emitCharacter(' ', m_node, 0, runStart, runStart);
    687             }
    688             return;
    689         }
    690         unsigned textBoxEnd = textBoxStart + m_textBox->len();
    691         unsigned runEnd = std::min(textBoxEnd, end);
    692 
    693         // Determine what the next text box will be, but don't advance yet
    694         InlineTextBox* nextTextBox = 0;
    695         if (renderer->containsReversedText()) {
    696             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
    697                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
    698         } else {
    699             nextTextBox = m_textBox->nextTextBox();
    700         }
    701         ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
    702 
    703         if (runStart < runEnd) {
    704             // Handle either a single newline character (which becomes a space),
    705             // or a run of characters that does not include a newline.
    706             // This effectively translates newlines to spaces without copying the text.
    707             if (str[runStart] == '\n') {
    708                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
    709                 m_offset = runStart + 1;
    710             } else {
    711                 size_t subrunEnd = str.find('\n', runStart);
    712                 if (subrunEnd == kNotFound || subrunEnd > runEnd)
    713                     subrunEnd = runEnd;
    714 
    715                 m_offset = subrunEnd;
    716                 emitText(m_node, renderer, runStart, subrunEnd);
    717             }
    718 
    719             // If we are doing a subrun that doesn't go to the end of the text box,
    720             // come back again to finish handling this text box; don't advance to the next one.
    721             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
    722                 return;
    723 
    724             // Advance and return
    725             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
    726             if (nextRunStart > runEnd)
    727                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
    728             m_textBox = nextTextBox;
    729             if (renderer->containsReversedText())
    730                 ++m_sortedTextBoxesPosition;
    731             return;
    732         }
    733         // Advance and continue
    734         m_textBox = nextTextBox;
    735         if (renderer->containsReversedText())
    736             ++m_sortedTextBoxesPosition;
    737     }
    738     if (!m_textBox && m_remainingTextBox) {
    739         m_textBox = m_remainingTextBox;
    740         m_remainingTextBox = 0;
    741         m_firstLetterText = 0;
    742         m_offset = 0;
    743         handleTextBox();
    744     }
    745 }
    746 
    747 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
    748 {
    749     if (!firstLetter)
    750         return 0;
    751 
    752     // FIXME: Should this check descendent objects?
    753     for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) {
    754         if (current->isText())
    755             return toRenderText(current);
    756     }
    757     return 0;
    758 }
    759 
    760 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
    761 {
    762     if (renderer->firstLetter()) {
    763         RenderBoxModelObject* r = renderer->firstLetter();
    764         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    765             return;
    766         if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
    767             m_handledFirstLetter = true;
    768             m_remainingTextBox = m_textBox;
    769             m_textBox = firstLetter->firstTextBox();
    770             m_sortedTextBoxes.clear();
    771             m_firstLetterText = firstLetter;
    772         }
    773     }
    774     m_handledFirstLetter = true;
    775 }
    776 
    777 bool TextIterator::handleReplacedElement()
    778 {
    779     if (m_fullyClippedStack.top())
    780         return false;
    781 
    782     RenderObject* renderer = m_node->renderer();
    783     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    784         return false;
    785 
    786     if (m_emitsObjectReplacementCharacter) {
    787         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
    788         return true;
    789     }
    790 
    791     if (m_lastTextNodeEndedWithCollapsedSpace) {
    792         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
    793         return false;
    794     }
    795 
    796     if (m_entersTextControls && renderer->isTextControl()) {
    797         // The shadow tree should be already visited.
    798         return true;
    799     }
    800 
    801     m_hasEmitted = true;
    802 
    803     if (m_emitsCharactersBetweenAllVisiblePositions) {
    804         // We want replaced elements to behave like punctuation for boundary
    805         // finding, and to simply take up space for the selection preservation
    806         // code in moveParagraphs, so we use a comma.
    807         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
    808         return true;
    809     }
    810 
    811     m_positionNode = m_node->parentNode();
    812     m_positionOffsetBaseNode = m_node;
    813     m_positionStartOffset = 0;
    814     m_positionEndOffset = 1;
    815     m_singleCharacterBuffer = 0;
    816 
    817     if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
    818         m_text = toRenderImage(renderer)->altText();
    819         if (!m_text.isEmpty()) {
    820             m_textLength = m_text.length();
    821             m_lastCharacter = m_text[m_textLength - 1];
    822             return true;
    823         }
    824     }
    825 
    826     m_textLength = 0;
    827     m_lastCharacter = 0;
    828 
    829     return true;
    830 }
    831 
    832 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
    833 {
    834     if (renderer->style()->visibility() == VISIBLE)
    835         return true;
    836     if (renderer->isTextFragment()) {
    837         RenderTextFragment* fragment = toRenderTextFragment(renderer);
    838         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
    839             return true;
    840     }
    841     return false;
    842 }
    843 
    844 static bool shouldEmitTabBeforeNode(Node* node)
    845 {
    846     RenderObject* r = node->renderer();
    847 
    848     // Table cells are delimited by tabs.
    849     if (!r || !isTableCell(node))
    850         return false;
    851 
    852     // Want a tab before every cell other than the first one
    853     RenderTableCell* rc = toRenderTableCell(r);
    854     RenderTable* t = rc->table();
    855     return t && (t->cellBefore(rc) || t->cellAbove(rc));
    856 }
    857 
    858 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
    859 {
    860     RenderObject* renderer = node->renderer();
    861 
    862     if (renderer ? !renderer->isBR() : !isHTMLBRElement(node))
    863         return false;
    864     return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost()));
    865 }
    866 
    867 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
    868 {
    869     // Block flow (versus inline flow) is represented by having
    870     // a newline both before and after the element.
    871     RenderObject* r = node.renderer();
    872     if (!r) {
    873         return (node.hasTagName(blockquoteTag)
    874             || node.hasTagName(ddTag)
    875             || node.hasTagName(divTag)
    876             || node.hasTagName(dlTag)
    877             || node.hasTagName(dtTag)
    878             || node.hasTagName(h1Tag)
    879             || node.hasTagName(h2Tag)
    880             || node.hasTagName(h3Tag)
    881             || node.hasTagName(h4Tag)
    882             || node.hasTagName(h5Tag)
    883             || node.hasTagName(h6Tag)
    884             || node.hasTagName(hrTag)
    885             || node.hasTagName(liTag)
    886             || node.hasTagName(listingTag)
    887             || node.hasTagName(olTag)
    888             || node.hasTagName(pTag)
    889             || node.hasTagName(preTag)
    890             || node.hasTagName(trTag)
    891             || node.hasTagName(ulTag));
    892     }
    893 
    894     // Need to make an exception for table cells, because they are blocks, but we
    895     // want them tab-delimited rather than having newlines before and after.
    896     if (isTableCell(&node))
    897         return false;
    898 
    899     // Need to make an exception for table row elements, because they are neither
    900     // "inline" or "RenderBlock", but we want newlines for them.
    901     if (r->isTableRow()) {
    902         RenderTable* t = toRenderTableRow(r)->table();
    903         if (t && !t->isInline())
    904             return true;
    905     }
    906 
    907     return !r->isInline() && r->isRenderBlock()
    908         && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
    909 }
    910 
    911 static bool shouldEmitNewlineAfterNode(Node& node)
    912 {
    913     // FIXME: It should be better but slower to create a VisiblePosition here.
    914     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
    915         return false;
    916     // Check if this is the very last renderer in the document.
    917     // If so, then we should not emit a newline.
    918     Node* next = &node;
    919     while ((next = NodeTraversal::nextSkippingChildren(*next))) {
    920         if (next->renderer())
    921             return true;
    922     }
    923     return false;
    924 }
    925 
    926 static bool shouldEmitNewlineBeforeNode(Node& node)
    927 {
    928     return shouldEmitNewlinesBeforeAndAfterNode(node);
    929 }
    930 
    931 static bool shouldEmitExtraNewlineForNode(Node* node)
    932 {
    933     // When there is a significant collapsed bottom margin, emit an extra
    934     // newline for a more realistic result. We end up getting the right
    935     // result even without margin collapsing. For example: <div><p>text</p></div>
    936     // will work right even if both the <div> and the <p> have bottom margins.
    937     RenderObject* r = node->renderer();
    938     if (!r || !r->isBox())
    939         return false;
    940 
    941     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
    942     // not to do this at all
    943     if (node->hasTagName(h1Tag)
    944         || node->hasTagName(h2Tag)
    945         || node->hasTagName(h3Tag)
    946         || node->hasTagName(h4Tag)
    947         || node->hasTagName(h5Tag)
    948         || node->hasTagName(h6Tag)
    949         || node->hasTagName(pTag)) {
    950         RenderStyle* style = r->style();
    951         if (style) {
    952             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
    953             int fontSize = style->fontDescription().computedPixelSize();
    954             if (bottomMargin * 2 >= fontSize)
    955                 return true;
    956         }
    957     }
    958 
    959     return false;
    960 }
    961 
    962 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
    963 {
    964     const String& text = renderer->text();
    965     int length = text.length();
    966     for (int i = textEnd; i < length; ++i) {
    967         if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
    968             return i - textEnd;
    969     }
    970 
    971     return length - textEnd;
    972 }
    973 
    974 static int maxOffsetIncludingCollapsedSpaces(Node* node)
    975 {
    976     int offset = caretMaxOffset(node);
    977 
    978     if (node->renderer() && node->renderer()->isText())
    979         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
    980 
    981     return offset;
    982 }
    983 
    984 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
    985 bool TextIterator::shouldRepresentNodeOffsetZero()
    986 {
    987     if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTable(m_node))
    988         return true;
    989 
    990     // Leave element positioned flush with start of a paragraph
    991     // (e.g. do not insert tab before a table cell at the start of a paragraph)
    992     if (m_lastCharacter == '\n')
    993         return false;
    994 
    995     // Otherwise, show the position if we have emitted any characters
    996     if (m_hasEmitted)
    997         return true;
    998 
    999     // We've not emitted anything yet. Generally, there is no need for any positioning then.
   1000     // The only exception is when the element is visually not in the same line as
   1001     // the start of the range (e.g. the range starts at the end of the previous paragraph).
   1002     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
   1003     // make quicker checks to possibly avoid that. Another check that we could make is
   1004     // is whether the inline vs block flow changed since the previous visible element.
   1005     // I think we're already in a special enough case that that won't be needed, tho.
   1006 
   1007     // No character needed if this is the first node in the range.
   1008     if (m_node == m_startContainer)
   1009         return false;
   1010 
   1011     // If we are outside the start container's subtree, assume we need to emit.
   1012     // FIXME: m_startContainer could be an inline block
   1013     if (!m_node->isDescendantOf(m_startContainer))
   1014         return true;
   1015 
   1016     // If we started as m_startContainer offset 0 and the current node is a descendant of
   1017     // the start container, we already had enough context to correctly decide whether to
   1018     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
   1019     // so don't second guess that now.
   1020     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
   1021     // immaterial since we likely would have already emitted something by now.
   1022     if (!m_startOffset)
   1023         return false;
   1024 
   1025     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
   1026     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
   1027     // we would create VisiblePositions on every call to this function without this check.
   1028     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
   1029         || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node)))
   1030         return false;
   1031 
   1032     // The startPos.isNotNull() check is needed because the start could be before the body,
   1033     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
   1034     // The currPos.isNotNull() check is needed because positions in non-HTML content
   1035     // (like SVG) do not have visible positions, and we don't want to emit for them either.
   1036     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
   1037     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
   1038     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
   1039 }
   1040 
   1041 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
   1042 {
   1043     return isRenderedTable(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
   1044 }
   1045 
   1046 void TextIterator::representNodeOffsetZero()
   1047 {
   1048     // Emit a character to show the positioning of m_node.
   1049 
   1050     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
   1051     // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
   1052     // on m_node to see if it necessitates emitting a character first and will early return
   1053     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
   1054     if (shouldEmitTabBeforeNode(m_node)) {
   1055         if (shouldRepresentNodeOffsetZero())
   1056             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
   1057     } else if (shouldEmitNewlineBeforeNode(*m_node)) {
   1058         if (shouldRepresentNodeOffsetZero())
   1059             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
   1060     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
   1061         if (shouldRepresentNodeOffsetZero())
   1062             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
   1063     }
   1064 }
   1065 
   1066 bool TextIterator::handleNonTextNode()
   1067 {
   1068     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
   1069         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
   1070     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
   1071         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
   1072     else
   1073         representNodeOffsetZero();
   1074 
   1075     return true;
   1076 }
   1077 
   1078 void TextIterator::exitNode()
   1079 {
   1080     // prevent emitting a newline when exiting a collapsed block at beginning of the range
   1081     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
   1082     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
   1083     // therefore look like a blank line.
   1084     if (!m_hasEmitted)
   1085         return;
   1086 
   1087     // Emit with a position *inside* m_node, after m_node's contents, in
   1088     // case it is a block, because the run should start where the
   1089     // emitted character is positioned visually.
   1090     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
   1091     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
   1092     // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
   1093     // TextIterator in _web_attributedStringFromRange.
   1094     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
   1095     if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
   1096         // use extra newline to represent margin bottom, as needed
   1097         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
   1098 
   1099         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
   1100         // contain a VisiblePosition when doing selection preservation.
   1101         if (m_lastCharacter != '\n') {
   1102             // insert a newline with a position following this block's contents.
   1103             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
   1104             // remember whether to later add a newline for the current node
   1105             ASSERT(!m_needsAnotherNewline);
   1106             m_needsAnotherNewline = addNewline;
   1107         } else if (addNewline) {
   1108             // insert a newline with a position following this block's contents.
   1109             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
   1110         }
   1111     }
   1112 
   1113     // If nothing was emitted, see if we need to emit a space.
   1114     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
   1115         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
   1116 }
   1117 
   1118 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
   1119 {
   1120     m_hasEmitted = true;
   1121 
   1122     // remember information with which to construct the TextIterator::range()
   1123     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
   1124     m_positionNode = textNode;
   1125     m_positionOffsetBaseNode = offsetBaseNode;
   1126     m_positionStartOffset = textStartOffset;
   1127     m_positionEndOffset = textEndOffset;
   1128 
   1129     // remember information with which to construct the TextIterator::characters() and length()
   1130     m_singleCharacterBuffer = c;
   1131     ASSERT(m_singleCharacterBuffer);
   1132     m_textLength = 1;
   1133 
   1134     // remember some iteration state
   1135     m_lastTextNodeEndedWithCollapsedSpace = false;
   1136     m_lastCharacter = c;
   1137 }
   1138 
   1139 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
   1140 {
   1141     RenderText* renderer = toRenderText(renderObject);
   1142     m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text();
   1143     ASSERT(!m_text.isEmpty());
   1144     ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
   1145     ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
   1146     ASSERT(textStartOffset <= textEndOffset);
   1147 
   1148     m_positionNode = textNode;
   1149     m_positionOffsetBaseNode = nullptr;
   1150     m_positionStartOffset = textStartOffset;
   1151     m_positionEndOffset = textEndOffset;
   1152     m_singleCharacterBuffer = 0;
   1153     m_textLength = textEndOffset - textStartOffset;
   1154     m_lastCharacter = m_text[textEndOffset - 1];
   1155 
   1156     m_lastTextNodeEndedWithCollapsedSpace = false;
   1157     m_hasEmitted = true;
   1158 }
   1159 
   1160 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
   1161 {
   1162     emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
   1163 }
   1164 
   1165 PassRefPtrWillBeRawPtr<Range> TextIterator::range() const
   1166 {
   1167     // use the current run information, if we have it
   1168     if (m_positionNode) {
   1169         if (m_positionOffsetBaseNode) {
   1170             int index = m_positionOffsetBaseNode->nodeIndex();
   1171             m_positionStartOffset += index;
   1172             m_positionEndOffset += index;
   1173             m_positionOffsetBaseNode = nullptr;
   1174         }
   1175         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1176     }
   1177 
   1178     // otherwise, return the end of the overall range we were given
   1179     if (m_endContainer)
   1180         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
   1181 
   1182     return nullptr;
   1183 }
   1184 
   1185 Node* TextIterator::node() const
   1186 {
   1187     RefPtrWillBeRawPtr<Range> textRange = range();
   1188     if (!textRange)
   1189         return 0;
   1190 
   1191     Node* node = textRange->startContainer();
   1192     if (!node)
   1193         return 0;
   1194     if (node->offsetInCharacters())
   1195         return node;
   1196 
   1197     return node->traverseToChildAt(textRange->startOffset());
   1198 }
   1199 
   1200 // --------
   1201 
   1202 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior)
   1203     : m_node(nullptr)
   1204     , m_offset(0)
   1205     , m_handledNode(false)
   1206     , m_handledChildren(false)
   1207     , m_startNode(nullptr)
   1208     , m_startOffset(0)
   1209     , m_endNode(nullptr)
   1210     , m_endOffset(0)
   1211     , m_positionNode(nullptr)
   1212     , m_positionStartOffset(0)
   1213     , m_positionEndOffset(0)
   1214     , m_textOffset(0)
   1215     , m_textLength(0)
   1216     , m_lastTextNode(nullptr)
   1217     , m_lastCharacter(0)
   1218     , m_singleCharacterBuffer(0)
   1219     , m_havePassedStartNode(false)
   1220     , m_shouldHandleFirstLetter(false)
   1221     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
   1222     , m_shouldStop(false)
   1223     , m_emitsOriginalText(false)
   1224 {
   1225     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
   1226 
   1227     if (!r)
   1228         return;
   1229 
   1230     Node* startNode = r->startContainer();
   1231     if (!startNode)
   1232         return;
   1233     Node* endNode = r->endContainer();
   1234     int startOffset = r->startOffset();
   1235     int endOffset = r->endOffset();
   1236 
   1237     if (!startNode->offsetInCharacters() && startOffset >= 0) {
   1238         // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
   1239         // instead of calling countChildren() to avoid traversing the children twice.
   1240         if (Node* childAtOffset = startNode->traverseToChildAt(startOffset)) {
   1241             startNode = childAtOffset;
   1242             startOffset = 0;
   1243         }
   1244     }
   1245     if (!endNode->offsetInCharacters() && endOffset > 0) {
   1246         // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
   1247         // instead of calling countChildren() to avoid traversing the children twice.
   1248         if (Node* childAtOffset = endNode->traverseToChildAt(endOffset - 1)) {
   1249             endNode = childAtOffset;
   1250             endOffset = lastOffsetInNode(endNode);
   1251         }
   1252     }
   1253 
   1254     m_node = endNode;
   1255     setUpFullyClippedStack(m_fullyClippedStack, m_node);
   1256     m_offset = endOffset;
   1257     m_handledNode = false;
   1258     m_handledChildren = !endOffset;
   1259 
   1260     m_startNode = startNode;
   1261     m_startOffset = startOffset;
   1262     m_endNode = endNode;
   1263     m_endOffset = endOffset;
   1264 
   1265 #ifndef NDEBUG
   1266     // Need this just because of the assert.
   1267     m_positionNode = endNode;
   1268 #endif
   1269 
   1270     m_lastTextNode = nullptr;
   1271     m_lastCharacter = '\n';
   1272 
   1273     m_havePassedStartNode = false;
   1274 
   1275     advance();
   1276 }
   1277 
   1278 void SimplifiedBackwardsTextIterator::advance()
   1279 {
   1280     ASSERT(m_positionNode);
   1281 
   1282     if (m_shouldStop)
   1283         return;
   1284 
   1285     if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
   1286         m_shouldStop = true;
   1287         return;
   1288     }
   1289 
   1290     m_positionNode = nullptr;
   1291     m_textLength = 0;
   1292 
   1293     while (m_node && !m_havePassedStartNode) {
   1294         // Don't handle node if we start iterating at [node, 0].
   1295         if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) {
   1296             RenderObject* renderer = m_node->renderer();
   1297             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
   1298                 // FIXME: What about CDATA_SECTION_NODE?
   1299                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1300                     m_handledNode = handleTextNode();
   1301             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
   1302                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1303                     m_handledNode = handleReplacedElement();
   1304             } else {
   1305                 m_handledNode = handleNonTextNode();
   1306             }
   1307             if (m_positionNode)
   1308                 return;
   1309         }
   1310 
   1311         if (!m_handledChildren && m_node->hasChildren()) {
   1312             m_node = m_node->lastChild();
   1313             pushFullyClippedState(m_fullyClippedStack, m_node);
   1314         } else {
   1315             // Exit empty containers as we pass over them or containers
   1316             // where [container, 0] is where we started iterating.
   1317             if (!m_handledNode
   1318                 && canHaveChildrenForEditing(m_node)
   1319                 && m_node->parentNode()
   1320                 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
   1321                 exitNode();
   1322                 if (m_positionNode) {
   1323                     m_handledNode = true;
   1324                     m_handledChildren = true;
   1325                     return;
   1326                 }
   1327             }
   1328 
   1329             // Exit all other containers.
   1330             while (!m_node->previousSibling()) {
   1331                 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
   1332                     break;
   1333                 m_fullyClippedStack.pop();
   1334                 exitNode();
   1335                 if (m_positionNode) {
   1336                     m_handledNode = true;
   1337                     m_handledChildren = true;
   1338                     return;
   1339                 }
   1340             }
   1341 
   1342             m_fullyClippedStack.pop();
   1343             if (advanceRespectingRange(m_node->previousSibling()))
   1344                 pushFullyClippedState(m_fullyClippedStack, m_node);
   1345             else
   1346                 m_node = nullptr;
   1347         }
   1348 
   1349         // For the purpose of word boundary detection,
   1350         // we should iterate all visible text and trailing (collapsed) whitespaces.
   1351         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
   1352         m_handledNode = false;
   1353         m_handledChildren = false;
   1354 
   1355         if (m_positionNode)
   1356             return;
   1357     }
   1358 }
   1359 
   1360 bool SimplifiedBackwardsTextIterator::handleTextNode()
   1361 {
   1362     m_lastTextNode = m_node;
   1363 
   1364     int startOffset;
   1365     int offsetInNode;
   1366     RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
   1367     if (!renderer)
   1368         return true;
   1369 
   1370     String text = renderer->text();
   1371     if (!renderer->firstTextBox() && text.length() > 0)
   1372         return true;
   1373 
   1374     m_positionEndOffset = m_offset;
   1375     m_offset = startOffset + offsetInNode;
   1376     m_positionNode = m_node;
   1377     m_positionStartOffset = m_offset;
   1378 
   1379     ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
   1380     ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
   1381     ASSERT(m_positionStartOffset <= m_positionEndOffset);
   1382 
   1383     m_textLength = m_positionEndOffset - m_positionStartOffset;
   1384     m_textOffset = m_positionStartOffset - offsetInNode;
   1385     m_textContainer = text;
   1386     m_singleCharacterBuffer = 0;
   1387     RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
   1388 
   1389     m_lastCharacter = text[m_positionEndOffset - 1];
   1390 
   1391     return !m_shouldHandleFirstLetter;
   1392 }
   1393 
   1394 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
   1395 {
   1396     RenderText* renderer = toRenderText(m_node->renderer());
   1397     startOffset = (m_node == m_startNode) ? m_startOffset : 0;
   1398 
   1399     if (!renderer->isTextFragment()) {
   1400         offsetInNode = 0;
   1401         return renderer;
   1402     }
   1403 
   1404     RenderTextFragment* fragment = toRenderTextFragment(renderer);
   1405     int offsetAfterFirstLetter = fragment->start();
   1406     if (startOffset >= offsetAfterFirstLetter) {
   1407         ASSERT(!m_shouldHandleFirstLetter);
   1408         offsetInNode = offsetAfterFirstLetter;
   1409         return renderer;
   1410     }
   1411 
   1412     if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
   1413         m_shouldHandleFirstLetter = true;
   1414         offsetInNode = offsetAfterFirstLetter;
   1415         return renderer;
   1416     }
   1417 
   1418     m_shouldHandleFirstLetter = false;
   1419     offsetInNode = 0;
   1420     RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter());
   1421 
   1422     m_offset = firstLetterRenderer->caretMaxOffset();
   1423     m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset);
   1424 
   1425     return firstLetterRenderer;
   1426 }
   1427 
   1428 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
   1429 {
   1430     unsigned index = m_node->nodeIndex();
   1431     // We want replaced elements to behave like punctuation for boundary
   1432     // finding, and to simply take up space for the selection preservation
   1433     // code in moveParagraphs, so we use a comma. Unconditionally emit
   1434     // here because this iterator is only used for boundary finding.
   1435     emitCharacter(',', m_node->parentNode(), index, index + 1);
   1436     return true;
   1437 }
   1438 
   1439 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
   1440 {
   1441     // We can use a linefeed in place of a tab because this simple iterator is only used to
   1442     // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
   1443     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
   1444         unsigned index = m_node->nodeIndex();
   1445         // The start of this emitted range is wrong. Ensuring correctness would require
   1446         // VisiblePositions and so would be slow. previousBoundary expects this.
   1447         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
   1448     }
   1449     return true;
   1450 }
   1451 
   1452 void SimplifiedBackwardsTextIterator::exitNode()
   1453 {
   1454     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
   1455         // The start of this emitted range is wrong. Ensuring correctness would require
   1456         // VisiblePositions and so would be slow. previousBoundary expects this.
   1457         emitCharacter('\n', m_node, 0, 0);
   1458     }
   1459 }
   1460 
   1461 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
   1462 {
   1463     m_singleCharacterBuffer = c;
   1464     m_positionNode = node;
   1465     m_positionStartOffset = startOffset;
   1466     m_positionEndOffset = endOffset;
   1467     m_textOffset = 0;
   1468     m_textLength = 1;
   1469     m_lastCharacter = c;
   1470 }
   1471 
   1472 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
   1473 {
   1474     if (!next)
   1475         return false;
   1476     m_havePassedStartNode |= m_node == m_startNode;
   1477     if (m_havePassedStartNode)
   1478         return false;
   1479     m_node = next;
   1480     return true;
   1481 }
   1482 
   1483 PassRefPtrWillBeRawPtr<Range> SimplifiedBackwardsTextIterator::range() const
   1484 {
   1485     if (m_positionNode)
   1486         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1487 
   1488     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
   1489 }
   1490 
   1491 // --------
   1492 
   1493 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
   1494     : m_offset(0)
   1495     , m_runOffset(0)
   1496     , m_atBreak(true)
   1497     , m_textIterator(range, behavior)
   1498 {
   1499     initialize();
   1500 }
   1501 
   1502 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
   1503     : m_offset(0)
   1504     , m_runOffset(0)
   1505     , m_atBreak(true)
   1506     , m_textIterator(start, end, behavior)
   1507 {
   1508     initialize();
   1509 }
   1510 
   1511 void CharacterIterator::initialize()
   1512 {
   1513     while (!atEnd() && !m_textIterator.length())
   1514         m_textIterator.advance();
   1515 }
   1516 
   1517 PassRefPtrWillBeRawPtr<Range> CharacterIterator::range() const
   1518 {
   1519     RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
   1520     if (!m_textIterator.atEnd()) {
   1521         if (m_textIterator.length() <= 1) {
   1522             ASSERT(!m_runOffset);
   1523         } else {
   1524             Node* n = r->startContainer();
   1525             ASSERT(n == r->endContainer());
   1526             int offset = r->startOffset() + m_runOffset;
   1527             r->setStart(n, offset, ASSERT_NO_EXCEPTION);
   1528             r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
   1529         }
   1530     }
   1531     return r.release();
   1532 }
   1533 
   1534 void CharacterIterator::advance(int count)
   1535 {
   1536     if (count <= 0) {
   1537         ASSERT(!count);
   1538         return;
   1539     }
   1540 
   1541     m_atBreak = false;
   1542 
   1543     // easy if there is enough left in the current m_textIterator run
   1544     int remaining = m_textIterator.length() - m_runOffset;
   1545     if (count < remaining) {
   1546         m_runOffset += count;
   1547         m_offset += count;
   1548         return;
   1549     }
   1550 
   1551     // exhaust the current m_textIterator run
   1552     count -= remaining;
   1553     m_offset += remaining;
   1554 
   1555     // move to a subsequent m_textIterator run
   1556     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1557         int runLength = m_textIterator.length();
   1558         if (!runLength) {
   1559             m_atBreak = true;
   1560         } else {
   1561             // see whether this is m_textIterator to use
   1562             if (count < runLength) {
   1563                 m_runOffset = count;
   1564                 m_offset += count;
   1565                 return;
   1566             }
   1567 
   1568             // exhaust this m_textIterator run
   1569             count -= runLength;
   1570             m_offset += runLength;
   1571         }
   1572     }
   1573 
   1574     // ran to the end of the m_textIterator... no more runs left
   1575     m_atBreak = true;
   1576     m_runOffset = 0;
   1577 }
   1578 
   1579 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition)
   1580 {
   1581     it.advance(offset);
   1582     RefPtrWillBeRawPtr<Range> start = it.range();
   1583     startPosition = start->startPosition();
   1584 
   1585     if (length > 1)
   1586         it.advance(length - 1);
   1587     RefPtrWillBeRawPtr<Range> end = it.range();
   1588     endPosition = end->endPosition();
   1589 }
   1590 
   1591 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
   1592     : m_offset(0)
   1593     , m_runOffset(0)
   1594     , m_atBreak(true)
   1595     , m_textIterator(range, behavior)
   1596 {
   1597     while (!atEnd() && !m_textIterator.length())
   1598         m_textIterator.advance();
   1599 }
   1600 
   1601 PassRefPtrWillBeRawPtr<Range> BackwardsCharacterIterator::range() const
   1602 {
   1603     RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
   1604     if (!m_textIterator.atEnd()) {
   1605         if (m_textIterator.length() <= 1) {
   1606             ASSERT(!m_runOffset);
   1607         } else {
   1608             Node* n = r->startContainer();
   1609             ASSERT(n == r->endContainer());
   1610             int offset = r->endOffset() - m_runOffset;
   1611             r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION);
   1612             r->setEnd(n, offset, ASSERT_NO_EXCEPTION);
   1613         }
   1614     }
   1615     return r.release();
   1616 }
   1617 
   1618 void BackwardsCharacterIterator::advance(int count)
   1619 {
   1620     if (count <= 0) {
   1621         ASSERT(!count);
   1622         return;
   1623     }
   1624 
   1625     m_atBreak = false;
   1626 
   1627     int remaining = m_textIterator.length() - m_runOffset;
   1628     if (count < remaining) {
   1629         m_runOffset += count;
   1630         m_offset += count;
   1631         return;
   1632     }
   1633 
   1634     count -= remaining;
   1635     m_offset += remaining;
   1636 
   1637     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1638         int runLength = m_textIterator.length();
   1639         if (!runLength) {
   1640             m_atBreak = true;
   1641         } else {
   1642             if (count < runLength) {
   1643                 m_runOffset = count;
   1644                 m_offset += count;
   1645                 return;
   1646             }
   1647 
   1648             count -= runLength;
   1649             m_offset += runLength;
   1650         }
   1651     }
   1652 
   1653     m_atBreak = true;
   1654     m_runOffset = 0;
   1655 }
   1656 
   1657 // --------
   1658 
   1659 WordAwareIterator::WordAwareIterator(const Range* range)
   1660     : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
   1661     , m_textIterator(range)
   1662 {
   1663     advance(); // Get in position over the first chunk of text.
   1664 }
   1665 
   1666 WordAwareIterator::~WordAwareIterator()
   1667 {
   1668 }
   1669 
   1670 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
   1671 
   1672 void WordAwareIterator::advance()
   1673 {
   1674     m_buffer.clear();
   1675 
   1676     // If last time we did a look-ahead, start with that looked-ahead chunk now
   1677     if (!m_didLookAhead) {
   1678         ASSERT(!m_textIterator.atEnd());
   1679         m_textIterator.advance();
   1680     }
   1681     m_didLookAhead = false;
   1682 
   1683     // Go to next non-empty chunk.
   1684     while (!m_textIterator.atEnd() && !m_textIterator.length())
   1685         m_textIterator.advance();
   1686 
   1687     m_range = m_textIterator.range();
   1688 
   1689     if (m_textIterator.atEnd())
   1690         return;
   1691 
   1692     while (1) {
   1693         // If this chunk ends in whitespace we can just use it as our chunk.
   1694         if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
   1695             return;
   1696 
   1697         // If this is the first chunk that failed, save it in m_buffer before look ahead.
   1698         if (m_buffer.isEmpty())
   1699             m_textIterator.appendTextTo(m_buffer);
   1700 
   1701         // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
   1702         m_textIterator.advance();
   1703         if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
   1704             m_didLookAhead = true;
   1705             return;
   1706         }
   1707 
   1708         // Start gobbling chunks until we get to a suitable stopping point
   1709         m_textIterator.appendTextTo(m_buffer);
   1710         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION);
   1711     }
   1712 }
   1713 
   1714 int WordAwareIterator::length() const
   1715 {
   1716     if (!m_buffer.isEmpty())
   1717         return m_buffer.size();
   1718     return m_textIterator.length();
   1719 }
   1720 
   1721 String WordAwareIterator::substring(unsigned position, unsigned length) const
   1722 {
   1723     if (!m_buffer.isEmpty())
   1724         return String(m_buffer.data() + position, length);
   1725     return m_textIterator.substring(position, length);
   1726 }
   1727 
   1728 UChar WordAwareIterator::characterAt(unsigned index) const
   1729 {
   1730     if (!m_buffer.isEmpty())
   1731         return m_buffer[index];
   1732     return m_textIterator.characterAt(index);
   1733 }
   1734 
   1735 // --------
   1736 
   1737 static const size_t minimumSearchBufferSize = 8192;
   1738 
   1739 #ifndef NDEBUG
   1740 static bool searcherInUse;
   1741 #endif
   1742 
   1743 static UStringSearch* createSearcher()
   1744 {
   1745     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
   1746     // but it doesn't matter exactly what it is, since we don't perform any searches
   1747     // without setting both the pattern and the text.
   1748     UErrorCode status = U_ZERO_ERROR;
   1749     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
   1750     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
   1751     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
   1752     return searcher;
   1753 }
   1754 
   1755 static UStringSearch* searcher()
   1756 {
   1757     static UStringSearch* searcher = createSearcher();
   1758     return searcher;
   1759 }
   1760 
   1761 static inline void lockSearcher()
   1762 {
   1763 #ifndef NDEBUG
   1764     ASSERT(!searcherInUse);
   1765     searcherInUse = true;
   1766 #endif
   1767 }
   1768 
   1769 static inline void unlockSearcher()
   1770 {
   1771 #ifndef NDEBUG
   1772     ASSERT(searcherInUse);
   1773     searcherInUse = false;
   1774 #endif
   1775 }
   1776 
   1777 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
   1778     : m_options(options)
   1779     , m_prefixLength(0)
   1780     , m_numberOfCharactersJustAppended(0)
   1781     , m_atBreak(true)
   1782     , m_needsMoreContext(options & AtWordStarts)
   1783     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
   1784 {
   1785     ASSERT(!target.isEmpty());
   1786     target.appendTo(m_target);
   1787 
   1788     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
   1789     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
   1790     // to add tailoring on top of the locale-specific tailoring as of this writing.
   1791     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
   1792 
   1793     size_t targetLength = m_target.size();
   1794     m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
   1795     m_overlap = m_buffer.capacity() / 4;
   1796 
   1797     if ((m_options & AtWordStarts) && targetLength) {
   1798         UChar32 targetFirstCharacter;
   1799         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
   1800         // Characters in the separator category never really occur at the beginning of a word,
   1801         // so if the target begins with such a character, we just ignore the AtWordStart option.
   1802         if (isSeparator(targetFirstCharacter)) {
   1803             m_options &= ~AtWordStarts;
   1804             m_needsMoreContext = false;
   1805         }
   1806     }
   1807 
   1808     // Grab the single global searcher.
   1809     // If we ever have a reason to do more than once search buffer at once, we'll have
   1810     // to move to multiple searchers.
   1811     lockSearcher();
   1812 
   1813     UStringSearch* searcher = WebCore::searcher();
   1814     UCollator* collator = usearch_getCollator(searcher);
   1815 
   1816     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
   1817     if (ucol_getStrength(collator) != strength) {
   1818         ucol_setStrength(collator, strength);
   1819         usearch_reset(searcher);
   1820     }
   1821 
   1822     UErrorCode status = U_ZERO_ERROR;
   1823     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
   1824     ASSERT(status == U_ZERO_ERROR);
   1825 
   1826     // The kana workaround requires a normalized copy of the target string.
   1827     if (m_targetRequiresKanaWorkaround)
   1828         normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
   1829 }
   1830 
   1831 inline SearchBuffer::~SearchBuffer()
   1832 {
   1833     // Leave the static object pointing to a valid string.
   1834     UErrorCode status = U_ZERO_ERROR;
   1835     usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
   1836     ASSERT(status == U_ZERO_ERROR);
   1837 
   1838     unlockSearcher();
   1839 }
   1840 
   1841 template<typename CharType>
   1842 inline void SearchBuffer::append(const CharType* characters, size_t length)
   1843 {
   1844     ASSERT(length);
   1845 
   1846     if (m_atBreak) {
   1847         m_buffer.shrink(0);
   1848         m_prefixLength = 0;
   1849         m_atBreak = false;
   1850     } else if (m_buffer.size() == m_buffer.capacity()) {
   1851         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
   1852         m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
   1853         m_buffer.shrink(m_overlap);
   1854     }
   1855 
   1856     size_t oldLength = m_buffer.size();
   1857     size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
   1858     ASSERT(usableLength);
   1859     m_buffer.resize(oldLength + usableLength);
   1860     UChar* destination = m_buffer.data() + oldLength;
   1861     StringImpl::copyChars(destination, characters, usableLength);
   1862     foldQuoteMarksAndSoftHyphens(destination, usableLength);
   1863     m_numberOfCharactersJustAppended = usableLength;
   1864 }
   1865 
   1866 inline bool SearchBuffer::needsMoreContext() const
   1867 {
   1868     return m_needsMoreContext;
   1869 }
   1870 
   1871 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
   1872 {
   1873     ASSERT(m_needsMoreContext);
   1874     ASSERT(m_prefixLength == m_buffer.size());
   1875 
   1876     if (!length)
   1877         return;
   1878 
   1879     m_atBreak = false;
   1880 
   1881     size_t wordBoundaryContextStart = length;
   1882     if (wordBoundaryContextStart) {
   1883         U16_BACK_1(characters, 0, wordBoundaryContextStart);
   1884         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
   1885     }
   1886 
   1887     size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
   1888     m_buffer.prepend(characters + length - usableLength, usableLength);
   1889     m_prefixLength += usableLength;
   1890 
   1891     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
   1892         m_needsMoreContext = false;
   1893 }
   1894 
   1895 inline bool SearchBuffer::atBreak() const
   1896 {
   1897     return m_atBreak;
   1898 }
   1899 
   1900 inline void SearchBuffer::reachedBreak()
   1901 {
   1902     m_atBreak = true;
   1903 }
   1904 
   1905 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
   1906 {
   1907     // This function implements the kana workaround. If usearch treats
   1908     // it as a match, but we do not want to, then it's a "bad match".
   1909     if (!m_targetRequiresKanaWorkaround)
   1910         return false;
   1911 
   1912     // Normalize into a match buffer. We reuse a single buffer rather than
   1913     // creating a new one each time.
   1914     normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);
   1915 
   1916     return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
   1917 }
   1918 
   1919 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
   1920 {
   1921     ASSERT(m_options & AtWordStarts);
   1922 
   1923     if (!start)
   1924         return true;
   1925 
   1926     int size = m_buffer.size();
   1927     int offset = start;
   1928     UChar32 firstCharacter;
   1929     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
   1930 
   1931     if (m_options & TreatMedialCapitalAsWordStart) {
   1932         UChar32 previousCharacter;
   1933         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
   1934 
   1935         if (isSeparator(firstCharacter)) {
   1936             // The start of a separator run is a word start (".org" in "webkit.org").
   1937             if (!isSeparator(previousCharacter))
   1938                 return true;
   1939         } else if (isASCIIUpper(firstCharacter)) {
   1940             // The start of an uppercase run is a word start ("Kit" in "WebKit").
   1941             if (!isASCIIUpper(previousCharacter))
   1942                 return true;
   1943             // The last character of an uppercase run followed by a non-separator, non-digit
   1944             // is a word start ("Request" in "XMLHTTPRequest").
   1945             offset = start;
   1946             U16_FWD_1(m_buffer.data(), offset, size);
   1947             UChar32 nextCharacter = 0;
   1948             if (offset < size)
   1949                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
   1950             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
   1951                 return true;
   1952         } else if (isASCIIDigit(firstCharacter)) {
   1953             // The start of a digit run is a word start ("2" in "WebKit2").
   1954             if (!isASCIIDigit(previousCharacter))
   1955                 return true;
   1956         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
   1957             // The start of a non-separator, non-uppercase, non-digit run is a word start,
   1958             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
   1959             return true;
   1960         }
   1961     }
   1962 
   1963     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
   1964     // a word, so treat the position before any CJK character as a word start.
   1965     if (Character::isCJKIdeographOrSymbol(firstCharacter))
   1966         return true;
   1967 
   1968     size_t wordBreakSearchStart = start + length;
   1969     while (wordBreakSearchStart > start)
   1970         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
   1971     return wordBreakSearchStart == start;
   1972 }
   1973 
   1974 inline size_t SearchBuffer::search(size_t& start)
   1975 {
   1976     size_t size = m_buffer.size();
   1977     if (m_atBreak) {
   1978         if (!size)
   1979             return 0;
   1980     } else {
   1981         if (size != m_buffer.capacity())
   1982             return 0;
   1983     }
   1984 
   1985     UStringSearch* searcher = WebCore::searcher();
   1986 
   1987     UErrorCode status = U_ZERO_ERROR;
   1988     usearch_setText(searcher, m_buffer.data(), size, &status);
   1989     ASSERT(status == U_ZERO_ERROR);
   1990 
   1991     usearch_setOffset(searcher, m_prefixLength, &status);
   1992     ASSERT(status == U_ZERO_ERROR);
   1993 
   1994     int matchStart = usearch_next(searcher, &status);
   1995     ASSERT(status == U_ZERO_ERROR);
   1996 
   1997 nextMatch:
   1998     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
   1999         ASSERT(matchStart == USEARCH_DONE);
   2000         return 0;
   2001     }
   2002 
   2003     // Matches that start in the overlap area are only tentative.
   2004     // The same match may appear later, matching more characters,
   2005     // possibly including a combining character that's not yet in the buffer.
   2006     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
   2007         size_t overlap = m_overlap;
   2008         if (m_options & AtWordStarts) {
   2009             // Ensure that there is sufficient context before matchStart the next time around for
   2010             // determining if it is at a word boundary.
   2011             int wordBoundaryContextStart = matchStart;
   2012             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
   2013             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
   2014             overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
   2015         }
   2016         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
   2017         m_prefixLength -= std::min(m_prefixLength, size - overlap);
   2018         m_buffer.shrink(overlap);
   2019         return 0;
   2020     }
   2021 
   2022     size_t matchedLength = usearch_getMatchedLength(searcher);
   2023     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
   2024 
   2025     // If this match is "bad", move on to the next match.
   2026     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
   2027         matchStart = usearch_next(searcher, &status);
   2028         ASSERT(status == U_ZERO_ERROR);
   2029         goto nextMatch;
   2030     }
   2031 
   2032     size_t newSize = size - (matchStart + 1);
   2033     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
   2034     m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
   2035     m_buffer.shrink(newSize);
   2036 
   2037     start = size - matchStart;
   2038     return matchedLength;
   2039 }
   2040 
   2041 // --------
   2042 
   2043 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
   2044 {
   2045     int length = 0;
   2046     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
   2047     if (forSelectionPreservation)
   2048         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
   2049     for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance())
   2050         length += it.length();
   2051 
   2052     return length;
   2053 }
   2054 
   2055 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
   2056 {
   2057     CharacterIterator entireRangeIterator(entireRange);
   2058     Position start;
   2059     Position end;
   2060     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
   2061     return Range::create(entireRange->ownerDocument(), start, end);
   2062 }
   2063 
   2064 // --------
   2065 
   2066 String plainText(const Range* r, TextIteratorBehaviorFlags behavior)
   2067 {
   2068     // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
   2069     static const unsigned initialCapacity = 1 << 15;
   2070 
   2071     unsigned bufferLength = 0;
   2072     StringBuilder builder;
   2073     builder.reserveCapacity(initialCapacity);
   2074 
   2075     for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
   2076         it.appendTextToStringBuilder(builder);
   2077         bufferLength += it.length();
   2078     }
   2079 
   2080     if (!bufferLength)
   2081         return emptyString();
   2082 
   2083     return builder.toString();
   2084 }
   2085 
   2086 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward)
   2087 {
   2088     RefPtrWillBeRawPtr<Range> result = range->cloneRange();
   2089     result->collapse(!forward);
   2090     return result.release();
   2091 }
   2092 
   2093 // Check if there's any unpaird surrogate code point.
   2094 // Non-character code points are not checked.
   2095 static bool isValidUTF16(const String& s)
   2096 {
   2097     if (s.is8Bit())
   2098         return true;
   2099     const UChar* ustr = s.characters16();
   2100     size_t length = s.length();
   2101     size_t position = 0;
   2102     while (position < length) {
   2103         UChar32 character;
   2104         U16_NEXT(ustr, position, length, character);
   2105         if (U_IS_SURROGATE(character))
   2106             return false;
   2107     }
   2108     return true;
   2109 }
   2110 
   2111 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
   2112 {
   2113     matchStart = 0;
   2114     size_t matchLength = 0;
   2115 
   2116     if (!isValidUTF16(target))
   2117         return 0;
   2118 
   2119     SearchBuffer buffer(target, options);
   2120 
   2121     if (buffer.needsMoreContext()) {
   2122         RefPtrWillBeRawPtr<Range> startRange = it.range();
   2123         RefPtrWillBeRawPtr<Range> beforeStartRange = startRange->ownerDocument().createRange();
   2124         beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION);
   2125         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
   2126             Vector<UChar, 1024> characters;
   2127             backwardsIterator.prependTextTo(characters);
   2128             buffer.prependContext(characters.data(), characters.size());
   2129             if (!buffer.needsMoreContext())
   2130                 break;
   2131         }
   2132     }
   2133 
   2134     while (!it.atEnd()) {
   2135         it.appendTextTo(buffer);
   2136         it.advance(buffer.numberOfCharactersJustAppended());
   2137 tryAgain:
   2138         size_t matchStartOffset;
   2139         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
   2140             // Note that we found a match, and where we found it.
   2141             size_t lastCharacterInBufferOffset = it.characterOffset();
   2142             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
   2143             matchStart = lastCharacterInBufferOffset - matchStartOffset;
   2144             matchLength = newMatchLength;
   2145             // If searching forward, stop on the first match.
   2146             // If searching backward, don't stop, so we end up with the last match.
   2147             if (!(options & Backwards))
   2148                 break;
   2149             goto tryAgain;
   2150         }
   2151         if (it.atBreak() && !buffer.atBreak()) {
   2152             buffer.reachedBreak();
   2153             goto tryAgain;
   2154         }
   2155     }
   2156 
   2157     return matchLength;
   2158 }
   2159 
   2160 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots;
   2161 
   2162 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
   2163 {
   2164     // CharacterIterator requires renderers to be up-to-date
   2165     range->ownerDocument().updateLayout();
   2166 
   2167     // First, find the text.
   2168     size_t matchStart;
   2169     size_t matchLength;
   2170     {
   2171         CharacterIterator findIterator(range, iteratorFlagsForFindPlainText);
   2172         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
   2173         if (!matchLength)
   2174             return collapsedToBoundary(range, !(options & Backwards));
   2175     }
   2176 
   2177     // Then, find the document position of the start and the end of the text.
   2178     CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText);
   2179     Position resultStart;
   2180     Position resultEnd;
   2181     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
   2182     return Range::create(range->ownerDocument(), resultStart, resultEnd);
   2183 }
   2184 
   2185 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd)
   2186 {
   2187     resultStart.clear();
   2188     resultEnd.clear();
   2189     // CharacterIterator requires renderers to be up-to-date.
   2190     if (!inputStart.inDocument())
   2191         return;
   2192     ASSERT(inputStart.document() == inputEnd.document());
   2193     inputStart.document()->updateLayout();
   2194 
   2195     // FIXME: Reduce the code duplication with above (but how?).
   2196     size_t matchStart;
   2197     size_t matchLength;
   2198     {
   2199         CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
   2200         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
   2201         if (!matchLength) {
   2202             const Position& collapseTo = options & Backwards ? inputStart : inputEnd;
   2203             resultStart = collapseTo;
   2204             resultEnd = collapseTo;
   2205             return;
   2206         }
   2207     }
   2208 
   2209     CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
   2210     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
   2211 }
   2212 
   2213 }
   2214