Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
      3  * Copyright (C) 2005 Alexey Proskuryakov.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/editing/TextIterator.h"
     29 
     30 #include "HTMLNames.h"
     31 #include "bindings/v8/ExceptionStatePlaceholder.h"
     32 #include "core/dom/Document.h"
     33 #include "core/dom/NodeTraversal.h"
     34 #include "core/dom/Range.h"
     35 #include "core/dom/shadow/ShadowRoot.h"
     36 #include "core/editing/VisiblePosition.h"
     37 #include "core/editing/VisibleUnits.h"
     38 #include "core/editing/htmlediting.h"
     39 #include "core/html/HTMLElement.h"
     40 #include "core/html/HTMLTextFormControlElement.h"
     41 #include "core/platform/graphics/Font.h"
     42 #include "core/platform/text/TextBoundaries.h"
     43 #include "core/platform/text/TextBreakIteratorInternalICU.h"
     44 #include "core/rendering/InlineTextBox.h"
     45 #include "core/rendering/RenderImage.h"
     46 #include "core/rendering/RenderTableCell.h"
     47 #include "core/rendering/RenderTableRow.h"
     48 #include "core/rendering/RenderTextControl.h"
     49 #include "core/rendering/RenderTextFragment.h"
     50 #include "wtf/text/CString.h"
     51 #include "wtf/text/StringBuilder.h"
     52 #include "wtf/unicode/CharacterNames.h"
     53 #include <unicode/usearch.h>
     54 
     55 using namespace WTF::Unicode;
     56 using namespace std;
     57 
     58 namespace WebCore {
     59 
     60 using namespace HTMLNames;
     61 
     62 // Buffer that knows how to compare with a search target.
     63 // Keeps enough of the previous text to be able to search in the future, but no more.
     64 // Non-breaking spaces are always equal to normal spaces.
     65 // Case folding is also done if the CaseInsensitive option is specified.
     66 // Matches are further filtered if the AtWordStarts option is specified, although some
     67 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
     68 class SearchBuffer {
     69     WTF_MAKE_NONCOPYABLE(SearchBuffer);
     70 public:
     71     SearchBuffer(const String& target, FindOptions);
     72     ~SearchBuffer();
     73 
     74     // Returns number of characters appended; guaranteed to be in the range [1, length].
     75     template<typename CharType>
     76     void append(const CharType*, size_t length);
     77     size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
     78 
     79     bool needsMoreContext() const;
     80     void prependContext(const UChar*, size_t length);
     81     void reachedBreak();
     82 
     83     // Result is the size in characters of what was found.
     84     // And <startOffset> is the number of characters back to the start of what was found.
     85     size_t search(size_t& startOffset);
     86     bool atBreak() const;
     87 
     88 private:
     89     bool isBadMatch(const UChar*, size_t length) const;
     90     bool isWordStartMatch(size_t start, size_t length) const;
     91 
     92     Vector<UChar> m_target;
     93     FindOptions m_options;
     94 
     95     Vector<UChar> m_buffer;
     96     size_t m_overlap;
     97     size_t m_prefixLength;
     98     size_t m_numberOfCharactersJustAppended;
     99     bool m_atBreak;
    100     bool m_needsMoreContext;
    101 
    102     bool m_targetRequiresKanaWorkaround;
    103     Vector<UChar> m_normalizedTarget;
    104     mutable Vector<UChar> m_normalizedMatch;
    105 };
    106 
    107 // --------
    108 
    109 static const unsigned bitsInWord = sizeof(unsigned) * 8;
    110 static const unsigned bitInWordMask = bitsInWord - 1;
    111 
    112 BitStack::BitStack()
    113     : m_size(0)
    114 {
    115 }
    116 
    117 BitStack::~BitStack()
    118 {
    119 }
    120 
    121 void BitStack::push(bool bit)
    122 {
    123     unsigned index = m_size / bitsInWord;
    124     unsigned shift = m_size & bitInWordMask;
    125     if (!shift && index == m_words.size()) {
    126         m_words.grow(index + 1);
    127         m_words[index] = 0;
    128     }
    129     unsigned& word = m_words[index];
    130     unsigned mask = 1U << shift;
    131     if (bit)
    132         word |= mask;
    133     else
    134         word &= ~mask;
    135     ++m_size;
    136 }
    137 
    138 void BitStack::pop()
    139 {
    140     if (m_size)
    141         --m_size;
    142 }
    143 
    144 bool BitStack::top() const
    145 {
    146     if (!m_size)
    147         return false;
    148     unsigned shift = (m_size - 1) & bitInWordMask;
    149     return m_words.last() & (1U << shift);
    150 }
    151 
    152 unsigned BitStack::size() const
    153 {
    154     return m_size;
    155 }
    156 
    157 // --------
    158 
    159 #if !ASSERT_DISABLED
    160 
    161 static unsigned depthCrossingShadowBoundaries(Node* node)
    162 {
    163     unsigned depth = 0;
    164     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    165         ++depth;
    166     return depth;
    167 }
    168 
    169 #endif
    170 
    171 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
    172 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
    173 {
    174     if (!rangeEndContainer)
    175         return 0;
    176     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
    177         if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
    178             return next;
    179     }
    180     for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
    181         if (Node* next = node->nextSibling())
    182             return next;
    183     }
    184     return 0;
    185 }
    186 
    187 // --------
    188 
    189 static inline bool fullyClipsContents(Node* node)
    190 {
    191     RenderObject* renderer = node->renderer();
    192     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
    193         return false;
    194     return toRenderBox(renderer)->size().isEmpty();
    195 }
    196 
    197 static inline bool ignoresContainerClip(Node* node)
    198 {
    199     RenderObject* renderer = node->renderer();
    200     if (!renderer || renderer->isText())
    201         return false;
    202     return renderer->style()->hasOutOfFlowPosition();
    203 }
    204 
    205 static void pushFullyClippedState(BitStack& stack, Node* node)
    206 {
    207     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
    208 
    209     // Push true if this node full clips its contents, or if a parent already has fully
    210     // clipped and this is not a node that ignores its container's clip.
    211     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
    212 }
    213 
    214 static void setUpFullyClippedStack(BitStack& stack, Node* node)
    215 {
    216     // Put the nodes in a vector so we can iterate in reverse order.
    217     Vector<Node*, 100> ancestry;
    218     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
    219         ancestry.append(parent);
    220 
    221     // Call pushFullyClippedState on each node starting with the earliest ancestor.
    222     size_t size = ancestry.size();
    223     for (size_t i = 0; i < size; ++i)
    224         pushFullyClippedState(stack, ancestry[size - i - 1]);
    225     pushFullyClippedState(stack, node);
    226 
    227     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
    228 }
    229 
    230 // --------
    231 
    232 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior)
    233     : m_startContainer(0)
    234     , m_startOffset(0)
    235     , m_endContainer(0)
    236     , m_endOffset(0)
    237     , m_positionNode(0)
    238     , m_textLength(0)
    239     , m_remainingTextBox(0)
    240     , m_firstLetterText(0)
    241     , m_sortedTextBoxesPosition(0)
    242     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    243     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    244     , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding)
    245     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
    246     , m_handledFirstLetter(false)
    247     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    248     , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters)
    249     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
    250     , m_shouldStop(false)
    251     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
    252 {
    253     if (!r)
    254         return;
    255 
    256     // get and validate the range endpoints
    257     Node* startContainer = r->startContainer();
    258     if (!startContainer)
    259         return;
    260     int startOffset = r->startOffset();
    261     Node* endContainer = r->endContainer();
    262     int endOffset = r->endOffset();
    263 
    264     // Callers should be handing us well-formed ranges. If we discover that this isn't
    265     // the case, we could consider changing this assertion to an early return.
    266     ASSERT(r->boundaryPointsValid());
    267 
    268     // remember range - this does not change
    269     m_startContainer = startContainer;
    270     m_startOffset = startOffset;
    271     m_endContainer = endContainer;
    272     m_endOffset = endOffset;
    273 
    274     // set up the current node for processing
    275     m_node = r->firstNode();
    276     if (!m_node)
    277         return;
    278     setUpFullyClippedStack(m_fullyClippedStack, m_node);
    279     m_offset = m_node == m_startContainer ? m_startOffset : 0;
    280     m_handledNode = false;
    281     m_handledChildren = false;
    282 
    283     // calculate first out of bounds node
    284     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
    285 
    286     // initialize node processing state
    287     m_needsAnotherNewline = false;
    288     m_textBox = 0;
    289 
    290     // initialize record of previous node processing
    291     m_hasEmitted = false;
    292     m_lastTextNode = 0;
    293     m_lastTextNodeEndedWithCollapsedSpace = false;
    294     m_lastCharacter = 0;
    295 
    296 #ifndef NDEBUG
    297     // need this just because of the assert in advance()
    298     m_positionNode = m_node;
    299 #endif
    300 
    301     // identify the first run
    302     advance();
    303 }
    304 
    305 TextIterator::~TextIterator()
    306 {
    307 }
    308 
    309 void TextIterator::advance()
    310 {
    311     if (m_shouldStop)
    312         return;
    313 
    314     // reset the run information
    315     m_positionNode = 0;
    316     m_textLength = 0;
    317 
    318     // handle remembered node that needed a newline after the text node's newline
    319     if (m_needsAnotherNewline) {
    320         // Emit the extra newline, and position it *inside* m_node, after m_node's
    321         // contents, in case it's a block, in the same way that we position the first
    322         // newline.  The range for the emitted newline should start where the line
    323         // break begins.
    324         // FIXME: It would be cleaner if we emitted two newlines during the last
    325         // iteration, instead of using m_needsAnotherNewline.
    326         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
    327         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    328         m_needsAnotherNewline = false;
    329         return;
    330     }
    331 
    332     if (!m_textBox && m_remainingTextBox) {
    333         m_textBox = m_remainingTextBox;
    334         m_remainingTextBox = 0;
    335         m_firstLetterText = 0;
    336         m_offset = 0;
    337     }
    338     // handle remembered text box
    339     if (m_textBox) {
    340         handleTextBox();
    341         if (m_positionNode)
    342             return;
    343     }
    344 
    345     while (m_node && m_node != m_pastEndNode) {
    346         if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
    347             m_shouldStop = true;
    348 
    349         // if the range ends at offset 0 of an element, represent the
    350         // position, but not the content, of that element e.g. if the
    351         // node is a blockflow element, emit a newline that
    352         // precedes the element
    353         if (m_node == m_endContainer && m_endOffset == 0) {
    354             representNodeOffsetZero();
    355             m_node = 0;
    356             return;
    357         }
    358 
    359         RenderObject* renderer = m_node->renderer();
    360         if (!renderer) {
    361             m_handledNode = true;
    362             m_handledChildren = true;
    363         } else {
    364             // handle current node according to its type
    365             if (!m_handledNode) {
    366                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
    367                     m_handledNode = handleTextNode();
    368                 else if (renderer && (renderer->isImage() || renderer->isWidget() ||
    369                          (renderer->node() && renderer->node()->isElementNode() &&
    370                           (toElement(renderer->node())->isFormControlElement()
    371                           || toElement(renderer->node())->hasTagName(legendTag)
    372                           || toElement(renderer->node())->hasTagName(meterTag)
    373                           || toElement(renderer->node())->hasTagName(progressTag)))))
    374                     m_handledNode = handleReplacedElement();
    375                 else
    376                     m_handledNode = handleNonTextNode();
    377                 if (m_positionNode)
    378                     return;
    379             }
    380         }
    381 
    382         // find a new current node to handle in depth-first manner,
    383         // calling exitNode() as we come back thru a parent node
    384         Node* next = m_handledChildren ? 0 : m_node->firstChild();
    385         m_offset = 0;
    386         if (!next) {
    387             next = m_node->nextSibling();
    388             if (!next) {
    389                 bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode;
    390                 Node* parentNode = m_node->parentOrShadowHostNode();
    391                 while (!next && parentNode) {
    392                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
    393                         return;
    394                     bool haveRenderer = m_node->renderer();
    395                     m_node = parentNode;
    396                     m_fullyClippedStack.pop();
    397                     parentNode = m_node->parentOrShadowHostNode();
    398                     if (haveRenderer)
    399                         exitNode();
    400                     if (m_positionNode) {
    401                         m_handledNode = true;
    402                         m_handledChildren = true;
    403                         return;
    404                     }
    405                     next = m_node->nextSibling();
    406                 }
    407             }
    408             m_fullyClippedStack.pop();
    409         }
    410 
    411         // set the new current node
    412         m_node = next;
    413         if (m_node)
    414             pushFullyClippedState(m_fullyClippedStack, m_node);
    415         m_handledNode = false;
    416         m_handledChildren = false;
    417         m_handledFirstLetter = false;
    418         m_firstLetterText = 0;
    419 
    420         // how would this ever be?
    421         if (m_positionNode)
    422             return;
    423     }
    424 }
    425 
    426 UChar TextIterator::characterAt(unsigned index) const
    427 {
    428     ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
    429     if (!(index < static_cast<unsigned>(length())))
    430         return 0;
    431 
    432     if (m_singleCharacterBuffer) {
    433         ASSERT(!index);
    434         ASSERT(length() == 1);
    435         return m_singleCharacterBuffer;
    436     }
    437 
    438     return string()[startOffset() + index];
    439 }
    440 
    441 String TextIterator::substring(unsigned position, unsigned length) const
    442 {
    443     ASSERT_WITH_SECURITY_IMPLICATION(position < static_cast<unsigned>(this->length()));
    444     ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
    445     if (!length)
    446         return emptyString();
    447     if (m_singleCharacterBuffer) {
    448         ASSERT(!position);
    449         ASSERT(length == 1);
    450         return String(&m_singleCharacterBuffer, 1);
    451     }
    452     return string().substring(startOffset() + position, length);
    453 }
    454 
    455 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
    456 {
    457     unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
    458     if (!lengthToAppend)
    459         return;
    460     if (m_singleCharacterBuffer) {
    461         ASSERT(!position);
    462         builder.append(m_singleCharacterBuffer);
    463     } else {
    464         builder.append(string(), startOffset() + position, lengthToAppend);
    465     }
    466 }
    467 
    468 bool TextIterator::handleTextNode()
    469 {
    470     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
    471         return false;
    472 
    473     RenderText* renderer = toRenderText(m_node->renderer());
    474 
    475     m_lastTextNode = m_node;
    476     String str = renderer->text();
    477 
    478     // handle pre-formatted text
    479     if (!renderer->style()->collapseWhiteSpace()) {
    480         int runStart = m_offset;
    481         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
    482             emitCharacter(' ', m_node, 0, runStart, runStart);
    483             return false;
    484         }
    485         if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
    486             handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
    487             if (m_firstLetterText) {
    488                 String firstLetter = m_firstLetterText->text();
    489                 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
    490                 m_firstLetterText = 0;
    491                 m_textBox = 0;
    492                 return false;
    493             }
    494         }
    495         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    496             return false;
    497         int strLength = str.length();
    498         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
    499         int runEnd = min(strLength, end);
    500 
    501         if (runStart >= runEnd)
    502             return true;
    503 
    504         emitText(m_node, runStart, runEnd);
    505         return true;
    506     }
    507 
    508     if (renderer->firstTextBox())
    509         m_textBox = renderer->firstTextBox();
    510 
    511     bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
    512     if (shouldHandleFirstLetter)
    513         handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
    514 
    515     if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
    516         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    517             return false;
    518         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
    519         return true;
    520     }
    521 
    522     if (m_firstLetterText)
    523         renderer = m_firstLetterText;
    524 
    525     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
    526     if (renderer->containsReversedText()) {
    527         m_sortedTextBoxes.clear();
    528         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
    529             m_sortedTextBoxes.append(textBox);
    530         }
    531         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
    532         m_sortedTextBoxesPosition = 0;
    533         m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
    534     }
    535 
    536     handleTextBox();
    537     return true;
    538 }
    539 
    540 void TextIterator::handleTextBox()
    541 {
    542     RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
    543     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
    544         m_textBox = 0;
    545         return;
    546     }
    547     String str = renderer->text();
    548     unsigned start = m_offset;
    549     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
    550     while (m_textBox) {
    551         unsigned textBoxStart = m_textBox->start();
    552         unsigned runStart = max(textBoxStart, start);
    553 
    554         // Check for collapsed space at the start of this run.
    555         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
    556         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
    557             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
    558         if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
    559             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
    560                 unsigned spaceRunStart = runStart - 1;
    561                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
    562                     --spaceRunStart;
    563                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
    564             } else
    565                 emitCharacter(' ', m_node, 0, runStart, runStart);
    566             return;
    567         }
    568         unsigned textBoxEnd = textBoxStart + m_textBox->len();
    569         unsigned runEnd = min(textBoxEnd, end);
    570 
    571         // Determine what the next text box will be, but don't advance yet
    572         InlineTextBox* nextTextBox = 0;
    573         if (renderer->containsReversedText()) {
    574             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
    575                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
    576         } else
    577             nextTextBox = m_textBox->nextTextBox();
    578         ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
    579 
    580         if (runStart < runEnd) {
    581             // Handle either a single newline character (which becomes a space),
    582             // or a run of characters that does not include a newline.
    583             // This effectively translates newlines to spaces without copying the text.
    584             if (str[runStart] == '\n') {
    585                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
    586                 m_offset = runStart + 1;
    587             } else {
    588                 size_t subrunEnd = str.find('\n', runStart);
    589                 if (subrunEnd == notFound || subrunEnd > runEnd)
    590                     subrunEnd = runEnd;
    591 
    592                 m_offset = subrunEnd;
    593                 emitText(m_node, renderer, runStart, subrunEnd);
    594             }
    595 
    596             // If we are doing a subrun that doesn't go to the end of the text box,
    597             // come back again to finish handling this text box; don't advance to the next one.
    598             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
    599                 return;
    600 
    601             // Advance and return
    602             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
    603             if (nextRunStart > runEnd)
    604                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
    605             m_textBox = nextTextBox;
    606             if (renderer->containsReversedText())
    607                 ++m_sortedTextBoxesPosition;
    608             return;
    609         }
    610         // Advance and continue
    611         m_textBox = nextTextBox;
    612         if (renderer->containsReversedText())
    613             ++m_sortedTextBoxesPosition;
    614     }
    615     if (!m_textBox && m_remainingTextBox) {
    616         m_textBox = m_remainingTextBox;
    617         m_remainingTextBox = 0;
    618         m_firstLetterText = 0;
    619         m_offset = 0;
    620         handleTextBox();
    621     }
    622 }
    623 
    624 static inline RenderText* firstRenderTextInFirstLetter(RenderObject* firstLetter)
    625 {
    626     if (!firstLetter)
    627         return 0;
    628 
    629     // FIXME: Should this check descendent objects?
    630     for (RenderObject* current = firstLetter->firstChild(); current; current = current->nextSibling()) {
    631         if (current->isText())
    632             return toRenderText(current);
    633     }
    634     return 0;
    635 }
    636 
    637 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
    638 {
    639     if (renderer->firstLetter()) {
    640         RenderObject* r = renderer->firstLetter();
    641         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    642             return;
    643         if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
    644             m_handledFirstLetter = true;
    645             m_remainingTextBox = m_textBox;
    646             m_textBox = firstLetter->firstTextBox();
    647             m_sortedTextBoxes.clear();
    648             m_firstLetterText = firstLetter;
    649         }
    650     }
    651     m_handledFirstLetter = true;
    652 }
    653 
    654 bool TextIterator::handleReplacedElement()
    655 {
    656     if (m_fullyClippedStack.top())
    657         return false;
    658 
    659     RenderObject* renderer = m_node->renderer();
    660     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    661         return false;
    662 
    663     if (m_lastTextNodeEndedWithCollapsedSpace) {
    664         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
    665         return false;
    666     }
    667 
    668     if (m_entersTextControls && renderer->isTextControl()) {
    669         if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->textFormControlElement()->innerTextElement()) {
    670             m_node = innerTextElement->containingShadowRoot();
    671             pushFullyClippedState(m_fullyClippedStack, m_node);
    672             m_offset = 0;
    673             return false;
    674         }
    675     }
    676 
    677     m_hasEmitted = true;
    678 
    679     if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) {
    680         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
    681         return true;
    682     }
    683 
    684     if (m_emitsCharactersBetweenAllVisiblePositions) {
    685         // We want replaced elements to behave like punctuation for boundary
    686         // finding, and to simply take up space for the selection preservation
    687         // code in moveParagraphs, so we use a comma.
    688         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
    689         return true;
    690     }
    691 
    692     m_positionNode = m_node->parentNode();
    693     m_positionOffsetBaseNode = m_node;
    694     m_positionStartOffset = 0;
    695     m_positionEndOffset = 1;
    696     m_singleCharacterBuffer = 0;
    697 
    698     if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
    699         m_text = toRenderImage(renderer)->altText();
    700         if (!m_text.isEmpty()) {
    701             m_textLength = m_text.length();
    702             m_lastCharacter = m_text[m_textLength - 1];
    703             return true;
    704         }
    705     }
    706 
    707     m_textLength = 0;
    708     m_lastCharacter = 0;
    709 
    710     return true;
    711 }
    712 
    713 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
    714 {
    715     if (renderer->style()->visibility() == VISIBLE)
    716         return true;
    717     if (renderer->isTextFragment()) {
    718         RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer);
    719         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
    720             return true;
    721     }
    722     return false;
    723 }
    724 
    725 static bool shouldEmitTabBeforeNode(Node* node)
    726 {
    727     RenderObject* r = node->renderer();
    728 
    729     // Table cells are delimited by tabs.
    730     if (!r || !isTableCell(node))
    731         return false;
    732 
    733     // Want a tab before every cell other than the first one
    734     RenderTableCell* rc = toRenderTableCell(r);
    735     RenderTable* t = rc->table();
    736     return t && (t->cellBefore(rc) || t->cellAbove(rc));
    737 }
    738 
    739 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
    740 {
    741     RenderObject* renderer = node->renderer();
    742 
    743     if (renderer ? !renderer->isBR() : !node->hasTagName(brTag))
    744         return false;
    745     return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->hasTagName(inputTag));
    746 }
    747 
    748 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
    749 {
    750     // Block flow (versus inline flow) is represented by having
    751     // a newline both before and after the element.
    752     RenderObject* r = node->renderer();
    753     if (!r) {
    754         return (node->hasTagName(blockquoteTag)
    755                 || node->hasTagName(ddTag)
    756                 || node->hasTagName(divTag)
    757                 || node->hasTagName(dlTag)
    758                 || node->hasTagName(dtTag)
    759                 || node->hasTagName(h1Tag)
    760                 || node->hasTagName(h2Tag)
    761                 || node->hasTagName(h3Tag)
    762                 || node->hasTagName(h4Tag)
    763                 || node->hasTagName(h5Tag)
    764                 || node->hasTagName(h6Tag)
    765                 || node->hasTagName(hrTag)
    766                 || node->hasTagName(liTag)
    767                 || node->hasTagName(listingTag)
    768                 || node->hasTagName(olTag)
    769                 || node->hasTagName(pTag)
    770                 || node->hasTagName(preTag)
    771                 || node->hasTagName(trTag)
    772                 || node->hasTagName(ulTag));
    773     }
    774 
    775     // Need to make an exception for table cells, because they are blocks, but we
    776     // want them tab-delimited rather than having newlines before and after.
    777     if (isTableCell(node))
    778         return false;
    779 
    780     // Need to make an exception for table row elements, because they are neither
    781     // "inline" or "RenderBlock", but we want newlines for them.
    782     if (r->isTableRow()) {
    783         RenderTable* t = toRenderTableRow(r)->table();
    784         if (t && !t->isInline())
    785             return true;
    786     }
    787 
    788     return !r->isInline() && r->isRenderBlock()
    789         && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
    790 }
    791 
    792 static bool shouldEmitNewlineAfterNode(Node* node)
    793 {
    794     // FIXME: It should be better but slower to create a VisiblePosition here.
    795     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
    796         return false;
    797     // Check if this is the very last renderer in the document.
    798     // If so, then we should not emit a newline.
    799     while ((node = NodeTraversal::nextSkippingChildren(node)))
    800         if (node->renderer())
    801             return true;
    802     return false;
    803 }
    804 
    805 static bool shouldEmitNewlineBeforeNode(Node* node)
    806 {
    807     return shouldEmitNewlinesBeforeAndAfterNode(node);
    808 }
    809 
    810 static bool shouldEmitExtraNewlineForNode(Node* node)
    811 {
    812     // When there is a significant collapsed bottom margin, emit an extra
    813     // newline for a more realistic result.  We end up getting the right
    814     // result even without margin collapsing. For example: <div><p>text</p></div>
    815     // will work right even if both the <div> and the <p> have bottom margins.
    816     RenderObject* r = node->renderer();
    817     if (!r || !r->isBox())
    818         return false;
    819 
    820     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
    821     // not to do this at all
    822     if (node->hasTagName(h1Tag)
    823         || node->hasTagName(h2Tag)
    824         || node->hasTagName(h3Tag)
    825         || node->hasTagName(h4Tag)
    826         || node->hasTagName(h5Tag)
    827         || node->hasTagName(h6Tag)
    828         || node->hasTagName(pTag)) {
    829         RenderStyle* style = r->style();
    830         if (style) {
    831             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
    832             int fontSize = style->fontDescription().computedPixelSize();
    833             if (bottomMargin * 2 >= fontSize)
    834                 return true;
    835         }
    836     }
    837 
    838     return false;
    839 }
    840 
    841 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
    842 {
    843     const String& text = renderer->text();
    844     int length = text.length();
    845     for (int i = textEnd; i < length; ++i) {
    846         if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
    847             return i - textEnd;
    848     }
    849 
    850     return length - textEnd;
    851 }
    852 
    853 static int maxOffsetIncludingCollapsedSpaces(Node* node)
    854 {
    855     int offset = caretMaxOffset(node);
    856 
    857     if (node->renderer() && node->renderer()->isText())
    858         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
    859 
    860     return offset;
    861 }
    862 
    863 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
    864 bool TextIterator::shouldRepresentNodeOffsetZero()
    865 {
    866     if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
    867         return true;
    868 
    869     // Leave element positioned flush with start of a paragraph
    870     // (e.g. do not insert tab before a table cell at the start of a paragraph)
    871     if (m_lastCharacter == '\n')
    872         return false;
    873 
    874     // Otherwise, show the position if we have emitted any characters
    875     if (m_hasEmitted)
    876         return true;
    877 
    878     // We've not emitted anything yet. Generally, there is no need for any positioning then.
    879     // The only exception is when the element is visually not in the same line as
    880     // the start of the range (e.g. the range starts at the end of the previous paragraph).
    881     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
    882     // make quicker checks to possibly avoid that. Another check that we could make is
    883     // is whether the inline vs block flow changed since the previous visible element.
    884     // I think we're already in a special enough case that that won't be needed, tho.
    885 
    886     // No character needed if this is the first node in the range.
    887     if (m_node == m_startContainer)
    888         return false;
    889 
    890     // If we are outside the start container's subtree, assume we need to emit.
    891     // FIXME: m_startContainer could be an inline block
    892     if (!m_node->isDescendantOf(m_startContainer))
    893         return true;
    894 
    895     // If we started as m_startContainer offset 0 and the current node is a descendant of
    896     // the start container, we already had enough context to correctly decide whether to
    897     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
    898     // so don't second guess that now.
    899     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
    900     // immaterial since we likely would have already emitted something by now.
    901     if (m_startOffset == 0)
    902         return false;
    903 
    904     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
    905     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
    906     // we would create VisiblePositions on every call to this function without this check.
    907     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
    908         || (m_node->renderer()->isBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag)))
    909         return false;
    910 
    911     // The startPos.isNotNull() check is needed because the start could be before the body,
    912     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
    913     // The currPos.isNotNull() check is needed because positions in non-HTML content
    914     // (like SVG) do not have visible positions, and we don't want to emit for them either.
    915     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
    916     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
    917     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
    918 }
    919 
    920 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
    921 {
    922     return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
    923 }
    924 
    925 void TextIterator::representNodeOffsetZero()
    926 {
    927     // Emit a character to show the positioning of m_node.
    928 
    929     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
    930     // create VisiblePositions, which is expensive.  So, we perform the inexpensive checks
    931     // on m_node to see if it necessitates emitting a character first and will early return
    932     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
    933     if (shouldEmitTabBeforeNode(m_node)) {
    934         if (shouldRepresentNodeOffsetZero())
    935             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
    936     } else if (shouldEmitNewlineBeforeNode(m_node)) {
    937         if (shouldRepresentNodeOffsetZero())
    938             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
    939     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
    940         if (shouldRepresentNodeOffsetZero())
    941             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
    942     }
    943 }
    944 
    945 bool TextIterator::handleNonTextNode()
    946 {
    947     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
    948         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
    949     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
    950         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
    951     else
    952         representNodeOffsetZero();
    953 
    954     return true;
    955 }
    956 
    957 void TextIterator::exitNode()
    958 {
    959     // prevent emitting a newline when exiting a collapsed block at beginning of the range
    960     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
    961     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
    962     // therefore look like a blank line.
    963     if (!m_hasEmitted)
    964         return;
    965 
    966     // Emit with a position *inside* m_node, after m_node's contents, in
    967     // case it is a block, because the run should start where the
    968     // emitted character is positioned visually.
    969     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
    970     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
    971     // the logic in _web_attributedStringFromRange match.  We'll get that for free when we switch to use
    972     // TextIterator in _web_attributedStringFromRange.
    973     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
    974     if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
    975         // use extra newline to represent margin bottom, as needed
    976         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
    977 
    978         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
    979         // contain a VisiblePosition when doing selection preservation.
    980         if (m_lastCharacter != '\n') {
    981             // insert a newline with a position following this block's contents.
    982             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    983             // remember whether to later add a newline for the current node
    984             ASSERT(!m_needsAnotherNewline);
    985             m_needsAnotherNewline = addNewline;
    986         } else if (addNewline)
    987             // insert a newline with a position following this block's contents.
    988             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    989     }
    990 
    991     // If nothing was emitted, see if we need to emit a space.
    992     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
    993         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
    994 }
    995 
    996 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
    997 {
    998     m_hasEmitted = true;
    999 
   1000     // remember information with which to construct the TextIterator::range()
   1001     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
   1002     m_positionNode = textNode;
   1003     m_positionOffsetBaseNode = offsetBaseNode;
   1004     m_positionStartOffset = textStartOffset;
   1005     m_positionEndOffset = textEndOffset;
   1006 
   1007     // remember information with which to construct the TextIterator::characters() and length()
   1008     m_singleCharacterBuffer = c;
   1009     ASSERT(m_singleCharacterBuffer);
   1010     m_textLength = 1;
   1011 
   1012     // remember some iteration state
   1013     m_lastTextNodeEndedWithCollapsedSpace = false;
   1014     m_lastCharacter = c;
   1015 }
   1016 
   1017 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
   1018 {
   1019     RenderText* renderer = toRenderText(renderObject);
   1020     m_text = m_emitsOriginalText ? renderer->originalText() : (m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text());
   1021     ASSERT(!m_text.isEmpty());
   1022     ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
   1023     ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
   1024     ASSERT(textStartOffset <= textEndOffset);
   1025 
   1026     m_positionNode = textNode;
   1027     m_positionOffsetBaseNode = 0;
   1028     m_positionStartOffset = textStartOffset;
   1029     m_positionEndOffset = textEndOffset;
   1030     m_singleCharacterBuffer = 0;
   1031     m_textLength = textEndOffset - textStartOffset;
   1032     m_lastCharacter = m_text[textEndOffset - 1];
   1033 
   1034     m_lastTextNodeEndedWithCollapsedSpace = false;
   1035     m_hasEmitted = true;
   1036 }
   1037 
   1038 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
   1039 {
   1040     emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
   1041 }
   1042 
   1043 PassRefPtr<Range> TextIterator::range() const
   1044 {
   1045     // use the current run information, if we have it
   1046     if (m_positionNode) {
   1047         if (m_positionOffsetBaseNode) {
   1048             int index = m_positionOffsetBaseNode->nodeIndex();
   1049             m_positionStartOffset += index;
   1050             m_positionEndOffset += index;
   1051             m_positionOffsetBaseNode = 0;
   1052         }
   1053         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1054     }
   1055 
   1056     // otherwise, return the end of the overall range we were given
   1057     if (m_endContainer)
   1058         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
   1059 
   1060     return 0;
   1061 }
   1062 
   1063 Node* TextIterator::node() const
   1064 {
   1065     RefPtr<Range> textRange = range();
   1066     if (!textRange)
   1067         return 0;
   1068 
   1069     Node* node = textRange->startContainer();
   1070     if (!node)
   1071         return 0;
   1072     if (node->offsetInCharacters())
   1073         return node;
   1074 
   1075     return node->childNode(textRange->startOffset());
   1076 }
   1077 
   1078 // --------
   1079 
   1080 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior)
   1081     : m_node(0)
   1082     , m_offset(0)
   1083     , m_handledNode(false)
   1084     , m_handledChildren(false)
   1085     , m_startNode(0)
   1086     , m_startOffset(0)
   1087     , m_endNode(0)
   1088     , m_endOffset(0)
   1089     , m_positionNode(0)
   1090     , m_positionStartOffset(0)
   1091     , m_positionEndOffset(0)
   1092     , m_textOffset(0)
   1093     , m_textLength(0)
   1094     , m_lastTextNode(0)
   1095     , m_lastCharacter(0)
   1096     , m_singleCharacterBuffer(0)
   1097     , m_havePassedStartNode(false)
   1098     , m_shouldHandleFirstLetter(false)
   1099     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
   1100     , m_shouldStop(false)
   1101     , m_emitsOriginalText(false)
   1102 {
   1103     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
   1104 
   1105     if (!r)
   1106         return;
   1107 
   1108     Node* startNode = r->startContainer();
   1109     if (!startNode)
   1110         return;
   1111     Node* endNode = r->endContainer();
   1112     int startOffset = r->startOffset();
   1113     int endOffset = r->endOffset();
   1114 
   1115     if (!startNode->offsetInCharacters()) {
   1116         if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
   1117             startNode = startNode->childNode(startOffset);
   1118             startOffset = 0;
   1119         }
   1120     }
   1121     if (!endNode->offsetInCharacters()) {
   1122         if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
   1123             endNode = endNode->childNode(endOffset - 1);
   1124             endOffset = lastOffsetInNode(endNode);
   1125         }
   1126     }
   1127 
   1128     m_node = endNode;
   1129     setUpFullyClippedStack(m_fullyClippedStack, m_node);
   1130     m_offset = endOffset;
   1131     m_handledNode = false;
   1132     m_handledChildren = endOffset == 0;
   1133 
   1134     m_startNode = startNode;
   1135     m_startOffset = startOffset;
   1136     m_endNode = endNode;
   1137     m_endOffset = endOffset;
   1138 
   1139 #ifndef NDEBUG
   1140     // Need this just because of the assert.
   1141     m_positionNode = endNode;
   1142 #endif
   1143 
   1144     m_lastTextNode = 0;
   1145     m_lastCharacter = '\n';
   1146 
   1147     m_havePassedStartNode = false;
   1148 
   1149     advance();
   1150 }
   1151 
   1152 void SimplifiedBackwardsTextIterator::advance()
   1153 {
   1154     ASSERT(m_positionNode);
   1155 
   1156     if (m_shouldStop)
   1157         return;
   1158 
   1159     if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
   1160         m_shouldStop = true;
   1161         return;
   1162     }
   1163 
   1164     m_positionNode = 0;
   1165     m_textLength = 0;
   1166 
   1167     while (m_node && !m_havePassedStartNode) {
   1168         // Don't handle node if we start iterating at [node, 0].
   1169         if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
   1170             RenderObject* renderer = m_node->renderer();
   1171             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
   1172                 // FIXME: What about CDATA_SECTION_NODE?
   1173                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1174                     m_handledNode = handleTextNode();
   1175             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
   1176                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1177                     m_handledNode = handleReplacedElement();
   1178             } else
   1179                 m_handledNode = handleNonTextNode();
   1180             if (m_positionNode)
   1181                 return;
   1182         }
   1183 
   1184         if (!m_handledChildren && m_node->hasChildNodes()) {
   1185             m_node = m_node->lastChild();
   1186             pushFullyClippedState(m_fullyClippedStack, m_node);
   1187         } else {
   1188             // Exit empty containers as we pass over them or containers
   1189             // where [container, 0] is where we started iterating.
   1190             if (!m_handledNode
   1191                     && canHaveChildrenForEditing(m_node)
   1192                     && m_node->parentNode()
   1193                     && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
   1194                 exitNode();
   1195                 if (m_positionNode) {
   1196                     m_handledNode = true;
   1197                     m_handledChildren = true;
   1198                     return;
   1199                 }
   1200             }
   1201 
   1202             // Exit all other containers.
   1203             while (!m_node->previousSibling()) {
   1204                 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
   1205                     break;
   1206                 m_fullyClippedStack.pop();
   1207                 exitNode();
   1208                 if (m_positionNode) {
   1209                     m_handledNode = true;
   1210                     m_handledChildren = true;
   1211                     return;
   1212                 }
   1213             }
   1214 
   1215             m_fullyClippedStack.pop();
   1216             if (advanceRespectingRange(m_node->previousSibling()))
   1217                 pushFullyClippedState(m_fullyClippedStack, m_node);
   1218             else
   1219                 m_node = 0;
   1220         }
   1221 
   1222         // For the purpose of word boundary detection,
   1223         // we should iterate all visible text and trailing (collapsed) whitespaces.
   1224         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
   1225         m_handledNode = false;
   1226         m_handledChildren = false;
   1227 
   1228         if (m_positionNode)
   1229             return;
   1230     }
   1231 }
   1232 
   1233 bool SimplifiedBackwardsTextIterator::handleTextNode()
   1234 {
   1235     m_lastTextNode = m_node;
   1236 
   1237     int startOffset;
   1238     int offsetInNode;
   1239     RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
   1240     if (!renderer)
   1241         return true;
   1242 
   1243     String text = renderer->text();
   1244     if (!renderer->firstTextBox() && text.length() > 0)
   1245         return true;
   1246 
   1247     m_positionEndOffset = m_offset;
   1248     m_offset = startOffset + offsetInNode;
   1249     m_positionNode = m_node;
   1250     m_positionStartOffset = m_offset;
   1251 
   1252     ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
   1253     ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
   1254     ASSERT(m_positionStartOffset <= m_positionEndOffset);
   1255 
   1256     m_textLength = m_positionEndOffset - m_positionStartOffset;
   1257     m_textOffset = m_positionStartOffset - offsetInNode;
   1258     m_textContainer = text;
   1259     m_singleCharacterBuffer = 0;
   1260     RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
   1261 
   1262     m_lastCharacter = text[m_positionEndOffset - 1];
   1263 
   1264     return !m_shouldHandleFirstLetter;
   1265 }
   1266 
   1267 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
   1268 {
   1269     RenderText* renderer = toRenderText(m_node->renderer());
   1270     startOffset = (m_node == m_startNode) ? m_startOffset : 0;
   1271 
   1272     if (!renderer->isTextFragment()) {
   1273         offsetInNode = 0;
   1274         return renderer;
   1275     }
   1276 
   1277     RenderTextFragment* fragment = toRenderTextFragment(renderer);
   1278     int offsetAfterFirstLetter = fragment->start();
   1279     if (startOffset >= offsetAfterFirstLetter) {
   1280         ASSERT(!m_shouldHandleFirstLetter);
   1281         offsetInNode = offsetAfterFirstLetter;
   1282         return renderer;
   1283     }
   1284 
   1285     if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
   1286         m_shouldHandleFirstLetter = true;
   1287         offsetInNode = offsetAfterFirstLetter;
   1288         return renderer;
   1289     }
   1290 
   1291     m_shouldHandleFirstLetter = false;
   1292     offsetInNode = 0;
   1293     return firstRenderTextInFirstLetter(fragment->firstLetter());
   1294 }
   1295 
   1296 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
   1297 {
   1298     unsigned index = m_node->nodeIndex();
   1299     // We want replaced elements to behave like punctuation for boundary
   1300     // finding, and to simply take up space for the selection preservation
   1301     // code in moveParagraphs, so we use a comma.  Unconditionally emit
   1302     // here because this iterator is only used for boundary finding.
   1303     emitCharacter(',', m_node->parentNode(), index, index + 1);
   1304     return true;
   1305 }
   1306 
   1307 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
   1308 {
   1309     // We can use a linefeed in place of a tab because this simple iterator is only used to
   1310     // find boundaries, not actual content.  A linefeed breaks words, sentences, and paragraphs.
   1311     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
   1312         unsigned index = m_node->nodeIndex();
   1313         // The start of this emitted range is wrong. Ensuring correctness would require
   1314         // VisiblePositions and so would be slow. previousBoundary expects this.
   1315         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
   1316     }
   1317     return true;
   1318 }
   1319 
   1320 void SimplifiedBackwardsTextIterator::exitNode()
   1321 {
   1322     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
   1323         // The start of this emitted range is wrong. Ensuring correctness would require
   1324         // VisiblePositions and so would be slow. previousBoundary expects this.
   1325         emitCharacter('\n', m_node, 0, 0);
   1326     }
   1327 }
   1328 
   1329 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
   1330 {
   1331     m_singleCharacterBuffer = c;
   1332     m_positionNode = node;
   1333     m_positionStartOffset = startOffset;
   1334     m_positionEndOffset = endOffset;
   1335     m_textOffset = 0;
   1336     m_textLength = 1;
   1337     m_lastCharacter = c;
   1338 }
   1339 
   1340 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
   1341 {
   1342     if (!next)
   1343         return false;
   1344     m_havePassedStartNode |= m_node == m_startNode;
   1345     if (m_havePassedStartNode)
   1346         return false;
   1347     m_node = next;
   1348     return true;
   1349 }
   1350 
   1351 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
   1352 {
   1353     if (m_positionNode)
   1354         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1355 
   1356     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
   1357 }
   1358 
   1359 // --------
   1360 
   1361 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior)
   1362     : m_offset(0)
   1363     , m_runOffset(0)
   1364     , m_atBreak(true)
   1365     , m_textIterator(r, behavior)
   1366 {
   1367     while (!atEnd() && m_textIterator.length() == 0)
   1368         m_textIterator.advance();
   1369 }
   1370 
   1371 PassRefPtr<Range> CharacterIterator::range() const
   1372 {
   1373     RefPtr<Range> r = m_textIterator.range();
   1374     if (!m_textIterator.atEnd()) {
   1375         if (m_textIterator.length() <= 1) {
   1376             ASSERT(m_runOffset == 0);
   1377         } else {
   1378             Node* n = r->startContainer();
   1379             ASSERT(n == r->endContainer());
   1380             int offset = r->startOffset() + m_runOffset;
   1381             r->setStart(n, offset, ASSERT_NO_EXCEPTION);
   1382             r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
   1383         }
   1384     }
   1385     return r.release();
   1386 }
   1387 
   1388 void CharacterIterator::advance(int count)
   1389 {
   1390     if (count <= 0) {
   1391         ASSERT(count == 0);
   1392         return;
   1393     }
   1394 
   1395     m_atBreak = false;
   1396 
   1397     // easy if there is enough left in the current m_textIterator run
   1398     int remaining = m_textIterator.length() - m_runOffset;
   1399     if (count < remaining) {
   1400         m_runOffset += count;
   1401         m_offset += count;
   1402         return;
   1403     }
   1404 
   1405     // exhaust the current m_textIterator run
   1406     count -= remaining;
   1407     m_offset += remaining;
   1408 
   1409     // move to a subsequent m_textIterator run
   1410     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1411         int runLength = m_textIterator.length();
   1412         if (runLength == 0)
   1413             m_atBreak = true;
   1414         else {
   1415             // see whether this is m_textIterator to use
   1416             if (count < runLength) {
   1417                 m_runOffset = count;
   1418                 m_offset += count;
   1419                 return;
   1420             }
   1421 
   1422             // exhaust this m_textIterator run
   1423             count -= runLength;
   1424             m_offset += runLength;
   1425         }
   1426     }
   1427 
   1428     // ran to the end of the m_textIterator... no more runs left
   1429     m_atBreak = true;
   1430     m_runOffset = 0;
   1431 }
   1432 
   1433 String CharacterIterator::string(int numChars)
   1434 {
   1435     StringBuilder result;
   1436     result.reserveCapacity(numChars);
   1437     while (numChars > 0 && !atEnd()) {
   1438         int runSize = min(numChars, length());
   1439         m_textIterator.appendTextToStringBuilder(result, m_runOffset, runSize);
   1440         numChars -= runSize;
   1441         advance(runSize);
   1442     }
   1443     return result.toString();
   1444 }
   1445 
   1446 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
   1447 {
   1448     it.advance(offset);
   1449     RefPtr<Range> start = it.range();
   1450 
   1451     if (length > 1)
   1452         it.advance(length - 1);
   1453     RefPtr<Range> end = it.range();
   1454 
   1455     return Range::create(start->startContainer()->document(),
   1456         start->startContainer(), start->startOffset(),
   1457         end->endContainer(), end->endOffset());
   1458 }
   1459 
   1460 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior)
   1461     : m_offset(0)
   1462     , m_runOffset(0)
   1463     , m_atBreak(true)
   1464     , m_textIterator(range, behavior)
   1465 {
   1466     while (!atEnd() && !m_textIterator.length())
   1467         m_textIterator.advance();
   1468 }
   1469 
   1470 PassRefPtr<Range> BackwardsCharacterIterator::range() const
   1471 {
   1472     RefPtr<Range> r = m_textIterator.range();
   1473     if (!m_textIterator.atEnd()) {
   1474         if (m_textIterator.length() <= 1)
   1475             ASSERT(m_runOffset == 0);
   1476         else {
   1477             Node* n = r->startContainer();
   1478             ASSERT(n == r->endContainer());
   1479             int offset = r->endOffset() - m_runOffset;
   1480             r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION);
   1481             r->setEnd(n, offset, ASSERT_NO_EXCEPTION);
   1482         }
   1483     }
   1484     return r.release();
   1485 }
   1486 
   1487 void BackwardsCharacterIterator::advance(int count)
   1488 {
   1489     if (count <= 0) {
   1490         ASSERT(!count);
   1491         return;
   1492     }
   1493 
   1494     m_atBreak = false;
   1495 
   1496     int remaining = m_textIterator.length() - m_runOffset;
   1497     if (count < remaining) {
   1498         m_runOffset += count;
   1499         m_offset += count;
   1500         return;
   1501     }
   1502 
   1503     count -= remaining;
   1504     m_offset += remaining;
   1505 
   1506     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1507         int runLength = m_textIterator.length();
   1508         if (runLength == 0)
   1509             m_atBreak = true;
   1510         else {
   1511             if (count < runLength) {
   1512                 m_runOffset = count;
   1513                 m_offset += count;
   1514                 return;
   1515             }
   1516 
   1517             count -= runLength;
   1518             m_offset += runLength;
   1519         }
   1520     }
   1521 
   1522     m_atBreak = true;
   1523     m_runOffset = 0;
   1524 }
   1525 
   1526 // --------
   1527 
   1528 WordAwareIterator::WordAwareIterator(const Range* range)
   1529     : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
   1530     , m_textIterator(range)
   1531 {
   1532     advance(); // Get in position over the first chunk of text.
   1533 }
   1534 
   1535 WordAwareIterator::~WordAwareIterator()
   1536 {
   1537 }
   1538 
   1539 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
   1540 
   1541 void WordAwareIterator::advance()
   1542 {
   1543     m_buffer.clear();
   1544 
   1545     // If last time we did a look-ahead, start with that looked-ahead chunk now
   1546     if (!m_didLookAhead) {
   1547         ASSERT(!m_textIterator.atEnd());
   1548         m_textIterator.advance();
   1549     }
   1550     m_didLookAhead = false;
   1551 
   1552     // Go to next non-empty chunk.
   1553     while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
   1554         m_textIterator.advance();
   1555 
   1556     m_range = m_textIterator.range();
   1557 
   1558     if (m_textIterator.atEnd())
   1559         return;
   1560 
   1561     while (1) {
   1562         // If this chunk ends in whitespace we can just use it as our chunk.
   1563         if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
   1564             return;
   1565 
   1566         // If this is the first chunk that failed, save it in m_buffer before look ahead.
   1567         if (m_buffer.isEmpty())
   1568             m_textIterator.appendTextTo(m_buffer);
   1569 
   1570         // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
   1571         m_textIterator.advance();
   1572         if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
   1573             m_didLookAhead = true;
   1574             return;
   1575         }
   1576 
   1577         // Start gobbling chunks until we get to a suitable stopping point
   1578         m_textIterator.appendTextTo(m_buffer);
   1579         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION);
   1580     }
   1581 }
   1582 
   1583 int WordAwareIterator::length() const
   1584 {
   1585     if (!m_buffer.isEmpty())
   1586         return m_buffer.size();
   1587     return m_textIterator.length();
   1588 }
   1589 
   1590 String WordAwareIterator::substring(unsigned position, unsigned length) const
   1591 {
   1592     if (!m_buffer.isEmpty())
   1593         return String(m_buffer.data() + position, length);
   1594     return m_textIterator.substring(position, length);
   1595 }
   1596 
   1597 UChar WordAwareIterator::characterAt(unsigned index) const
   1598 {
   1599     if (!m_buffer.isEmpty())
   1600         return m_buffer[index];
   1601     return m_textIterator.characterAt(index);
   1602 }
   1603 
   1604 // --------
   1605 
   1606 static inline UChar foldQuoteMarkOrSoftHyphen(UChar c)
   1607 {
   1608     switch (c) {
   1609         case hebrewPunctuationGershayim:
   1610         case leftDoubleQuotationMark:
   1611         case rightDoubleQuotationMark:
   1612             return '"';
   1613         case hebrewPunctuationGeresh:
   1614         case leftSingleQuotationMark:
   1615         case rightSingleQuotationMark:
   1616             return '\'';
   1617         case softHyphen:
   1618             // Replace soft hyphen with an ignorable character so that their presence or absence will
   1619             // not affect string comparison.
   1620             return 0;
   1621         default:
   1622             return c;
   1623     }
   1624 }
   1625 
   1626 static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length)
   1627 {
   1628     for (size_t i = 0; i < length; ++i)
   1629         data[i] = foldQuoteMarkOrSoftHyphen(data[i]);
   1630 }
   1631 
   1632 static const size_t minimumSearchBufferSize = 8192;
   1633 
   1634 #ifndef NDEBUG
   1635 static bool searcherInUse;
   1636 #endif
   1637 
   1638 static UStringSearch* createSearcher()
   1639 {
   1640     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
   1641     // but it doesn't matter exactly what it is, since we don't perform any searches
   1642     // without setting both the pattern and the text.
   1643     UErrorCode status = U_ZERO_ERROR;
   1644     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
   1645     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
   1646     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
   1647     return searcher;
   1648 }
   1649 
   1650 static UStringSearch* searcher()
   1651 {
   1652     static UStringSearch* searcher = createSearcher();
   1653     return searcher;
   1654 }
   1655 
   1656 static inline void lockSearcher()
   1657 {
   1658 #ifndef NDEBUG
   1659     ASSERT(!searcherInUse);
   1660     searcherInUse = true;
   1661 #endif
   1662 }
   1663 
   1664 static inline void unlockSearcher()
   1665 {
   1666 #ifndef NDEBUG
   1667     ASSERT(searcherInUse);
   1668     searcherInUse = false;
   1669 #endif
   1670 }
   1671 
   1672 // ICU's search ignores the distinction between small kana letters and ones
   1673 // that are not small, and also characters that differ only in the voicing
   1674 // marks when considering only primary collation strength differences.
   1675 // This is not helpful for end users, since these differences make words
   1676 // distinct, so for our purposes we need these to be considered.
   1677 // The Unicode folks do not think the collation algorithm should be
   1678 // changed. To work around this, we would like to tailor the ICU searcher,
   1679 // but we can't get that to work yet. So instead, we check for cases where
   1680 // these differences occur, and skip those matches.
   1681 
   1682 // We refer to the above technique as the "kana workaround". The next few
   1683 // functions are helper functinos for the kana workaround.
   1684 
   1685 static inline bool isKanaLetter(UChar character)
   1686 {
   1687     // Hiragana letters.
   1688     if (character >= 0x3041 && character <= 0x3096)
   1689         return true;
   1690 
   1691     // Katakana letters.
   1692     if (character >= 0x30A1 && character <= 0x30FA)
   1693         return true;
   1694     if (character >= 0x31F0 && character <= 0x31FF)
   1695         return true;
   1696 
   1697     // Halfwidth katakana letters.
   1698     if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70)
   1699         return true;
   1700 
   1701     return false;
   1702 }
   1703 
   1704 static inline bool isSmallKanaLetter(UChar character)
   1705 {
   1706     ASSERT(isKanaLetter(character));
   1707 
   1708     switch (character) {
   1709     case 0x3041: // HIRAGANA LETTER SMALL A
   1710     case 0x3043: // HIRAGANA LETTER SMALL I
   1711     case 0x3045: // HIRAGANA LETTER SMALL U
   1712     case 0x3047: // HIRAGANA LETTER SMALL E
   1713     case 0x3049: // HIRAGANA LETTER SMALL O
   1714     case 0x3063: // HIRAGANA LETTER SMALL TU
   1715     case 0x3083: // HIRAGANA LETTER SMALL YA
   1716     case 0x3085: // HIRAGANA LETTER SMALL YU
   1717     case 0x3087: // HIRAGANA LETTER SMALL YO
   1718     case 0x308E: // HIRAGANA LETTER SMALL WA
   1719     case 0x3095: // HIRAGANA LETTER SMALL KA
   1720     case 0x3096: // HIRAGANA LETTER SMALL KE
   1721     case 0x30A1: // KATAKANA LETTER SMALL A
   1722     case 0x30A3: // KATAKANA LETTER SMALL I
   1723     case 0x30A5: // KATAKANA LETTER SMALL U
   1724     case 0x30A7: // KATAKANA LETTER SMALL E
   1725     case 0x30A9: // KATAKANA LETTER SMALL O
   1726     case 0x30C3: // KATAKANA LETTER SMALL TU
   1727     case 0x30E3: // KATAKANA LETTER SMALL YA
   1728     case 0x30E5: // KATAKANA LETTER SMALL YU
   1729     case 0x30E7: // KATAKANA LETTER SMALL YO
   1730     case 0x30EE: // KATAKANA LETTER SMALL WA
   1731     case 0x30F5: // KATAKANA LETTER SMALL KA
   1732     case 0x30F6: // KATAKANA LETTER SMALL KE
   1733     case 0x31F0: // KATAKANA LETTER SMALL KU
   1734     case 0x31F1: // KATAKANA LETTER SMALL SI
   1735     case 0x31F2: // KATAKANA LETTER SMALL SU
   1736     case 0x31F3: // KATAKANA LETTER SMALL TO
   1737     case 0x31F4: // KATAKANA LETTER SMALL NU
   1738     case 0x31F5: // KATAKANA LETTER SMALL HA
   1739     case 0x31F6: // KATAKANA LETTER SMALL HI
   1740     case 0x31F7: // KATAKANA LETTER SMALL HU
   1741     case 0x31F8: // KATAKANA LETTER SMALL HE
   1742     case 0x31F9: // KATAKANA LETTER SMALL HO
   1743     case 0x31FA: // KATAKANA LETTER SMALL MU
   1744     case 0x31FB: // KATAKANA LETTER SMALL RA
   1745     case 0x31FC: // KATAKANA LETTER SMALL RI
   1746     case 0x31FD: // KATAKANA LETTER SMALL RU
   1747     case 0x31FE: // KATAKANA LETTER SMALL RE
   1748     case 0x31FF: // KATAKANA LETTER SMALL RO
   1749     case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A
   1750     case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I
   1751     case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U
   1752     case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E
   1753     case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O
   1754     case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA
   1755     case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU
   1756     case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO
   1757     case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU
   1758         return true;
   1759     }
   1760     return false;
   1761 }
   1762 
   1763 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark };
   1764 
   1765 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
   1766 {
   1767     ASSERT(isKanaLetter(character));
   1768 
   1769     switch (character) {
   1770     case 0x304C: // HIRAGANA LETTER GA
   1771     case 0x304E: // HIRAGANA LETTER GI
   1772     case 0x3050: // HIRAGANA LETTER GU
   1773     case 0x3052: // HIRAGANA LETTER GE
   1774     case 0x3054: // HIRAGANA LETTER GO
   1775     case 0x3056: // HIRAGANA LETTER ZA
   1776     case 0x3058: // HIRAGANA LETTER ZI
   1777     case 0x305A: // HIRAGANA LETTER ZU
   1778     case 0x305C: // HIRAGANA LETTER ZE
   1779     case 0x305E: // HIRAGANA LETTER ZO
   1780     case 0x3060: // HIRAGANA LETTER DA
   1781     case 0x3062: // HIRAGANA LETTER DI
   1782     case 0x3065: // HIRAGANA LETTER DU
   1783     case 0x3067: // HIRAGANA LETTER DE
   1784     case 0x3069: // HIRAGANA LETTER DO
   1785     case 0x3070: // HIRAGANA LETTER BA
   1786     case 0x3073: // HIRAGANA LETTER BI
   1787     case 0x3076: // HIRAGANA LETTER BU
   1788     case 0x3079: // HIRAGANA LETTER BE
   1789     case 0x307C: // HIRAGANA LETTER BO
   1790     case 0x3094: // HIRAGANA LETTER VU
   1791     case 0x30AC: // KATAKANA LETTER GA
   1792     case 0x30AE: // KATAKANA LETTER GI
   1793     case 0x30B0: // KATAKANA LETTER GU
   1794     case 0x30B2: // KATAKANA LETTER GE
   1795     case 0x30B4: // KATAKANA LETTER GO
   1796     case 0x30B6: // KATAKANA LETTER ZA
   1797     case 0x30B8: // KATAKANA LETTER ZI
   1798     case 0x30BA: // KATAKANA LETTER ZU
   1799     case 0x30BC: // KATAKANA LETTER ZE
   1800     case 0x30BE: // KATAKANA LETTER ZO
   1801     case 0x30C0: // KATAKANA LETTER DA
   1802     case 0x30C2: // KATAKANA LETTER DI
   1803     case 0x30C5: // KATAKANA LETTER DU
   1804     case 0x30C7: // KATAKANA LETTER DE
   1805     case 0x30C9: // KATAKANA LETTER DO
   1806     case 0x30D0: // KATAKANA LETTER BA
   1807     case 0x30D3: // KATAKANA LETTER BI
   1808     case 0x30D6: // KATAKANA LETTER BU
   1809     case 0x30D9: // KATAKANA LETTER BE
   1810     case 0x30DC: // KATAKANA LETTER BO
   1811     case 0x30F4: // KATAKANA LETTER VU
   1812     case 0x30F7: // KATAKANA LETTER VA
   1813     case 0x30F8: // KATAKANA LETTER VI
   1814     case 0x30F9: // KATAKANA LETTER VE
   1815     case 0x30FA: // KATAKANA LETTER VO
   1816         return VoicedSoundMark;
   1817     case 0x3071: // HIRAGANA LETTER PA
   1818     case 0x3074: // HIRAGANA LETTER PI
   1819     case 0x3077: // HIRAGANA LETTER PU
   1820     case 0x307A: // HIRAGANA LETTER PE
   1821     case 0x307D: // HIRAGANA LETTER PO
   1822     case 0x30D1: // KATAKANA LETTER PA
   1823     case 0x30D4: // KATAKANA LETTER PI
   1824     case 0x30D7: // KATAKANA LETTER PU
   1825     case 0x30DA: // KATAKANA LETTER PE
   1826     case 0x30DD: // KATAKANA LETTER PO
   1827         return SemiVoicedSoundMark;
   1828     }
   1829     return NoVoicedSoundMark;
   1830 }
   1831 
   1832 static inline bool isCombiningVoicedSoundMark(UChar character)
   1833 {
   1834     switch (character) {
   1835     case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
   1836     case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
   1837         return true;
   1838     }
   1839     return false;
   1840 }
   1841 
   1842 static inline bool containsKanaLetters(const String& pattern)
   1843 {
   1844     unsigned length = pattern.length();
   1845     for (unsigned i = 0; i < length; ++i) {
   1846         if (isKanaLetter(pattern[i]))
   1847             return true;
   1848     }
   1849     return false;
   1850 }
   1851 
   1852 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
   1853 {
   1854     ASSERT(length);
   1855 
   1856     buffer.resize(length);
   1857 
   1858     UErrorCode status = U_ZERO_ERROR;
   1859     size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
   1860     ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
   1861     ASSERT(bufferSize);
   1862 
   1863     buffer.resize(bufferSize);
   1864 
   1865     if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
   1866         return;
   1867 
   1868     status = U_ZERO_ERROR;
   1869     unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
   1870     ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
   1871 }
   1872 
   1873 static bool isNonLatin1Separator(UChar32 character)
   1874 {
   1875     ASSERT_ARG(character, character >= 256);
   1876 
   1877     return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
   1878 }
   1879 
   1880 static inline bool isSeparator(UChar32 character)
   1881 {
   1882     static const bool latin1SeparatorTable[256] = {
   1883         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1884         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1885         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
   1886         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
   1887         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
   1888         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
   1889         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
   1890         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
   1891         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1892         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1893         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
   1894         1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
   1895         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1896         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
   1897         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1898         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
   1899     };
   1900 
   1901     if (character < 256)
   1902         return latin1SeparatorTable[character];
   1903 
   1904     return isNonLatin1Separator(character);
   1905 }
   1906 
   1907 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
   1908     : m_options(options)
   1909     , m_prefixLength(0)
   1910     , m_numberOfCharactersJustAppended(0)
   1911     , m_atBreak(true)
   1912     , m_needsMoreContext(options & AtWordStarts)
   1913     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
   1914 {
   1915     ASSERT(!target.isEmpty());
   1916     target.appendTo(m_target);
   1917 
   1918     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
   1919     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
   1920     // to add tailoring on top of the locale-specific tailoring as of this writing.
   1921     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
   1922 
   1923     size_t targetLength = m_target.size();
   1924     m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
   1925     m_overlap = m_buffer.capacity() / 4;
   1926 
   1927     if ((m_options & AtWordStarts) && targetLength) {
   1928         UChar32 targetFirstCharacter;
   1929         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
   1930         // Characters in the separator category never really occur at the beginning of a word,
   1931         // so if the target begins with such a character, we just ignore the AtWordStart option.
   1932         if (isSeparator(targetFirstCharacter)) {
   1933             m_options &= ~AtWordStarts;
   1934             m_needsMoreContext = false;
   1935         }
   1936     }
   1937 
   1938     // Grab the single global searcher.
   1939     // If we ever have a reason to do more than once search buffer at once, we'll have
   1940     // to move to multiple searchers.
   1941     lockSearcher();
   1942 
   1943     UStringSearch* searcher = WebCore::searcher();
   1944     UCollator* collator = usearch_getCollator(searcher);
   1945 
   1946     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
   1947     if (ucol_getStrength(collator) != strength) {
   1948         ucol_setStrength(collator, strength);
   1949         usearch_reset(searcher);
   1950     }
   1951 
   1952     UErrorCode status = U_ZERO_ERROR;
   1953     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
   1954     ASSERT(status == U_ZERO_ERROR);
   1955 
   1956     // The kana workaround requires a normalized copy of the target string.
   1957     if (m_targetRequiresKanaWorkaround)
   1958         normalizeCharacters(m_target.data(), m_target.size(), m_normalizedTarget);
   1959 }
   1960 
   1961 inline SearchBuffer::~SearchBuffer()
   1962 {
   1963     // Leave the static object pointing to a valid string.
   1964     UErrorCode status = U_ZERO_ERROR;
   1965     usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
   1966     ASSERT(status == U_ZERO_ERROR);
   1967 
   1968     unlockSearcher();
   1969 }
   1970 
   1971 template<typename CharType>
   1972 inline void SearchBuffer::append(const CharType* characters, size_t length)
   1973 {
   1974     ASSERT(length);
   1975 
   1976     if (m_atBreak) {
   1977         m_buffer.shrink(0);
   1978         m_prefixLength = 0;
   1979         m_atBreak = false;
   1980     } else if (m_buffer.size() == m_buffer.capacity()) {
   1981         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
   1982         m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap);
   1983         m_buffer.shrink(m_overlap);
   1984     }
   1985 
   1986     size_t oldLength = m_buffer.size();
   1987     size_t usableLength = min(m_buffer.capacity() - oldLength, length);
   1988     ASSERT(usableLength);
   1989     m_buffer.resize(oldLength + usableLength);
   1990     UChar* destination = m_buffer.data() + oldLength;
   1991     StringImpl::copyChars(destination, characters, usableLength);
   1992     foldQuoteMarksAndSoftHyphens(destination, usableLength);
   1993     m_numberOfCharactersJustAppended = usableLength;
   1994 }
   1995 
   1996 inline bool SearchBuffer::needsMoreContext() const
   1997 {
   1998     return m_needsMoreContext;
   1999 }
   2000 
   2001 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
   2002 {
   2003     ASSERT(m_needsMoreContext);
   2004     ASSERT(m_prefixLength == m_buffer.size());
   2005 
   2006     if (!length)
   2007         return;
   2008 
   2009     m_atBreak = false;
   2010 
   2011     size_t wordBoundaryContextStart = length;
   2012     if (wordBoundaryContextStart) {
   2013         U16_BACK_1(characters, 0, wordBoundaryContextStart);
   2014         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
   2015     }
   2016 
   2017     size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
   2018     m_buffer.prepend(characters + length - usableLength, usableLength);
   2019     m_prefixLength += usableLength;
   2020 
   2021     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
   2022         m_needsMoreContext = false;
   2023 }
   2024 
   2025 inline bool SearchBuffer::atBreak() const
   2026 {
   2027     return m_atBreak;
   2028 }
   2029 
   2030 inline void SearchBuffer::reachedBreak()
   2031 {
   2032     m_atBreak = true;
   2033 }
   2034 
   2035 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
   2036 {
   2037     // This function implements the kana workaround. If usearch treats
   2038     // it as a match, but we do not want to, then it's a "bad match".
   2039     if (!m_targetRequiresKanaWorkaround)
   2040         return false;
   2041 
   2042     // Normalize into a match buffer. We reuse a single buffer rather than
   2043     // creating a new one each time.
   2044     normalizeCharacters(match, matchLength, m_normalizedMatch);
   2045 
   2046     const UChar* a = m_normalizedTarget.begin();
   2047     const UChar* aEnd = m_normalizedTarget.end();
   2048 
   2049     const UChar* b = m_normalizedMatch.begin();
   2050     const UChar* bEnd = m_normalizedMatch.end();
   2051 
   2052     while (true) {
   2053         // Skip runs of non-kana-letter characters. This is necessary so we can
   2054         // correctly handle strings where the target and match have different-length
   2055         // runs of characters that match, while still double checking the correctness
   2056         // of matches of kana letters with other kana letters.
   2057         while (a != aEnd && !isKanaLetter(*a))
   2058             ++a;
   2059         while (b != bEnd && !isKanaLetter(*b))
   2060             ++b;
   2061 
   2062         // If we reached the end of either the target or the match, we should have
   2063         // reached the end of both; both should have the same number of kana letters.
   2064         if (a == aEnd || b == bEnd) {
   2065             ASSERT(a == aEnd);
   2066             ASSERT(b == bEnd);
   2067             return false;
   2068         }
   2069 
   2070         // Check for differences in the kana letter character itself.
   2071         if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b))
   2072             return true;
   2073         if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b))
   2074             return true;
   2075         ++a;
   2076         ++b;
   2077 
   2078         // Check for differences in combining voiced sound marks found after the letter.
   2079         while (1) {
   2080             if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) {
   2081                 if (b != bEnd && isCombiningVoicedSoundMark(*b))
   2082                     return true;
   2083                 break;
   2084             }
   2085             if (!(b != bEnd && isCombiningVoicedSoundMark(*b)))
   2086                 return true;
   2087             if (*a != *b)
   2088                 return true;
   2089             ++a;
   2090             ++b;
   2091         }
   2092     }
   2093 }
   2094 
   2095 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
   2096 {
   2097     ASSERT(m_options & AtWordStarts);
   2098 
   2099     if (!start)
   2100         return true;
   2101 
   2102     int size = m_buffer.size();
   2103     int offset = start;
   2104     UChar32 firstCharacter;
   2105     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
   2106 
   2107     if (m_options & TreatMedialCapitalAsWordStart) {
   2108         UChar32 previousCharacter;
   2109         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
   2110 
   2111         if (isSeparator(firstCharacter)) {
   2112             // The start of a separator run is a word start (".org" in "webkit.org").
   2113             if (!isSeparator(previousCharacter))
   2114                 return true;
   2115         } else if (isASCIIUpper(firstCharacter)) {
   2116             // The start of an uppercase run is a word start ("Kit" in "WebKit").
   2117             if (!isASCIIUpper(previousCharacter))
   2118                 return true;
   2119             // The last character of an uppercase run followed by a non-separator, non-digit
   2120             // is a word start ("Request" in "XMLHTTPRequest").
   2121             offset = start;
   2122             U16_FWD_1(m_buffer.data(), offset, size);
   2123             UChar32 nextCharacter = 0;
   2124             if (offset < size)
   2125                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
   2126             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
   2127                 return true;
   2128         } else if (isASCIIDigit(firstCharacter)) {
   2129             // The start of a digit run is a word start ("2" in "WebKit2").
   2130             if (!isASCIIDigit(previousCharacter))
   2131                 return true;
   2132         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
   2133             // The start of a non-separator, non-uppercase, non-digit run is a word start,
   2134             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
   2135             return true;
   2136         }
   2137     }
   2138 
   2139     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
   2140     // a word, so treat the position before any CJK character as a word start.
   2141     if (Font::isCJKIdeographOrSymbol(firstCharacter))
   2142         return true;
   2143 
   2144     size_t wordBreakSearchStart = start + length;
   2145     while (wordBreakSearchStart > start)
   2146         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
   2147     return wordBreakSearchStart == start;
   2148 }
   2149 
   2150 inline size_t SearchBuffer::search(size_t& start)
   2151 {
   2152     size_t size = m_buffer.size();
   2153     if (m_atBreak) {
   2154         if (!size)
   2155             return 0;
   2156     } else {
   2157         if (size != m_buffer.capacity())
   2158             return 0;
   2159     }
   2160 
   2161     UStringSearch* searcher = WebCore::searcher();
   2162 
   2163     UErrorCode status = U_ZERO_ERROR;
   2164     usearch_setText(searcher, m_buffer.data(), size, &status);
   2165     ASSERT(status == U_ZERO_ERROR);
   2166 
   2167     usearch_setOffset(searcher, m_prefixLength, &status);
   2168     ASSERT(status == U_ZERO_ERROR);
   2169 
   2170     int matchStart = usearch_next(searcher, &status);
   2171     ASSERT(status == U_ZERO_ERROR);
   2172 
   2173 nextMatch:
   2174     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
   2175         ASSERT(matchStart == USEARCH_DONE);
   2176         return 0;
   2177     }
   2178 
   2179     // Matches that start in the overlap area are only tentative.
   2180     // The same match may appear later, matching more characters,
   2181     // possibly including a combining character that's not yet in the buffer.
   2182     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
   2183         size_t overlap = m_overlap;
   2184         if (m_options & AtWordStarts) {
   2185             // Ensure that there is sufficient context before matchStart the next time around for
   2186             // determining if it is at a word boundary.
   2187             int wordBoundaryContextStart = matchStart;
   2188             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
   2189             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
   2190             overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart));
   2191         }
   2192         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
   2193         m_prefixLength -= min(m_prefixLength, size - overlap);
   2194         m_buffer.shrink(overlap);
   2195         return 0;
   2196     }
   2197 
   2198     size_t matchedLength = usearch_getMatchedLength(searcher);
   2199     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
   2200 
   2201     // If this match is "bad", move on to the next match.
   2202     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
   2203         matchStart = usearch_next(searcher, &status);
   2204         ASSERT(status == U_ZERO_ERROR);
   2205         goto nextMatch;
   2206     }
   2207 
   2208     size_t newSize = size - (matchStart + 1);
   2209     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
   2210     m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1);
   2211     m_buffer.shrink(newSize);
   2212 
   2213     start = size - matchStart;
   2214     return matchedLength;
   2215 }
   2216 
   2217 // --------
   2218 
   2219 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
   2220 {
   2221     int length = 0;
   2222     for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance())
   2223         length += it.length();
   2224 
   2225     return length;
   2226 }
   2227 
   2228 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
   2229 {
   2230     CharacterIterator entireRangeIterator(entireRange);
   2231     return characterSubrange(entireRangeIterator, characterOffset, characterCount);
   2232 }
   2233 
   2234 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
   2235 {
   2236     RefPtr<Range> resultRange = scope->document()->createRange();
   2237 
   2238     int docTextPosition = 0;
   2239     int rangeEnd = rangeLocation + rangeLength;
   2240     bool startRangeFound = false;
   2241 
   2242     RefPtr<Range> textRunRange;
   2243 
   2244     TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior);
   2245 
   2246     // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
   2247     if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
   2248         textRunRange = it.range();
   2249 
   2250         resultRange->setStart(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION);
   2251         resultRange->setEnd(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION);
   2252 
   2253         return resultRange.release();
   2254     }
   2255 
   2256     for (; !it.atEnd(); it.advance()) {
   2257         int len = it.length();
   2258         textRunRange = it.range();
   2259 
   2260         bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
   2261         bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
   2262 
   2263         // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
   2264         // in those cases that textRunRange is used.
   2265         if (foundEnd) {
   2266             // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
   2267             // position for emitted '\n's.
   2268             if (len == 1 && it.characterAt(0) == '\n') {
   2269                 scope->document()->updateLayoutIgnorePendingStylesheets();
   2270                 it.advance();
   2271                 if (!it.atEnd()) {
   2272                     RefPtr<Range> range = it.range();
   2273                     textRunRange->setEnd(range->startContainer(), range->startOffset(), ASSERT_NO_EXCEPTION);
   2274                 } else {
   2275                     Position runStart = textRunRange->startPosition();
   2276                     Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
   2277                     if (runEnd.isNotNull())
   2278                         textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ASSERT_NO_EXCEPTION);
   2279                 }
   2280             }
   2281         }
   2282 
   2283         if (foundStart) {
   2284             startRangeFound = true;
   2285             if (textRunRange->startContainer()->isTextNode()) {
   2286                 int offset = rangeLocation - docTextPosition;
   2287                 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION);
   2288             } else {
   2289                 if (rangeLocation == docTextPosition)
   2290                     resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION);
   2291                 else
   2292                     resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
   2293             }
   2294         }
   2295 
   2296         if (foundEnd) {
   2297             if (textRunRange->startContainer()->isTextNode()) {
   2298                 int offset = rangeEnd - docTextPosition;
   2299                 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION);
   2300             } else {
   2301                 if (rangeEnd == docTextPosition)
   2302                     resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION);
   2303                 else
   2304                     resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
   2305             }
   2306             docTextPosition += len;
   2307             break;
   2308         }
   2309         docTextPosition += len;
   2310     }
   2311 
   2312     if (!startRangeFound)
   2313         return 0;
   2314 
   2315     if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
   2316         resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
   2317     }
   2318 
   2319     return resultRange.release();
   2320 }
   2321 
   2322 bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length)
   2323 {
   2324     location = notFound;
   2325     length = 0;
   2326 
   2327     if (!range->startContainer())
   2328         return false;
   2329 
   2330     // The critical assumption is that this only gets called with ranges that
   2331     // concentrate on a given area containing the selection root. This is done
   2332     // because of text fields and textareas. The DOM for those is not
   2333     // directly in the document DOM, so ensure that the range does not cross a
   2334     // boundary of one of those.
   2335     if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope))
   2336         return false;
   2337     if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope))
   2338         return false;
   2339 
   2340     RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset());
   2341     ASSERT(testRange->startContainer() == scope);
   2342     location = TextIterator::rangeLength(testRange.get());
   2343 
   2344     testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION);
   2345     ASSERT(testRange->startContainer() == scope);
   2346     length = TextIterator::rangeLength(testRange.get()) - location;
   2347     return true;
   2348 }
   2349 
   2350 // --------
   2351 
   2352 String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDisplayString)
   2353 {
   2354     // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
   2355     static const unsigned initialCapacity = 1 << 15;
   2356 
   2357     unsigned bufferLength = 0;
   2358     StringBuilder builder;
   2359     builder.reserveCapacity(initialCapacity);
   2360     TextIteratorBehavior behavior = defaultBehavior;
   2361     if (!isDisplayString)
   2362         behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding);
   2363 
   2364     for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
   2365         it.appendTextToStringBuilder(builder);
   2366         bufferLength += it.length();
   2367     }
   2368 
   2369     if (!bufferLength)
   2370         return emptyString();
   2371 
   2372     String result = builder.toString();
   2373 
   2374     if (isDisplayString && r->ownerDocument())
   2375         r->ownerDocument()->displayStringModifiedByEncoding(result);
   2376 
   2377     return result;
   2378 }
   2379 
   2380 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
   2381 {
   2382     RefPtr<Range> result = range->cloneRange(ASSERT_NO_EXCEPTION);
   2383     result->collapse(!forward, ASSERT_NO_EXCEPTION);
   2384     return result.release();
   2385 }
   2386 
   2387 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
   2388 {
   2389     matchStart = 0;
   2390     size_t matchLength = 0;
   2391 
   2392     SearchBuffer buffer(target, options);
   2393 
   2394     if (buffer.needsMoreContext()) {
   2395         RefPtr<Range> startRange = it.range();
   2396         RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange();
   2397         beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION);
   2398         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
   2399             Vector<UChar, 1024> characters;
   2400             backwardsIterator.prependTextTo(characters);
   2401             buffer.prependContext(characters.data(), characters.size());
   2402             if (!buffer.needsMoreContext())
   2403                 break;
   2404         }
   2405     }
   2406 
   2407     while (!it.atEnd()) {
   2408         it.appendTextTo(buffer);
   2409         it.advance(buffer.numberOfCharactersJustAppended());
   2410 tryAgain:
   2411         size_t matchStartOffset;
   2412         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
   2413             // Note that we found a match, and where we found it.
   2414             size_t lastCharacterInBufferOffset = it.characterOffset();
   2415             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
   2416             matchStart = lastCharacterInBufferOffset - matchStartOffset;
   2417             matchLength = newMatchLength;
   2418             // If searching forward, stop on the first match.
   2419             // If searching backward, don't stop, so we end up with the last match.
   2420             if (!(options & Backwards))
   2421                 break;
   2422             goto tryAgain;
   2423         }
   2424         if (it.atBreak() && !buffer.atBreak()) {
   2425             buffer.reachedBreak();
   2426             goto tryAgain;
   2427         }
   2428     }
   2429 
   2430     return matchLength;
   2431 }
   2432 
   2433 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
   2434 {
   2435     // CharacterIterator requires renderers to be up-to-date
   2436     range->ownerDocument()->updateLayout();
   2437 
   2438     // First, find the text.
   2439     size_t matchStart;
   2440     size_t matchLength;
   2441     {
   2442         CharacterIterator findIterator(range, TextIteratorEntersTextControls);
   2443         matchLength = findPlainText(findIterator, target, options, matchStart);
   2444         if (!matchLength)
   2445             return collapsedToBoundary(range, !(options & Backwards));
   2446     }
   2447 
   2448     // Then, find the document position of the start and the end of the text.
   2449     CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls);
   2450     return characterSubrange(computeRangeIterator, matchStart, matchLength);
   2451 }
   2452 
   2453 }
   2454