Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
      3  * Copyright (C) 2005 Alexey Proskuryakov.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 #include "config.h"
     28 #include "TextIterator.h"
     29 
     30 #include "Document.h"
     31 #include "Frame.h"
     32 #include "HTMLElement.h"
     33 #include "HTMLNames.h"
     34 #include "htmlediting.h"
     35 #include "InlineTextBox.h"
     36 #include "Range.h"
     37 #include "RenderTableCell.h"
     38 #include "RenderTableRow.h"
     39 #include "RenderTextControl.h"
     40 #include "RenderTextFragment.h"
     41 #include "TextBoundaries.h"
     42 #include "TextBreakIterator.h"
     43 #include "VisiblePosition.h"
     44 #include "visible_units.h"
     45 #include <wtf/unicode/CharacterNames.h>
     46 
     47 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
     48 #include "TextBreakIteratorInternalICU.h"
     49 #include <unicode/usearch.h>
     50 #endif
     51 
     52 using namespace WTF::Unicode;
     53 using namespace std;
     54 
     55 namespace WebCore {
     56 
     57 using namespace HTMLNames;
     58 
     59 // Buffer that knows how to compare with a search target.
     60 // Keeps enough of the previous text to be able to search in the future, but no more.
     61 // Non-breaking spaces are always equal to normal spaces.
     62 // Case folding is also done if the CaseInsensitive option is specified.
     63 // Matches are further filtered if the AtWordStarts option is specified, although some
     64 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
     65 class SearchBuffer {
     66     WTF_MAKE_NONCOPYABLE(SearchBuffer);
     67 public:
     68     SearchBuffer(const String& target, FindOptions);
     69     ~SearchBuffer();
     70 
     71     // Returns number of characters appended; guaranteed to be in the range [1, length].
     72     size_t append(const UChar*, size_t length);
     73     bool needsMoreContext() const;
     74     void prependContext(const UChar*, size_t length);
     75     void reachedBreak();
     76 
     77     // Result is the size in characters of what was found.
     78     // And <startOffset> is the number of characters back to the start of what was found.
     79     size_t search(size_t& startOffset);
     80     bool atBreak() const;
     81 
     82 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
     83 
     84 private:
     85     bool isBadMatch(const UChar*, size_t length) const;
     86     bool isWordStartMatch(size_t start, size_t length) const;
     87 
     88     String m_target;
     89     FindOptions m_options;
     90 
     91     Vector<UChar> m_buffer;
     92     size_t m_overlap;
     93     size_t m_prefixLength;
     94     bool m_atBreak;
     95     bool m_needsMoreContext;
     96 
     97     bool m_targetRequiresKanaWorkaround;
     98     Vector<UChar> m_normalizedTarget;
     99     mutable Vector<UChar> m_normalizedMatch;
    100 
    101 #else
    102 
    103 private:
    104     void append(UChar, bool isCharacterStart);
    105     size_t length() const;
    106 
    107     String m_target;
    108     FindOptions m_options;
    109 
    110     Vector<UChar> m_buffer;
    111     Vector<bool> m_isCharacterStartBuffer;
    112     bool m_isBufferFull;
    113     size_t m_cursor;
    114 
    115 #endif
    116 };
    117 
    118 // --------
    119 
    120 static const unsigned bitsInWord = sizeof(unsigned) * 8;
    121 static const unsigned bitInWordMask = bitsInWord - 1;
    122 
    123 BitStack::BitStack()
    124     : m_size(0)
    125 {
    126 }
    127 
    128 BitStack::~BitStack()
    129 {
    130 }
    131 
    132 void BitStack::push(bool bit)
    133 {
    134     unsigned index = m_size / bitsInWord;
    135     unsigned shift = m_size & bitInWordMask;
    136     if (!shift && index == m_words.size()) {
    137         m_words.grow(index + 1);
    138         m_words[index] = 0;
    139     }
    140     unsigned& word = m_words[index];
    141     unsigned mask = 1U << shift;
    142     if (bit)
    143         word |= mask;
    144     else
    145         word &= ~mask;
    146     ++m_size;
    147 }
    148 
    149 void BitStack::pop()
    150 {
    151     if (m_size)
    152         --m_size;
    153 }
    154 
    155 bool BitStack::top() const
    156 {
    157     if (!m_size)
    158         return false;
    159     unsigned shift = (m_size - 1) & bitInWordMask;
    160     return m_words.last() & (1U << shift);
    161 }
    162 
    163 unsigned BitStack::size() const
    164 {
    165     return m_size;
    166 }
    167 
    168 // --------
    169 
    170 #if !ASSERT_DISABLED
    171 
    172 static unsigned depthCrossingShadowBoundaries(Node* node)
    173 {
    174     unsigned depth = 0;
    175     for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode())
    176         ++depth;
    177     return depth;
    178 }
    179 
    180 #endif
    181 
    182 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
    183 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
    184 {
    185     if (!rangeEndContainer)
    186         return 0;
    187     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
    188         if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
    189             return next;
    190     }
    191     for (Node* node = rangeEndContainer; node; node = node->parentOrHostNode()) {
    192         if (Node* next = node->nextSibling())
    193             return next;
    194     }
    195     return 0;
    196 }
    197 
    198 // --------
    199 
    200 static inline bool fullyClipsContents(Node* node)
    201 {
    202     RenderObject* renderer = node->renderer();
    203     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
    204         return false;
    205     return toRenderBox(renderer)->size().isEmpty();
    206 }
    207 
    208 static inline bool ignoresContainerClip(Node* node)
    209 {
    210     RenderObject* renderer = node->renderer();
    211     if (!renderer || renderer->isText())
    212         return false;
    213     EPosition position = renderer->style()->position();
    214     return position == AbsolutePosition || position == FixedPosition;
    215 }
    216 
    217 static void pushFullyClippedState(BitStack& stack, Node* node)
    218 {
    219     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
    220 
    221     // Push true if this node full clips its contents, or if a parent already has fully
    222     // clipped and this is not a node that ignores its container's clip.
    223     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
    224 }
    225 
    226 static void setUpFullyClippedStack(BitStack& stack, Node* node)
    227 {
    228     // Put the nodes in a vector so we can iterate in reverse order.
    229     Vector<Node*, 100> ancestry;
    230     for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode())
    231         ancestry.append(parent);
    232 
    233     // Call pushFullyClippedState on each node starting with the earliest ancestor.
    234     size_t size = ancestry.size();
    235     for (size_t i = 0; i < size; ++i)
    236         pushFullyClippedState(stack, ancestry[size - i - 1]);
    237     pushFullyClippedState(stack, node);
    238 
    239     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
    240 }
    241 
    242 // --------
    243 
    244 TextIterator::TextIterator()
    245     : m_startContainer(0)
    246     , m_startOffset(0)
    247     , m_endContainer(0)
    248     , m_endOffset(0)
    249     , m_positionNode(0)
    250     , m_textCharacters(0)
    251     , m_textLength(0)
    252     , m_remainingTextBox(0)
    253     , m_firstLetterText(0)
    254     , m_lastCharacter(0)
    255     , m_emitsCharactersBetweenAllVisiblePositions(false)
    256     , m_entersTextControls(false)
    257     , m_emitsTextWithoutTranscoding(false)
    258     , m_handledFirstLetter(false)
    259     , m_ignoresStyleVisibility(false)
    260     , m_emitsObjectReplacementCharacters(false)
    261 {
    262 }
    263 
    264 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior)
    265     : m_startContainer(0)
    266     , m_startOffset(0)
    267     , m_endContainer(0)
    268     , m_endOffset(0)
    269     , m_positionNode(0)
    270     , m_textCharacters(0)
    271     , m_textLength(0)
    272     , m_remainingTextBox(0)
    273     , m_firstLetterText(0)
    274     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
    275     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
    276     , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding)
    277     , m_handledFirstLetter(false)
    278     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
    279     , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters)
    280 {
    281     if (!r)
    282         return;
    283 
    284     // get and validate the range endpoints
    285     Node* startContainer = r->startContainer();
    286     if (!startContainer)
    287         return;
    288     int startOffset = r->startOffset();
    289     Node* endContainer = r->endContainer();
    290     int endOffset = r->endOffset();
    291 
    292     // Callers should be handing us well-formed ranges. If we discover that this isn't
    293     // the case, we could consider changing this assertion to an early return.
    294     ASSERT(r->boundaryPointsValid());
    295 
    296     // remember range - this does not change
    297     m_startContainer = startContainer;
    298     m_startOffset = startOffset;
    299     m_endContainer = endContainer;
    300     m_endOffset = endOffset;
    301 
    302     // set up the current node for processing
    303     m_node = r->firstNode();
    304     if (!m_node)
    305         return;
    306     setUpFullyClippedStack(m_fullyClippedStack, m_node);
    307     m_offset = m_node == m_startContainer ? m_startOffset : 0;
    308     m_handledNode = false;
    309     m_handledChildren = false;
    310 
    311     // calculate first out of bounds node
    312     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
    313 
    314     // initialize node processing state
    315     m_needsAnotherNewline = false;
    316     m_textBox = 0;
    317 
    318     // initialize record of previous node processing
    319     m_hasEmitted = false;
    320     m_lastTextNode = 0;
    321     m_lastTextNodeEndedWithCollapsedSpace = false;
    322     m_lastCharacter = 0;
    323 
    324 #ifndef NDEBUG
    325     // need this just because of the assert in advance()
    326     m_positionNode = m_node;
    327 #endif
    328 
    329     // identify the first run
    330     advance();
    331 }
    332 
    333 TextIterator::~TextIterator()
    334 {
    335 }
    336 
    337 void TextIterator::advance()
    338 {
    339     // reset the run information
    340     m_positionNode = 0;
    341     m_textLength = 0;
    342 
    343     // handle remembered node that needed a newline after the text node's newline
    344     if (m_needsAnotherNewline) {
    345         // Emit the extra newline, and position it *inside* m_node, after m_node's
    346         // contents, in case it's a block, in the same way that we position the first
    347         // newline.  The range for the emitted newline should start where the line
    348         // break begins.
    349         // FIXME: It would be cleaner if we emitted two newlines during the last
    350         // iteration, instead of using m_needsAnotherNewline.
    351         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
    352         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    353         m_needsAnotherNewline = false;
    354         return;
    355     }
    356 
    357     if (!m_textBox && m_remainingTextBox) {
    358         m_textBox = m_remainingTextBox;
    359         m_remainingTextBox = 0;
    360         m_firstLetterText = 0;
    361         m_offset = 0;
    362     }
    363     // handle remembered text box
    364     if (m_textBox) {
    365         handleTextBox();
    366         if (m_positionNode)
    367             return;
    368     }
    369 
    370     while (m_node && m_node != m_pastEndNode) {
    371         // if the range ends at offset 0 of an element, represent the
    372         // position, but not the content, of that element e.g. if the
    373         // node is a blockflow element, emit a newline that
    374         // precedes the element
    375         if (m_node == m_endContainer && m_endOffset == 0) {
    376             representNodeOffsetZero();
    377             m_node = 0;
    378             return;
    379         }
    380 
    381         RenderObject* renderer = m_node->renderer();
    382         if (!renderer) {
    383             m_handledNode = true;
    384             m_handledChildren = true;
    385         } else {
    386             // handle current node according to its type
    387             if (!m_handledNode) {
    388                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
    389                     m_handledNode = handleTextNode();
    390                 else if (renderer && (renderer->isImage() || renderer->isWidget() ||
    391                          (renderer->node() && renderer->node()->isElementNode() &&
    392                           static_cast<Element*>(renderer->node())->isFormControlElement())))
    393                     m_handledNode = handleReplacedElement();
    394                 else
    395                     m_handledNode = handleNonTextNode();
    396                 if (m_positionNode)
    397                     return;
    398             }
    399         }
    400 
    401         // find a new current node to handle in depth-first manner,
    402         // calling exitNode() as we come back thru a parent node
    403         Node* next = m_handledChildren ? 0 : m_node->firstChild();
    404         m_offset = 0;
    405         if (!next) {
    406             next = m_node->nextSibling();
    407             if (!next) {
    408                 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode;
    409                 Node* parentNode = m_node->parentOrHostNode();
    410                 while (!next && parentNode) {
    411                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
    412                         return;
    413                     bool haveRenderer = m_node->renderer();
    414                     m_node = parentNode;
    415                     m_fullyClippedStack.pop();
    416                     parentNode = m_node->parentOrHostNode();
    417                     if (haveRenderer)
    418                         exitNode();
    419                     if (m_positionNode) {
    420                         m_handledNode = true;
    421                         m_handledChildren = true;
    422                         return;
    423                     }
    424                     next = m_node->nextSibling();
    425                 }
    426             }
    427             m_fullyClippedStack.pop();
    428         }
    429 
    430         // set the new current node
    431         m_node = next;
    432         if (m_node)
    433             pushFullyClippedState(m_fullyClippedStack, m_node);
    434         m_handledNode = false;
    435         m_handledChildren = false;
    436         m_handledFirstLetter = false;
    437         m_firstLetterText = 0;
    438 
    439         // how would this ever be?
    440         if (m_positionNode)
    441             return;
    442     }
    443 }
    444 
    445 bool TextIterator::handleTextNode()
    446 {
    447     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
    448         return false;
    449 
    450     RenderText* renderer = toRenderText(m_node->renderer());
    451 
    452     m_lastTextNode = m_node;
    453     String str = renderer->text();
    454 
    455     // handle pre-formatted text
    456     if (!renderer->style()->collapseWhiteSpace()) {
    457         int runStart = m_offset;
    458         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
    459             emitCharacter(' ', m_node, 0, runStart, runStart);
    460             return false;
    461         }
    462         if (!m_handledFirstLetter && renderer->isTextFragment()) {
    463             handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
    464             if (m_firstLetterText) {
    465                 String firstLetter = m_firstLetterText->text();
    466                 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
    467                 m_firstLetterText = 0;
    468                 m_textBox = 0;
    469                 return false;
    470             }
    471         }
    472         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    473             return false;
    474         int strLength = str.length();
    475         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
    476         int runEnd = min(strLength, end);
    477 
    478         if (runStart >= runEnd)
    479             return true;
    480 
    481         emitText(m_node, runStart, runEnd);
    482         return true;
    483     }
    484 
    485     if (!renderer->firstTextBox() && str.length() > 0) {
    486         if (!m_handledFirstLetter && renderer->isTextFragment()) {
    487             handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
    488             if (m_firstLetterText) {
    489                 handleTextBox();
    490                 return false;
    491             }
    492         }
    493         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    494             return false;
    495         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
    496         return true;
    497     }
    498 
    499     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
    500     if (renderer->containsReversedText()) {
    501         m_sortedTextBoxes.clear();
    502         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
    503             m_sortedTextBoxes.append(textBox);
    504         }
    505         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
    506         m_sortedTextBoxesPosition = 0;
    507     }
    508 
    509     m_textBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
    510     if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset)
    511         handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
    512     handleTextBox();
    513     return true;
    514 }
    515 
    516 void TextIterator::handleTextBox()
    517 {
    518     RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
    519     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
    520         m_textBox = 0;
    521         return;
    522     }
    523     String str = renderer->text();
    524     unsigned start = m_offset;
    525     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX;
    526     while (m_textBox) {
    527         unsigned textBoxStart = m_textBox->start();
    528         unsigned runStart = max(textBoxStart, start);
    529 
    530         // Check for collapsed space at the start of this run.
    531         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
    532         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
    533             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
    534         if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
    535             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
    536                 unsigned spaceRunStart = runStart - 1;
    537                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
    538                     --spaceRunStart;
    539                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
    540             } else
    541                 emitCharacter(' ', m_node, 0, runStart, runStart);
    542             return;
    543         }
    544         unsigned textBoxEnd = textBoxStart + m_textBox->len();
    545         unsigned runEnd = min(textBoxEnd, end);
    546 
    547         // Determine what the next text box will be, but don't advance yet
    548         InlineTextBox* nextTextBox = 0;
    549         if (renderer->containsReversedText()) {
    550             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
    551                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
    552         } else
    553             nextTextBox = m_textBox->nextTextBox();
    554 
    555         if (runStart < runEnd) {
    556             // Handle either a single newline character (which becomes a space),
    557             // or a run of characters that does not include a newline.
    558             // This effectively translates newlines to spaces without copying the text.
    559             if (str[runStart] == '\n') {
    560                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
    561                 m_offset = runStart + 1;
    562             } else {
    563                 size_t subrunEnd = str.find('\n', runStart);
    564                 if (subrunEnd == notFound || subrunEnd > runEnd)
    565                     subrunEnd = runEnd;
    566 
    567                 m_offset = subrunEnd;
    568                 emitText(m_node, renderer, runStart, subrunEnd);
    569             }
    570 
    571             // If we are doing a subrun that doesn't go to the end of the text box,
    572             // come back again to finish handling this text box; don't advance to the next one.
    573             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
    574                 return;
    575 
    576             // Advance and return
    577             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
    578             if (nextRunStart > runEnd)
    579                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
    580             m_textBox = nextTextBox;
    581             if (renderer->containsReversedText())
    582                 ++m_sortedTextBoxesPosition;
    583             return;
    584         }
    585         // Advance and continue
    586         m_textBox = nextTextBox;
    587         if (renderer->containsReversedText())
    588             ++m_sortedTextBoxesPosition;
    589     }
    590     if (!m_textBox && m_remainingTextBox) {
    591         m_textBox = m_remainingTextBox;
    592         m_remainingTextBox = 0;
    593         m_firstLetterText = 0;
    594         m_offset = 0;
    595         handleTextBox();
    596     }
    597 }
    598 
    599 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
    600 {
    601     if (renderer->firstLetter()) {
    602         RenderObject* r = renderer->firstLetter();
    603         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    604             return;
    605         for (RenderObject *currChild = r->firstChild(); currChild; currChild->nextSibling()) {
    606             if (currChild->isText()) {
    607                 RenderText* firstLetter = toRenderText(currChild);
    608                 m_handledFirstLetter = true;
    609                 m_remainingTextBox = m_textBox;
    610                 m_textBox = firstLetter->firstTextBox();
    611                 m_firstLetterText = firstLetter;
    612                 return;
    613             }
    614         }
    615     }
    616     m_handledFirstLetter = true;
    617 }
    618 
    619 bool TextIterator::handleReplacedElement()
    620 {
    621     if (m_fullyClippedStack.top())
    622         return false;
    623 
    624     RenderObject* renderer = m_node->renderer();
    625     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
    626         return false;
    627 
    628     if (m_lastTextNodeEndedWithCollapsedSpace) {
    629         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
    630         return false;
    631     }
    632 
    633     if (m_entersTextControls && renderer->isTextControl()) {
    634         if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) {
    635             m_node = innerTextElement->shadowTreeRootNode();
    636             pushFullyClippedState(m_fullyClippedStack, m_node);
    637             m_offset = 0;
    638             return false;
    639         }
    640     }
    641 
    642     m_hasEmitted = true;
    643 
    644     if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) {
    645         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
    646         return true;
    647     }
    648 
    649     if (m_emitsCharactersBetweenAllVisiblePositions) {
    650         // We want replaced elements to behave like punctuation for boundary
    651         // finding, and to simply take up space for the selection preservation
    652         // code in moveParagraphs, so we use a comma.
    653         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
    654         return true;
    655     }
    656 
    657     m_positionNode = m_node->parentNode();
    658     m_positionOffsetBaseNode = m_node;
    659     m_positionStartOffset = 0;
    660     m_positionEndOffset = 1;
    661 
    662     m_textCharacters = 0;
    663     m_textLength = 0;
    664 
    665     m_lastCharacter = 0;
    666 
    667     return true;
    668 }
    669 
    670 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
    671 {
    672     if (renderer->style()->visibility() == VISIBLE)
    673         return true;
    674     if (renderer->isTextFragment()) {
    675         RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer);
    676         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
    677             return true;
    678     }
    679     return false;
    680 }
    681 
    682 static bool shouldEmitTabBeforeNode(Node* node)
    683 {
    684     RenderObject* r = node->renderer();
    685 
    686     // Table cells are delimited by tabs.
    687     if (!r || !isTableCell(node))
    688         return false;
    689 
    690     // Want a tab before every cell other than the first one
    691     RenderTableCell* rc = toRenderTableCell(r);
    692     RenderTable* t = rc->table();
    693     return t && (t->cellBefore(rc) || t->cellAbove(rc));
    694 }
    695 
    696 static bool shouldEmitNewlineForNode(Node* node)
    697 {
    698     // br elements are represented by a single newline.
    699     RenderObject* r = node->renderer();
    700     if (!r)
    701         return node->hasTagName(brTag);
    702 
    703     return r->isBR();
    704 }
    705 
    706 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
    707 {
    708     // Block flow (versus inline flow) is represented by having
    709     // a newline both before and after the element.
    710     RenderObject* r = node->renderer();
    711     if (!r) {
    712         return (node->hasTagName(blockquoteTag)
    713                 || node->hasTagName(ddTag)
    714                 || node->hasTagName(divTag)
    715                 || node->hasTagName(dlTag)
    716                 || node->hasTagName(dtTag)
    717                 || node->hasTagName(h1Tag)
    718                 || node->hasTagName(h2Tag)
    719                 || node->hasTagName(h3Tag)
    720                 || node->hasTagName(h4Tag)
    721                 || node->hasTagName(h5Tag)
    722                 || node->hasTagName(h6Tag)
    723                 || node->hasTagName(hrTag)
    724                 || node->hasTagName(liTag)
    725                 || node->hasTagName(listingTag)
    726                 || node->hasTagName(olTag)
    727                 || node->hasTagName(pTag)
    728                 || node->hasTagName(preTag)
    729                 || node->hasTagName(trTag)
    730                 || node->hasTagName(ulTag));
    731     }
    732 
    733     // Need to make an exception for table cells, because they are blocks, but we
    734     // want them tab-delimited rather than having newlines before and after.
    735     if (isTableCell(node))
    736         return false;
    737 
    738     // Need to make an exception for table row elements, because they are neither
    739     // "inline" or "RenderBlock", but we want newlines for them.
    740     if (r->isTableRow()) {
    741         RenderTable* t = toRenderTableRow(r)->table();
    742         if (t && !t->isInline())
    743             return true;
    744     }
    745 
    746     return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody();
    747 }
    748 
    749 static bool shouldEmitNewlineAfterNode(Node* node)
    750 {
    751     // FIXME: It should be better but slower to create a VisiblePosition here.
    752     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
    753         return false;
    754     // Check if this is the very last renderer in the document.
    755     // If so, then we should not emit a newline.
    756     while ((node = node->traverseNextSibling()))
    757         if (node->renderer())
    758             return true;
    759     return false;
    760 }
    761 
    762 static bool shouldEmitNewlineBeforeNode(Node* node)
    763 {
    764     return shouldEmitNewlinesBeforeAndAfterNode(node);
    765 }
    766 
    767 static bool shouldEmitExtraNewlineForNode(Node* node)
    768 {
    769     // When there is a significant collapsed bottom margin, emit an extra
    770     // newline for a more realistic result.  We end up getting the right
    771     // result even without margin collapsing. For example: <div><p>text</p></div>
    772     // will work right even if both the <div> and the <p> have bottom margins.
    773     RenderObject* r = node->renderer();
    774     if (!r || !r->isBox())
    775         return false;
    776 
    777     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
    778     // not to do this at all
    779     if (node->hasTagName(h1Tag)
    780         || node->hasTagName(h2Tag)
    781         || node->hasTagName(h3Tag)
    782         || node->hasTagName(h4Tag)
    783         || node->hasTagName(h5Tag)
    784         || node->hasTagName(h6Tag)
    785         || node->hasTagName(pTag)) {
    786         RenderStyle* style = r->style();
    787         if (style) {
    788             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
    789             int fontSize = style->fontDescription().computedPixelSize();
    790             if (bottomMargin * 2 >= fontSize)
    791                 return true;
    792         }
    793     }
    794 
    795     return false;
    796 }
    797 
    798 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
    799 {
    800     const UChar* characters = renderer->text()->characters();
    801     int length = renderer->text()->length();
    802     for (int i = textEnd; i < length; ++i) {
    803         if (!renderer->style()->isCollapsibleWhiteSpace(characters[i]))
    804             return i - textEnd;
    805     }
    806 
    807     return length - textEnd;
    808 }
    809 
    810 static int maxOffsetIncludingCollapsedSpaces(Node* node)
    811 {
    812     int offset = caretMaxOffset(node);
    813 
    814     if (node->renderer() && node->renderer()->isText())
    815         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
    816 
    817     return offset;
    818 }
    819 
    820 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
    821 bool TextIterator::shouldRepresentNodeOffsetZero()
    822 {
    823     if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
    824         return true;
    825 
    826     // Leave element positioned flush with start of a paragraph
    827     // (e.g. do not insert tab before a table cell at the start of a paragraph)
    828     if (m_lastCharacter == '\n')
    829         return false;
    830 
    831     // Otherwise, show the position if we have emitted any characters
    832     if (m_hasEmitted)
    833         return true;
    834 
    835     // We've not emitted anything yet. Generally, there is no need for any positioning then.
    836     // The only exception is when the element is visually not in the same line as
    837     // the start of the range (e.g. the range starts at the end of the previous paragraph).
    838     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
    839     // make quicker checks to possibly avoid that. Another check that we could make is
    840     // is whether the inline vs block flow changed since the previous visible element.
    841     // I think we're already in a special enough case that that won't be needed, tho.
    842 
    843     // No character needed if this is the first node in the range.
    844     if (m_node == m_startContainer)
    845         return false;
    846 
    847     // If we are outside the start container's subtree, assume we need to emit.
    848     // FIXME: m_startContainer could be an inline block
    849     if (!m_node->isDescendantOf(m_startContainer))
    850         return true;
    851 
    852     // If we started as m_startContainer offset 0 and the current node is a descendant of
    853     // the start container, we already had enough context to correctly decide whether to
    854     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
    855     // so don't second guess that now.
    856     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
    857     // immaterial since we likely would have already emitted something by now.
    858     if (m_startOffset == 0)
    859         return false;
    860 
    861     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
    862     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
    863     // we would create VisiblePositions on every call to this function without this check.
    864     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE)
    865         return false;
    866 
    867     // The startPos.isNotNull() check is needed because the start could be before the body,
    868     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
    869     // The currPos.isNotNull() check is needed because positions in non-HTML content
    870     // (like SVG) do not have visible positions, and we don't want to emit for them either.
    871     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
    872     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
    873     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
    874 }
    875 
    876 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
    877 {
    878     return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
    879 }
    880 
    881 void TextIterator::representNodeOffsetZero()
    882 {
    883     // Emit a character to show the positioning of m_node.
    884 
    885     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
    886     // create VisiblePositions, which is expensive.  So, we perform the inexpensive checks
    887     // on m_node to see if it necessitates emitting a character first and will early return
    888     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
    889     if (shouldEmitTabBeforeNode(m_node)) {
    890         if (shouldRepresentNodeOffsetZero())
    891             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
    892     } else if (shouldEmitNewlineBeforeNode(m_node)) {
    893         if (shouldRepresentNodeOffsetZero())
    894             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
    895     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
    896         if (shouldRepresentNodeOffsetZero())
    897             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
    898     }
    899 }
    900 
    901 bool TextIterator::handleNonTextNode()
    902 {
    903     if (shouldEmitNewlineForNode(m_node))
    904         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
    905     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
    906         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
    907     else
    908         representNodeOffsetZero();
    909 
    910     return true;
    911 }
    912 
    913 void TextIterator::exitNode()
    914 {
    915     // prevent emitting a newline when exiting a collapsed block at beginning of the range
    916     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
    917     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
    918     // therefore look like a blank line.
    919     if (!m_hasEmitted)
    920         return;
    921 
    922     // Emit with a position *inside* m_node, after m_node's contents, in
    923     // case it is a block, because the run should start where the
    924     // emitted character is positioned visually.
    925     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
    926     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
    927     // the logic in _web_attributedStringFromRange match.  We'll get that for free when we switch to use
    928     // TextIterator in _web_attributedStringFromRange.
    929     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
    930     if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
    931         // use extra newline to represent margin bottom, as needed
    932         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
    933 
    934         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
    935         // contain a VisiblePosition when doing selection preservation.
    936         if (m_lastCharacter != '\n') {
    937             // insert a newline with a position following this block's contents.
    938             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    939             // remember whether to later add a newline for the current node
    940             ASSERT(!m_needsAnotherNewline);
    941             m_needsAnotherNewline = addNewline;
    942         } else if (addNewline)
    943             // insert a newline with a position following this block's contents.
    944             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
    945     }
    946 
    947     // If nothing was emitted, see if we need to emit a space.
    948     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
    949         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
    950 }
    951 
    952 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
    953 {
    954     m_hasEmitted = true;
    955 
    956     // remember information with which to construct the TextIterator::range()
    957     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
    958     m_positionNode = textNode;
    959     m_positionOffsetBaseNode = offsetBaseNode;
    960     m_positionStartOffset = textStartOffset;
    961     m_positionEndOffset = textEndOffset;
    962 
    963     // remember information with which to construct the TextIterator::characters() and length()
    964     m_singleCharacterBuffer = c;
    965     m_textCharacters = &m_singleCharacterBuffer;
    966     m_textLength = 1;
    967 
    968     // remember some iteration state
    969     m_lastTextNodeEndedWithCollapsedSpace = false;
    970     m_lastCharacter = c;
    971 }
    972 
    973 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
    974 {
    975     RenderText* renderer = toRenderText(renderObject);
    976     m_text = m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text();
    977     ASSERT(m_text.characters());
    978 
    979     m_positionNode = textNode;
    980     m_positionOffsetBaseNode = 0;
    981     m_positionStartOffset = textStartOffset;
    982     m_positionEndOffset = textEndOffset;
    983     m_textCharacters = m_text.characters() + textStartOffset;
    984     m_textLength = textEndOffset - textStartOffset;
    985     m_lastCharacter = m_text[textEndOffset - 1];
    986 
    987     m_lastTextNodeEndedWithCollapsedSpace = false;
    988     m_hasEmitted = true;
    989 }
    990 
    991 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
    992 {
    993     emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
    994 }
    995 
    996 PassRefPtr<Range> TextIterator::range() const
    997 {
    998     // use the current run information, if we have it
    999     if (m_positionNode) {
   1000         if (m_positionOffsetBaseNode) {
   1001             int index = m_positionOffsetBaseNode->nodeIndex();
   1002             m_positionStartOffset += index;
   1003             m_positionEndOffset += index;
   1004             m_positionOffsetBaseNode = 0;
   1005         }
   1006         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1007     }
   1008 
   1009     // otherwise, return the end of the overall range we were given
   1010     if (m_endContainer)
   1011         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
   1012 
   1013     return 0;
   1014 }
   1015 
   1016 Node* TextIterator::node() const
   1017 {
   1018     RefPtr<Range> textRange = range();
   1019     if (!textRange)
   1020         return 0;
   1021 
   1022     Node* node = textRange->startContainer();
   1023     if (!node)
   1024         return 0;
   1025     if (node->offsetInCharacters())
   1026         return node;
   1027 
   1028     return node->childNode(textRange->startOffset());
   1029 }
   1030 
   1031 // --------
   1032 
   1033 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator()
   1034     : m_behavior(TextIteratorDefaultBehavior)
   1035     , m_node(0)
   1036     , m_positionNode(0)
   1037 {
   1038 }
   1039 
   1040 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior)
   1041     : m_behavior(behavior)
   1042     , m_node(0)
   1043     , m_positionNode(0)
   1044 {
   1045     ASSERT(m_behavior == TextIteratorDefaultBehavior);
   1046 
   1047     if (!r)
   1048         return;
   1049 
   1050     Node* startNode = r->startContainer();
   1051     if (!startNode)
   1052         return;
   1053     Node* endNode = r->endContainer();
   1054     int startOffset = r->startOffset();
   1055     int endOffset = r->endOffset();
   1056 
   1057     if (!startNode->offsetInCharacters()) {
   1058         if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
   1059             startNode = startNode->childNode(startOffset);
   1060             startOffset = 0;
   1061         }
   1062     }
   1063     if (!endNode->offsetInCharacters()) {
   1064         if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
   1065             endNode = endNode->childNode(endOffset - 1);
   1066             endOffset = lastOffsetInNode(endNode);
   1067         }
   1068     }
   1069 
   1070     m_node = endNode;
   1071     setUpFullyClippedStack(m_fullyClippedStack, m_node);
   1072     m_offset = endOffset;
   1073     m_handledNode = false;
   1074     m_handledChildren = endOffset == 0;
   1075 
   1076     m_startNode = startNode;
   1077     m_startOffset = startOffset;
   1078     m_endNode = endNode;
   1079     m_endOffset = endOffset;
   1080 
   1081 #ifndef NDEBUG
   1082     // Need this just because of the assert.
   1083     m_positionNode = endNode;
   1084 #endif
   1085 
   1086     m_lastTextNode = 0;
   1087     m_lastCharacter = '\n';
   1088 
   1089     m_havePassedStartNode = false;
   1090 
   1091     advance();
   1092 }
   1093 
   1094 void SimplifiedBackwardsTextIterator::advance()
   1095 {
   1096     ASSERT(m_positionNode);
   1097 
   1098     m_positionNode = 0;
   1099     m_textLength = 0;
   1100 
   1101     while (m_node && !m_havePassedStartNode) {
   1102         // Don't handle node if we start iterating at [node, 0].
   1103         if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
   1104             RenderObject* renderer = m_node->renderer();
   1105             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
   1106                 // FIXME: What about CDATA_SECTION_NODE?
   1107                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1108                     m_handledNode = handleTextNode();
   1109             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
   1110                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
   1111                     m_handledNode = handleReplacedElement();
   1112             } else
   1113                 m_handledNode = handleNonTextNode();
   1114             if (m_positionNode)
   1115                 return;
   1116         }
   1117 
   1118         if (!m_handledChildren && m_node->hasChildNodes()) {
   1119             m_node = m_node->lastChild();
   1120             pushFullyClippedState(m_fullyClippedStack, m_node);
   1121         } else {
   1122             // Exit empty containers as we pass over them or containers
   1123             // where [container, 0] is where we started iterating.
   1124             if (!m_handledNode
   1125                     && canHaveChildrenForEditing(m_node)
   1126                     && m_node->parentNode()
   1127                     && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
   1128                 exitNode();
   1129                 if (m_positionNode) {
   1130                     m_handledNode = true;
   1131                     m_handledChildren = true;
   1132                     return;
   1133                 }
   1134             }
   1135 
   1136             // Exit all other containers.
   1137             while (!m_node->previousSibling()) {
   1138                 if (!advanceRespectingRange(m_node->parentOrHostNode()))
   1139                     break;
   1140                 m_fullyClippedStack.pop();
   1141                 exitNode();
   1142                 if (m_positionNode) {
   1143                     m_handledNode = true;
   1144                     m_handledChildren = true;
   1145                     return;
   1146                 }
   1147             }
   1148 
   1149             m_fullyClippedStack.pop();
   1150             if (advanceRespectingRange(m_node->previousSibling()))
   1151                 pushFullyClippedState(m_fullyClippedStack, m_node);
   1152             else
   1153                 m_node = 0;
   1154         }
   1155 
   1156         // For the purpose of word boundary detection,
   1157         // we should iterate all visible text and trailing (collapsed) whitespaces.
   1158         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
   1159         m_handledNode = false;
   1160         m_handledChildren = false;
   1161 
   1162         if (m_positionNode)
   1163             return;
   1164     }
   1165 }
   1166 
   1167 bool SimplifiedBackwardsTextIterator::handleTextNode()
   1168 {
   1169     m_lastTextNode = m_node;
   1170 
   1171     RenderText* renderer = toRenderText(m_node->renderer());
   1172     String str = renderer->text();
   1173 
   1174     if (!renderer->firstTextBox() && str.length() > 0)
   1175         return true;
   1176 
   1177     m_positionEndOffset = m_offset;
   1178 
   1179     m_offset = (m_node == m_startNode) ? m_startOffset : 0;
   1180     m_positionNode = m_node;
   1181     m_positionStartOffset = m_offset;
   1182     m_textLength = m_positionEndOffset - m_positionStartOffset;
   1183     m_textCharacters = str.characters() + m_positionStartOffset;
   1184 
   1185     m_lastCharacter = str[m_positionEndOffset - 1];
   1186 
   1187     return true;
   1188 }
   1189 
   1190 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
   1191 {
   1192     unsigned index = m_node->nodeIndex();
   1193     // We want replaced elements to behave like punctuation for boundary
   1194     // finding, and to simply take up space for the selection preservation
   1195     // code in moveParagraphs, so we use a comma.  Unconditionally emit
   1196     // here because this iterator is only used for boundary finding.
   1197     emitCharacter(',', m_node->parentNode(), index, index + 1);
   1198     return true;
   1199 }
   1200 
   1201 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
   1202 {
   1203     // We can use a linefeed in place of a tab because this simple iterator is only used to
   1204     // find boundaries, not actual content.  A linefeed breaks words, sentences, and paragraphs.
   1205     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
   1206         unsigned index = m_node->nodeIndex();
   1207         // The start of this emitted range is wrong. Ensuring correctness would require
   1208         // VisiblePositions and so would be slow. previousBoundary expects this.
   1209         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
   1210     }
   1211     return true;
   1212 }
   1213 
   1214 void SimplifiedBackwardsTextIterator::exitNode()
   1215 {
   1216     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
   1217         // The start of this emitted range is wrong. Ensuring correctness would require
   1218         // VisiblePositions and so would be slow. previousBoundary expects this.
   1219         emitCharacter('\n', m_node, 0, 0);
   1220     }
   1221 }
   1222 
   1223 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
   1224 {
   1225     m_singleCharacterBuffer = c;
   1226     m_positionNode = node;
   1227     m_positionStartOffset = startOffset;
   1228     m_positionEndOffset = endOffset;
   1229     m_textCharacters = &m_singleCharacterBuffer;
   1230     m_textLength = 1;
   1231     m_lastCharacter = c;
   1232 }
   1233 
   1234 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
   1235 {
   1236     if (!next)
   1237         return false;
   1238     m_havePassedStartNode |= m_node == m_startNode;
   1239     if (m_havePassedStartNode)
   1240         return false;
   1241     m_node = next;
   1242     return true;
   1243 }
   1244 
   1245 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
   1246 {
   1247     if (m_positionNode)
   1248         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
   1249 
   1250     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
   1251 }
   1252 
   1253 // --------
   1254 
   1255 CharacterIterator::CharacterIterator()
   1256     : m_offset(0)
   1257     , m_runOffset(0)
   1258     , m_atBreak(true)
   1259 {
   1260 }
   1261 
   1262 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior)
   1263     : m_offset(0)
   1264     , m_runOffset(0)
   1265     , m_atBreak(true)
   1266     , m_textIterator(r, behavior)
   1267 {
   1268     while (!atEnd() && m_textIterator.length() == 0)
   1269         m_textIterator.advance();
   1270 }
   1271 
   1272 PassRefPtr<Range> CharacterIterator::range() const
   1273 {
   1274     RefPtr<Range> r = m_textIterator.range();
   1275     if (!m_textIterator.atEnd()) {
   1276         if (m_textIterator.length() <= 1) {
   1277             ASSERT(m_runOffset == 0);
   1278         } else {
   1279             Node* n = r->startContainer();
   1280             ASSERT(n == r->endContainer());
   1281             int offset = r->startOffset() + m_runOffset;
   1282             ExceptionCode ec = 0;
   1283             r->setStart(n, offset, ec);
   1284             r->setEnd(n, offset + 1, ec);
   1285             ASSERT(!ec);
   1286         }
   1287     }
   1288     return r.release();
   1289 }
   1290 
   1291 void CharacterIterator::advance(int count)
   1292 {
   1293     if (count <= 0) {
   1294         ASSERT(count == 0);
   1295         return;
   1296     }
   1297 
   1298     m_atBreak = false;
   1299 
   1300     // easy if there is enough left in the current m_textIterator run
   1301     int remaining = m_textIterator.length() - m_runOffset;
   1302     if (count < remaining) {
   1303         m_runOffset += count;
   1304         m_offset += count;
   1305         return;
   1306     }
   1307 
   1308     // exhaust the current m_textIterator run
   1309     count -= remaining;
   1310     m_offset += remaining;
   1311 
   1312     // move to a subsequent m_textIterator run
   1313     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1314         int runLength = m_textIterator.length();
   1315         if (runLength == 0)
   1316             m_atBreak = true;
   1317         else {
   1318             // see whether this is m_textIterator to use
   1319             if (count < runLength) {
   1320                 m_runOffset = count;
   1321                 m_offset += count;
   1322                 return;
   1323             }
   1324 
   1325             // exhaust this m_textIterator run
   1326             count -= runLength;
   1327             m_offset += runLength;
   1328         }
   1329     }
   1330 
   1331     // ran to the end of the m_textIterator... no more runs left
   1332     m_atBreak = true;
   1333     m_runOffset = 0;
   1334 }
   1335 
   1336 String CharacterIterator::string(int numChars)
   1337 {
   1338     Vector<UChar> result;
   1339     result.reserveInitialCapacity(numChars);
   1340     while (numChars > 0 && !atEnd()) {
   1341         int runSize = min(numChars, length());
   1342         result.append(characters(), runSize);
   1343         numChars -= runSize;
   1344         advance(runSize);
   1345     }
   1346     return String::adopt(result);
   1347 }
   1348 
   1349 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
   1350 {
   1351     it.advance(offset);
   1352     RefPtr<Range> start = it.range();
   1353 
   1354     if (length > 1)
   1355         it.advance(length - 1);
   1356     RefPtr<Range> end = it.range();
   1357 
   1358     return Range::create(start->startContainer()->document(),
   1359         start->startContainer(), start->startOffset(),
   1360         end->endContainer(), end->endOffset());
   1361 }
   1362 
   1363 BackwardsCharacterIterator::BackwardsCharacterIterator()
   1364     : m_offset(0)
   1365     , m_runOffset(0)
   1366     , m_atBreak(true)
   1367 {
   1368 }
   1369 
   1370 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior)
   1371     : m_offset(0)
   1372     , m_runOffset(0)
   1373     , m_atBreak(true)
   1374     , m_textIterator(range, behavior)
   1375 {
   1376     while (!atEnd() && !m_textIterator.length())
   1377         m_textIterator.advance();
   1378 }
   1379 
   1380 PassRefPtr<Range> BackwardsCharacterIterator::range() const
   1381 {
   1382     RefPtr<Range> r = m_textIterator.range();
   1383     if (!m_textIterator.atEnd()) {
   1384         if (m_textIterator.length() <= 1)
   1385             ASSERT(m_runOffset == 0);
   1386         else {
   1387             Node* n = r->startContainer();
   1388             ASSERT(n == r->endContainer());
   1389             int offset = r->endOffset() - m_runOffset;
   1390             ExceptionCode ec = 0;
   1391             r->setStart(n, offset - 1, ec);
   1392             r->setEnd(n, offset, ec);
   1393             ASSERT(!ec);
   1394         }
   1395     }
   1396     return r.release();
   1397 }
   1398 
   1399 void BackwardsCharacterIterator::advance(int count)
   1400 {
   1401     if (count <= 0) {
   1402         ASSERT(!count);
   1403         return;
   1404     }
   1405 
   1406     m_atBreak = false;
   1407 
   1408     int remaining = m_textIterator.length() - m_runOffset;
   1409     if (count < remaining) {
   1410         m_runOffset += count;
   1411         m_offset += count;
   1412         return;
   1413     }
   1414 
   1415     count -= remaining;
   1416     m_offset += remaining;
   1417 
   1418     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
   1419         int runLength = m_textIterator.length();
   1420         if (runLength == 0)
   1421             m_atBreak = true;
   1422         else {
   1423             if (count < runLength) {
   1424                 m_runOffset = count;
   1425                 m_offset += count;
   1426                 return;
   1427             }
   1428 
   1429             count -= runLength;
   1430             m_offset += runLength;
   1431         }
   1432     }
   1433 
   1434     m_atBreak = true;
   1435     m_runOffset = 0;
   1436 }
   1437 
   1438 // --------
   1439 
   1440 WordAwareIterator::WordAwareIterator()
   1441     : m_previousText(0)
   1442     , m_didLookAhead(false)
   1443 {
   1444 }
   1445 
   1446 WordAwareIterator::WordAwareIterator(const Range* r)
   1447     : m_previousText(0)
   1448     , m_didLookAhead(true) // so we consider the first chunk from the text iterator
   1449     , m_textIterator(r)
   1450 {
   1451     advance(); // get in position over the first chunk of text
   1452 }
   1453 
   1454 WordAwareIterator::~WordAwareIterator()
   1455 {
   1456 }
   1457 
   1458 // We're always in one of these modes:
   1459 // - The current chunk in the text iterator is our current chunk
   1460 //      (typically its a piece of whitespace, or text that ended with whitespace)
   1461 // - The previous chunk in the text iterator is our current chunk
   1462 //      (we looked ahead to the next chunk and found a word boundary)
   1463 // - We built up our own chunk of text from many chunks from the text iterator
   1464 
   1465 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
   1466 
   1467 void WordAwareIterator::advance()
   1468 {
   1469     m_previousText = 0;
   1470     m_buffer.clear();      // toss any old buffer we built up
   1471 
   1472     // If last time we did a look-ahead, start with that looked-ahead chunk now
   1473     if (!m_didLookAhead) {
   1474         ASSERT(!m_textIterator.atEnd());
   1475         m_textIterator.advance();
   1476     }
   1477     m_didLookAhead = false;
   1478 
   1479     // Go to next non-empty chunk
   1480     while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
   1481         m_textIterator.advance();
   1482     m_range = m_textIterator.range();
   1483 
   1484     if (m_textIterator.atEnd())
   1485         return;
   1486 
   1487     while (1) {
   1488         // If this chunk ends in whitespace we can just use it as our chunk.
   1489         if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1]))
   1490             return;
   1491 
   1492         // If this is the first chunk that failed, save it in previousText before look ahead
   1493         if (m_buffer.isEmpty()) {
   1494             m_previousText = m_textIterator.characters();
   1495             m_previousLength = m_textIterator.length();
   1496         }
   1497 
   1498         // Look ahead to next chunk.  If it is whitespace or a break, we can use the previous stuff
   1499         m_textIterator.advance();
   1500         if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) {
   1501             m_didLookAhead = true;
   1502             return;
   1503         }
   1504 
   1505         if (m_buffer.isEmpty()) {
   1506             // Start gobbling chunks until we get to a suitable stopping point
   1507             m_buffer.append(m_previousText, m_previousLength);
   1508             m_previousText = 0;
   1509         }
   1510         m_buffer.append(m_textIterator.characters(), m_textIterator.length());
   1511         int exception = 0;
   1512         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception);
   1513     }
   1514 }
   1515 
   1516 int WordAwareIterator::length() const
   1517 {
   1518     if (!m_buffer.isEmpty())
   1519         return m_buffer.size();
   1520     if (m_previousText)
   1521         return m_previousLength;
   1522     return m_textIterator.length();
   1523 }
   1524 
   1525 const UChar* WordAwareIterator::characters() const
   1526 {
   1527     if (!m_buffer.isEmpty())
   1528         return m_buffer.data();
   1529     if (m_previousText)
   1530         return m_previousText;
   1531     return m_textIterator.characters();
   1532 }
   1533 
   1534 // --------
   1535 
   1536 static inline UChar foldQuoteMarkOrSoftHyphen(UChar c)
   1537 {
   1538     switch (c) {
   1539         case hebrewPunctuationGershayim:
   1540         case leftDoubleQuotationMark:
   1541         case rightDoubleQuotationMark:
   1542             return '"';
   1543         case hebrewPunctuationGeresh:
   1544         case leftSingleQuotationMark:
   1545         case rightSingleQuotationMark:
   1546             return '\'';
   1547         case softHyphen:
   1548             // Replace soft hyphen with an ignorable character so that their presence or absence will
   1549             // not affect string comparison.
   1550             return 0;
   1551         default:
   1552             return c;
   1553     }
   1554 }
   1555 
   1556 static inline void foldQuoteMarksAndSoftHyphens(String& s)
   1557 {
   1558     s.replace(hebrewPunctuationGeresh, '\'');
   1559     s.replace(hebrewPunctuationGershayim, '"');
   1560     s.replace(leftDoubleQuotationMark, '"');
   1561     s.replace(leftSingleQuotationMark, '\'');
   1562     s.replace(rightDoubleQuotationMark, '"');
   1563     s.replace(rightSingleQuotationMark, '\'');
   1564     // Replace soft hyphen with an ignorable character so that their presence or absence will
   1565     // not affect string comparison.
   1566     s.replace(softHyphen, 0);
   1567 }
   1568 
   1569 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
   1570 
   1571 static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length)
   1572 {
   1573     for (size_t i = 0; i < length; ++i)
   1574         data[i] = foldQuoteMarkOrSoftHyphen(data[i]);
   1575 }
   1576 
   1577 static const size_t minimumSearchBufferSize = 8192;
   1578 
   1579 #ifndef NDEBUG
   1580 static bool searcherInUse;
   1581 #endif
   1582 
   1583 static UStringSearch* createSearcher()
   1584 {
   1585     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
   1586     // but it doesn't matter exactly what it is, since we don't perform any searches
   1587     // without setting both the pattern and the text.
   1588     UErrorCode status = U_ZERO_ERROR;
   1589     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
   1590     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
   1591     return searcher;
   1592 }
   1593 
   1594 static UStringSearch* searcher()
   1595 {
   1596     static UStringSearch* searcher = createSearcher();
   1597     return searcher;
   1598 }
   1599 
   1600 static inline void lockSearcher()
   1601 {
   1602 #ifndef NDEBUG
   1603     ASSERT(!searcherInUse);
   1604     searcherInUse = true;
   1605 #endif
   1606 }
   1607 
   1608 static inline void unlockSearcher()
   1609 {
   1610 #ifndef NDEBUG
   1611     ASSERT(searcherInUse);
   1612     searcherInUse = false;
   1613 #endif
   1614 }
   1615 
   1616 // ICU's search ignores the distinction between small kana letters and ones
   1617 // that are not small, and also characters that differ only in the voicing
   1618 // marks when considering only primary collation strength diffrences.
   1619 // This is not helpful for end users, since these differences make words
   1620 // distinct, so for our purposes we need these to be considered.
   1621 // The Unicode folks do not think the collation algorithm should be
   1622 // changed. To work around this, we would like to tailor the ICU searcher,
   1623 // but we can't get that to work yet. So instead, we check for cases where
   1624 // these differences occur, and skip those matches.
   1625 
   1626 // We refer to the above technique as the "kana workaround". The next few
   1627 // functions are helper functinos for the kana workaround.
   1628 
   1629 static inline bool isKanaLetter(UChar character)
   1630 {
   1631     // Hiragana letters.
   1632     if (character >= 0x3041 && character <= 0x3096)
   1633         return true;
   1634 
   1635     // Katakana letters.
   1636     if (character >= 0x30A1 && character <= 0x30FA)
   1637         return true;
   1638     if (character >= 0x31F0 && character <= 0x31FF)
   1639         return true;
   1640 
   1641     // Halfwidth katakana letters.
   1642     if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70)
   1643         return true;
   1644 
   1645     return false;
   1646 }
   1647 
   1648 static inline bool isSmallKanaLetter(UChar character)
   1649 {
   1650     ASSERT(isKanaLetter(character));
   1651 
   1652     switch (character) {
   1653     case 0x3041: // HIRAGANA LETTER SMALL A
   1654     case 0x3043: // HIRAGANA LETTER SMALL I
   1655     case 0x3045: // HIRAGANA LETTER SMALL U
   1656     case 0x3047: // HIRAGANA LETTER SMALL E
   1657     case 0x3049: // HIRAGANA LETTER SMALL O
   1658     case 0x3063: // HIRAGANA LETTER SMALL TU
   1659     case 0x3083: // HIRAGANA LETTER SMALL YA
   1660     case 0x3085: // HIRAGANA LETTER SMALL YU
   1661     case 0x3087: // HIRAGANA LETTER SMALL YO
   1662     case 0x308E: // HIRAGANA LETTER SMALL WA
   1663     case 0x3095: // HIRAGANA LETTER SMALL KA
   1664     case 0x3096: // HIRAGANA LETTER SMALL KE
   1665     case 0x30A1: // KATAKANA LETTER SMALL A
   1666     case 0x30A3: // KATAKANA LETTER SMALL I
   1667     case 0x30A5: // KATAKANA LETTER SMALL U
   1668     case 0x30A7: // KATAKANA LETTER SMALL E
   1669     case 0x30A9: // KATAKANA LETTER SMALL O
   1670     case 0x30C3: // KATAKANA LETTER SMALL TU
   1671     case 0x30E3: // KATAKANA LETTER SMALL YA
   1672     case 0x30E5: // KATAKANA LETTER SMALL YU
   1673     case 0x30E7: // KATAKANA LETTER SMALL YO
   1674     case 0x30EE: // KATAKANA LETTER SMALL WA
   1675     case 0x30F5: // KATAKANA LETTER SMALL KA
   1676     case 0x30F6: // KATAKANA LETTER SMALL KE
   1677     case 0x31F0: // KATAKANA LETTER SMALL KU
   1678     case 0x31F1: // KATAKANA LETTER SMALL SI
   1679     case 0x31F2: // KATAKANA LETTER SMALL SU
   1680     case 0x31F3: // KATAKANA LETTER SMALL TO
   1681     case 0x31F4: // KATAKANA LETTER SMALL NU
   1682     case 0x31F5: // KATAKANA LETTER SMALL HA
   1683     case 0x31F6: // KATAKANA LETTER SMALL HI
   1684     case 0x31F7: // KATAKANA LETTER SMALL HU
   1685     case 0x31F8: // KATAKANA LETTER SMALL HE
   1686     case 0x31F9: // KATAKANA LETTER SMALL HO
   1687     case 0x31FA: // KATAKANA LETTER SMALL MU
   1688     case 0x31FB: // KATAKANA LETTER SMALL RA
   1689     case 0x31FC: // KATAKANA LETTER SMALL RI
   1690     case 0x31FD: // KATAKANA LETTER SMALL RU
   1691     case 0x31FE: // KATAKANA LETTER SMALL RE
   1692     case 0x31FF: // KATAKANA LETTER SMALL RO
   1693     case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A
   1694     case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I
   1695     case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U
   1696     case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E
   1697     case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O
   1698     case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA
   1699     case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU
   1700     case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO
   1701     case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU
   1702         return true;
   1703     }
   1704     return false;
   1705 }
   1706 
   1707 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark };
   1708 
   1709 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
   1710 {
   1711     ASSERT(isKanaLetter(character));
   1712 
   1713     switch (character) {
   1714     case 0x304C: // HIRAGANA LETTER GA
   1715     case 0x304E: // HIRAGANA LETTER GI
   1716     case 0x3050: // HIRAGANA LETTER GU
   1717     case 0x3052: // HIRAGANA LETTER GE
   1718     case 0x3054: // HIRAGANA LETTER GO
   1719     case 0x3056: // HIRAGANA LETTER ZA
   1720     case 0x3058: // HIRAGANA LETTER ZI
   1721     case 0x305A: // HIRAGANA LETTER ZU
   1722     case 0x305C: // HIRAGANA LETTER ZE
   1723     case 0x305E: // HIRAGANA LETTER ZO
   1724     case 0x3060: // HIRAGANA LETTER DA
   1725     case 0x3062: // HIRAGANA LETTER DI
   1726     case 0x3065: // HIRAGANA LETTER DU
   1727     case 0x3067: // HIRAGANA LETTER DE
   1728     case 0x3069: // HIRAGANA LETTER DO
   1729     case 0x3070: // HIRAGANA LETTER BA
   1730     case 0x3073: // HIRAGANA LETTER BI
   1731     case 0x3076: // HIRAGANA LETTER BU
   1732     case 0x3079: // HIRAGANA LETTER BE
   1733     case 0x307C: // HIRAGANA LETTER BO
   1734     case 0x3094: // HIRAGANA LETTER VU
   1735     case 0x30AC: // KATAKANA LETTER GA
   1736     case 0x30AE: // KATAKANA LETTER GI
   1737     case 0x30B0: // KATAKANA LETTER GU
   1738     case 0x30B2: // KATAKANA LETTER GE
   1739     case 0x30B4: // KATAKANA LETTER GO
   1740     case 0x30B6: // KATAKANA LETTER ZA
   1741     case 0x30B8: // KATAKANA LETTER ZI
   1742     case 0x30BA: // KATAKANA LETTER ZU
   1743     case 0x30BC: // KATAKANA LETTER ZE
   1744     case 0x30BE: // KATAKANA LETTER ZO
   1745     case 0x30C0: // KATAKANA LETTER DA
   1746     case 0x30C2: // KATAKANA LETTER DI
   1747     case 0x30C5: // KATAKANA LETTER DU
   1748     case 0x30C7: // KATAKANA LETTER DE
   1749     case 0x30C9: // KATAKANA LETTER DO
   1750     case 0x30D0: // KATAKANA LETTER BA
   1751     case 0x30D3: // KATAKANA LETTER BI
   1752     case 0x30D6: // KATAKANA LETTER BU
   1753     case 0x30D9: // KATAKANA LETTER BE
   1754     case 0x30DC: // KATAKANA LETTER BO
   1755     case 0x30F4: // KATAKANA LETTER VU
   1756     case 0x30F7: // KATAKANA LETTER VA
   1757     case 0x30F8: // KATAKANA LETTER VI
   1758     case 0x30F9: // KATAKANA LETTER VE
   1759     case 0x30FA: // KATAKANA LETTER VO
   1760         return VoicedSoundMark;
   1761     case 0x3071: // HIRAGANA LETTER PA
   1762     case 0x3074: // HIRAGANA LETTER PI
   1763     case 0x3077: // HIRAGANA LETTER PU
   1764     case 0x307A: // HIRAGANA LETTER PE
   1765     case 0x307D: // HIRAGANA LETTER PO
   1766     case 0x30D1: // KATAKANA LETTER PA
   1767     case 0x30D4: // KATAKANA LETTER PI
   1768     case 0x30D7: // KATAKANA LETTER PU
   1769     case 0x30DA: // KATAKANA LETTER PE
   1770     case 0x30DD: // KATAKANA LETTER PO
   1771         return SemiVoicedSoundMark;
   1772     }
   1773     return NoVoicedSoundMark;
   1774 }
   1775 
   1776 static inline bool isCombiningVoicedSoundMark(UChar character)
   1777 {
   1778     switch (character) {
   1779     case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
   1780     case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
   1781         return true;
   1782     }
   1783     return false;
   1784 }
   1785 
   1786 static inline bool containsKanaLetters(const String& pattern)
   1787 {
   1788     const UChar* characters = pattern.characters();
   1789     unsigned length = pattern.length();
   1790     for (unsigned i = 0; i < length; ++i) {
   1791         if (isKanaLetter(characters[i]))
   1792             return true;
   1793     }
   1794     return false;
   1795 }
   1796 
   1797 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
   1798 {
   1799     ASSERT(length);
   1800 
   1801     buffer.resize(length);
   1802 
   1803     UErrorCode status = U_ZERO_ERROR;
   1804     size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
   1805     ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
   1806     ASSERT(bufferSize);
   1807 
   1808     buffer.resize(bufferSize);
   1809 
   1810     if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
   1811         return;
   1812 
   1813     status = U_ZERO_ERROR;
   1814     unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
   1815     ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
   1816 }
   1817 
   1818 static bool isNonLatin1Separator(UChar32 character)
   1819 {
   1820     ASSERT_ARG(character, character >= 256);
   1821 
   1822     return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
   1823 }
   1824 
   1825 static inline bool isSeparator(UChar32 character)
   1826 {
   1827     static const bool latin1SeparatorTable[256] = {
   1828         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1829         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1830         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
   1831         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
   1832         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
   1833         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
   1834         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
   1835         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
   1836         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1837         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1838         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
   1839         1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
   1840         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1841         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
   1842         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1843         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
   1844     };
   1845 
   1846     if (character < 256)
   1847         return latin1SeparatorTable[character];
   1848 
   1849     return isNonLatin1Separator(character);
   1850 }
   1851 
   1852 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
   1853     : m_target(target)
   1854     , m_options(options)
   1855     , m_prefixLength(0)
   1856     , m_atBreak(true)
   1857     , m_needsMoreContext(options & AtWordStarts)
   1858     , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target))
   1859 {
   1860     ASSERT(!m_target.isEmpty());
   1861 
   1862     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
   1863     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
   1864     // to add tailoring on top of the locale-specific tailoring as of this writing.
   1865     foldQuoteMarksAndSoftHyphens(m_target);
   1866 
   1867     size_t targetLength = m_target.length();
   1868     m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
   1869     m_overlap = m_buffer.capacity() / 4;
   1870 
   1871     if ((m_options & AtWordStarts) && targetLength) {
   1872         UChar32 targetFirstCharacter;
   1873         U16_GET(m_target.characters(), 0, 0, targetLength, targetFirstCharacter);
   1874         // Characters in the separator category never really occur at the beginning of a word,
   1875         // so if the target begins with such a character, we just ignore the AtWordStart option.
   1876         if (isSeparator(targetFirstCharacter)) {
   1877             m_options &= ~AtWordStarts;
   1878             m_needsMoreContext = false;
   1879         }
   1880     }
   1881 
   1882     // Grab the single global searcher.
   1883     // If we ever have a reason to do more than once search buffer at once, we'll have
   1884     // to move to multiple searchers.
   1885     lockSearcher();
   1886 
   1887     UStringSearch* searcher = WebCore::searcher();
   1888     UCollator* collator = usearch_getCollator(searcher);
   1889 
   1890     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
   1891     if (ucol_getStrength(collator) != strength) {
   1892         ucol_setStrength(collator, strength);
   1893         usearch_reset(searcher);
   1894     }
   1895 
   1896     UErrorCode status = U_ZERO_ERROR;
   1897     usearch_setPattern(searcher, m_target.characters(), targetLength, &status);
   1898     ASSERT(status == U_ZERO_ERROR);
   1899 
   1900     // The kana workaround requires a normalized copy of the target string.
   1901     if (m_targetRequiresKanaWorkaround)
   1902         normalizeCharacters(m_target.characters(), m_target.length(), m_normalizedTarget);
   1903 }
   1904 
   1905 inline SearchBuffer::~SearchBuffer()
   1906 {
   1907     // Leave the static object pointing to a valid string.
   1908     UErrorCode status = U_ZERO_ERROR;
   1909     usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
   1910     ASSERT(status == U_ZERO_ERROR);
   1911 
   1912     unlockSearcher();
   1913 }
   1914 
   1915 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
   1916 {
   1917     ASSERT(length);
   1918 
   1919     if (m_atBreak) {
   1920         m_buffer.shrink(0);
   1921         m_prefixLength = 0;
   1922         m_atBreak = false;
   1923     } else if (m_buffer.size() == m_buffer.capacity()) {
   1924         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
   1925         m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap);
   1926         m_buffer.shrink(m_overlap);
   1927     }
   1928 
   1929     size_t oldLength = m_buffer.size();
   1930     size_t usableLength = min(m_buffer.capacity() - oldLength, length);
   1931     ASSERT(usableLength);
   1932     m_buffer.append(characters, usableLength);
   1933     foldQuoteMarksAndSoftHyphens(m_buffer.data() + oldLength, usableLength);
   1934     return usableLength;
   1935 }
   1936 
   1937 inline bool SearchBuffer::needsMoreContext() const
   1938 {
   1939     return m_needsMoreContext;
   1940 }
   1941 
   1942 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
   1943 {
   1944     ASSERT(m_needsMoreContext);
   1945     ASSERT(m_prefixLength == m_buffer.size());
   1946 
   1947     if (!length)
   1948         return;
   1949 
   1950     m_atBreak = false;
   1951 
   1952     size_t wordBoundaryContextStart = length;
   1953     if (wordBoundaryContextStart) {
   1954         U16_BACK_1(characters, 0, wordBoundaryContextStart);
   1955         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
   1956     }
   1957 
   1958     size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
   1959     m_buffer.prepend(characters + length - usableLength, usableLength);
   1960     m_prefixLength += usableLength;
   1961 
   1962     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
   1963         m_needsMoreContext = false;
   1964 }
   1965 
   1966 inline bool SearchBuffer::atBreak() const
   1967 {
   1968     return m_atBreak;
   1969 }
   1970 
   1971 inline void SearchBuffer::reachedBreak()
   1972 {
   1973     m_atBreak = true;
   1974 }
   1975 
   1976 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
   1977 {
   1978     // This function implements the kana workaround. If usearch treats
   1979     // it as a match, but we do not want to, then it's a "bad match".
   1980     if (!m_targetRequiresKanaWorkaround)
   1981         return false;
   1982 
   1983     // Normalize into a match buffer. We reuse a single buffer rather than
   1984     // creating a new one each time.
   1985     normalizeCharacters(match, matchLength, m_normalizedMatch);
   1986 
   1987     const UChar* a = m_normalizedTarget.begin();
   1988     const UChar* aEnd = m_normalizedTarget.end();
   1989 
   1990     const UChar* b = m_normalizedMatch.begin();
   1991     const UChar* bEnd = m_normalizedMatch.end();
   1992 
   1993     while (true) {
   1994         // Skip runs of non-kana-letter characters. This is necessary so we can
   1995         // correctly handle strings where the target and match have different-length
   1996         // runs of characters that match, while still double checking the correctness
   1997         // of matches of kana letters with other kana letters.
   1998         while (a != aEnd && !isKanaLetter(*a))
   1999             ++a;
   2000         while (b != bEnd && !isKanaLetter(*b))
   2001             ++b;
   2002 
   2003         // If we reached the end of either the target or the match, we should have
   2004         // reached the end of both; both should have the same number of kana letters.
   2005         if (a == aEnd || b == bEnd) {
   2006             ASSERT(a == aEnd);
   2007             ASSERT(b == bEnd);
   2008             return false;
   2009         }
   2010 
   2011         // Check for differences in the kana letter character itself.
   2012         if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b))
   2013             return true;
   2014         if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b))
   2015             return true;
   2016         ++a;
   2017         ++b;
   2018 
   2019         // Check for differences in combining voiced sound marks found after the letter.
   2020         while (1) {
   2021             if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) {
   2022                 if (b != bEnd && isCombiningVoicedSoundMark(*b))
   2023                     return true;
   2024                 break;
   2025             }
   2026             if (!(b != bEnd && isCombiningVoicedSoundMark(*b)))
   2027                 return true;
   2028             if (*a != *b)
   2029                 return true;
   2030             ++a;
   2031             ++b;
   2032         }
   2033     }
   2034 }
   2035 
   2036 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
   2037 {
   2038     ASSERT(m_options & AtWordStarts);
   2039 
   2040     if (!start)
   2041         return true;
   2042 
   2043     if (m_options & TreatMedialCapitalAsWordStart) {
   2044         int size = m_buffer.size();
   2045         int offset = start;
   2046         UChar32 firstCharacter;
   2047         U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
   2048         UChar32 previousCharacter;
   2049         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
   2050 
   2051         if (isSeparator(firstCharacter)) {
   2052             // The start of a separator run is a word start (".org" in "webkit.org").
   2053             if (!isSeparator(previousCharacter))
   2054                 return true;
   2055         } else if (isASCIIUpper(firstCharacter)) {
   2056             // The start of an uppercase run is a word start ("Kit" in "WebKit").
   2057             if (!isASCIIUpper(previousCharacter))
   2058                 return true;
   2059             // The last character of an uppercase run followed by a non-separator, non-digit
   2060             // is a word start ("Request" in "XMLHTTPRequest").
   2061             offset = start;
   2062             U16_FWD_1(m_buffer.data(), offset, size);
   2063             UChar32 nextCharacter = 0;
   2064             if (offset < size)
   2065                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
   2066             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
   2067                 return true;
   2068         } else if (isASCIIDigit(firstCharacter)) {
   2069             // The start of a digit run is a word start ("2" in "WebKit2").
   2070             if (!isASCIIDigit(previousCharacter))
   2071                 return true;
   2072         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
   2073             // The start of a non-separator, non-uppercase, non-digit run is a word start,
   2074             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
   2075             return true;
   2076         }
   2077     }
   2078 
   2079     size_t wordBreakSearchStart = start + length;
   2080     while (wordBreakSearchStart > start)
   2081         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
   2082     return wordBreakSearchStart == start;
   2083 }
   2084 
   2085 inline size_t SearchBuffer::search(size_t& start)
   2086 {
   2087     size_t size = m_buffer.size();
   2088     if (m_atBreak) {
   2089         if (!size)
   2090             return 0;
   2091     } else {
   2092         if (size != m_buffer.capacity())
   2093             return 0;
   2094     }
   2095 
   2096     UStringSearch* searcher = WebCore::searcher();
   2097 
   2098     UErrorCode status = U_ZERO_ERROR;
   2099     usearch_setText(searcher, m_buffer.data(), size, &status);
   2100     ASSERT(status == U_ZERO_ERROR);
   2101 
   2102     usearch_setOffset(searcher, m_prefixLength, &status);
   2103     ASSERT(status == U_ZERO_ERROR);
   2104 
   2105     int matchStart = usearch_next(searcher, &status);
   2106     ASSERT(status == U_ZERO_ERROR);
   2107 
   2108 nextMatch:
   2109     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
   2110         ASSERT(matchStart == USEARCH_DONE);
   2111         return 0;
   2112     }
   2113 
   2114     // Matches that start in the overlap area are only tentative.
   2115     // The same match may appear later, matching more characters,
   2116     // possibly including a combining character that's not yet in the buffer.
   2117     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
   2118         size_t overlap = m_overlap;
   2119         if (m_options & AtWordStarts) {
   2120             // Ensure that there is sufficient context before matchStart the next time around for
   2121             // determining if it is at a word boundary.
   2122             int wordBoundaryContextStart = matchStart;
   2123             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
   2124             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
   2125             overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart));
   2126         }
   2127         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
   2128         m_prefixLength -= min(m_prefixLength, size - overlap);
   2129         m_buffer.shrink(overlap);
   2130         return 0;
   2131     }
   2132 
   2133     size_t matchedLength = usearch_getMatchedLength(searcher);
   2134     ASSERT(matchStart + matchedLength <= size);
   2135 
   2136     // If this match is "bad", move on to the next match.
   2137     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
   2138         matchStart = usearch_next(searcher, &status);
   2139         ASSERT(status == U_ZERO_ERROR);
   2140         goto nextMatch;
   2141     }
   2142 
   2143     size_t newSize = size - (matchStart + 1);
   2144     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
   2145     m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1);
   2146     m_buffer.shrink(newSize);
   2147 
   2148     start = size - matchStart;
   2149     return matchedLength;
   2150 }
   2151 
   2152 #else // !ICU_UNICODE
   2153 
   2154 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
   2155     : m_target(options & CaseInsensitive ? target.foldCase() : target)
   2156     , m_options(options)
   2157     , m_buffer(m_target.length())
   2158     , m_isCharacterStartBuffer(m_target.length())
   2159     , m_isBufferFull(false)
   2160     , m_cursor(0)
   2161 {
   2162     ASSERT(!m_target.isEmpty());
   2163     m_target.replace(noBreakSpace, ' ');
   2164     foldQuoteMarksAndSoftHyphens(m_target);
   2165 }
   2166 
   2167 inline SearchBuffer::~SearchBuffer()
   2168 {
   2169 }
   2170 
   2171 inline void SearchBuffer::reachedBreak()
   2172 {
   2173     m_cursor = 0;
   2174     m_isBufferFull = false;
   2175 }
   2176 
   2177 inline bool SearchBuffer::atBreak() const
   2178 {
   2179     return !m_cursor && !m_isBufferFull;
   2180 }
   2181 
   2182 inline void SearchBuffer::append(UChar c, bool isStart)
   2183 {
   2184     m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMarkOrSoftHyphen(c);
   2185     m_isCharacterStartBuffer[m_cursor] = isStart;
   2186     if (++m_cursor == m_target.length()) {
   2187         m_cursor = 0;
   2188         m_isBufferFull = true;
   2189     }
   2190 }
   2191 
   2192 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
   2193 {
   2194     ASSERT(length);
   2195     if (!(m_options & CaseInsensitive)) {
   2196         append(characters[0], true);
   2197         return 1;
   2198     }
   2199     const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
   2200     UChar foldedCharacters[maxFoldedCharacters];
   2201     bool error;
   2202     int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);
   2203     ASSERT(!error);
   2204     ASSERT(numFoldedCharacters);
   2205     ASSERT(numFoldedCharacters <= maxFoldedCharacters);
   2206     if (!error && numFoldedCharacters) {
   2207         numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters);
   2208         append(foldedCharacters[0], true);
   2209         for (int i = 1; i < numFoldedCharacters; ++i)
   2210             append(foldedCharacters[i], false);
   2211     }
   2212     return 1;
   2213 }
   2214 
   2215 inline bool SearchBuffer::needsMoreContext() const
   2216 {
   2217     return false;
   2218 }
   2219 
   2220 void SearchBuffer::prependContext(const UChar*, size_t)
   2221 {
   2222     ASSERT_NOT_REACHED();
   2223 }
   2224 
   2225 inline size_t SearchBuffer::search(size_t& start)
   2226 {
   2227     if (!m_isBufferFull)
   2228         return 0;
   2229     if (!m_isCharacterStartBuffer[m_cursor])
   2230         return 0;
   2231 
   2232     size_t tailSpace = m_target.length() - m_cursor;
   2233     if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
   2234         return 0;
   2235     if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
   2236         return 0;
   2237 
   2238     start = length();
   2239 
   2240     // Now that we've found a match once, we don't want to find it again, because those
   2241     // are the SearchBuffer semantics, allowing for a buffer where you append more than one
   2242     // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
   2243     // we want to get rid of that in the future we could track this with a separate boolean
   2244     // or even move the characters to the start of the buffer and set m_isBufferFull to false.
   2245     m_isCharacterStartBuffer[m_cursor] = false;
   2246 
   2247     return start;
   2248 }
   2249 
   2250 // Returns the number of characters that were appended to the buffer (what we are searching in).
   2251 // That's not necessarily the same length as the passed-in target string, because case folding
   2252 // can make two strings match even though they're not the same length.
   2253 size_t SearchBuffer::length() const
   2254 {
   2255     size_t bufferSize = m_target.length();
   2256     size_t length = 0;
   2257     for (size_t i = 0; i < bufferSize; ++i)
   2258         length += m_isCharacterStartBuffer[i];
   2259     return length;
   2260 }
   2261 
   2262 #endif // !ICU_UNICODE
   2263 
   2264 // --------
   2265 
   2266 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
   2267 {
   2268     int length = 0;
   2269     for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance())
   2270         length += it.length();
   2271 
   2272     return length;
   2273 }
   2274 
   2275 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
   2276 {
   2277     CharacterIterator entireRangeIterator(entireRange);
   2278     return characterSubrange(entireRangeIterator, characterOffset, characterCount);
   2279 }
   2280 
   2281 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
   2282 {
   2283     RefPtr<Range> resultRange = scope->document()->createRange();
   2284 
   2285     int docTextPosition = 0;
   2286     int rangeEnd = rangeLocation + rangeLength;
   2287     bool startRangeFound = false;
   2288 
   2289     RefPtr<Range> textRunRange;
   2290 
   2291     TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior);
   2292 
   2293     // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
   2294     if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
   2295         textRunRange = it.range();
   2296 
   2297         ExceptionCode ec = 0;
   2298         resultRange->setStart(textRunRange->startContainer(), 0, ec);
   2299         ASSERT(!ec);
   2300         resultRange->setEnd(textRunRange->startContainer(), 0, ec);
   2301         ASSERT(!ec);
   2302 
   2303         return resultRange.release();
   2304     }
   2305 
   2306     for (; !it.atEnd(); it.advance()) {
   2307         int len = it.length();
   2308         textRunRange = it.range();
   2309 
   2310         bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
   2311         bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
   2312 
   2313         // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
   2314         // in those cases that textRunRange is used.
   2315         if (foundEnd) {
   2316             // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
   2317             // position for emitted '\n's.
   2318             if (len == 1 && it.characters()[0] == '\n') {
   2319                 scope->document()->updateLayoutIgnorePendingStylesheets();
   2320                 it.advance();
   2321                 if (!it.atEnd()) {
   2322                     RefPtr<Range> range = it.range();
   2323                     ExceptionCode ec = 0;
   2324                     textRunRange->setEnd(range->startContainer(), range->startOffset(), ec);
   2325                     ASSERT(!ec);
   2326                 } else {
   2327                     Position runStart = textRunRange->startPosition();
   2328                     Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
   2329                     if (runEnd.isNotNull()) {
   2330                         ExceptionCode ec = 0;
   2331                         textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ec);
   2332                         ASSERT(!ec);
   2333                     }
   2334                 }
   2335             }
   2336         }
   2337 
   2338         if (foundStart) {
   2339             startRangeFound = true;
   2340             int exception = 0;
   2341             if (textRunRange->startContainer()->isTextNode()) {
   2342                 int offset = rangeLocation - docTextPosition;
   2343                 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
   2344             } else {
   2345                 if (rangeLocation == docTextPosition)
   2346                     resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);
   2347                 else
   2348                     resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);
   2349             }
   2350         }
   2351 
   2352         if (foundEnd) {
   2353             int exception = 0;
   2354             if (textRunRange->startContainer()->isTextNode()) {
   2355                 int offset = rangeEnd - docTextPosition;
   2356                 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
   2357             } else {
   2358                 if (rangeEnd == docTextPosition)
   2359                     resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);
   2360                 else
   2361                     resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
   2362             }
   2363             docTextPosition += len;
   2364             break;
   2365         }
   2366         docTextPosition += len;
   2367     }
   2368 
   2369     if (!startRangeFound)
   2370         return 0;
   2371 
   2372     if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
   2373         int exception = 0;
   2374         resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
   2375     }
   2376 
   2377     return resultRange.release();
   2378 }
   2379 
   2380 bool TextIterator::locationAndLengthFromRange(const Range* range, size_t& location, size_t& length)
   2381 {
   2382     location = notFound;
   2383     length = 0;
   2384 
   2385     if (!range->startContainer())
   2386         return false;
   2387 
   2388     Element* selectionRoot = range->ownerDocument()->frame()->selection()->rootEditableElement();
   2389     Element* scope = selectionRoot ? selectionRoot : range->ownerDocument()->documentElement();
   2390 
   2391     // The critical assumption is that this only gets called with ranges that
   2392     // concentrate on a given area containing the selection root. This is done
   2393     // because of text fields and textareas. The DOM for those is not
   2394     // directly in the document DOM, so ensure that the range does not cross a
   2395     // boundary of one of those.
   2396     if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope))
   2397         return false;
   2398     if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope))
   2399         return false;
   2400 
   2401     RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset());
   2402     ASSERT(testRange->startContainer() == scope);
   2403     location = TextIterator::rangeLength(testRange.get());
   2404 
   2405     ExceptionCode ec;
   2406     testRange->setEnd(range->endContainer(), range->endOffset(), ec);
   2407     ASSERT(testRange->startContainer() == scope);
   2408     length = TextIterator::rangeLength(testRange.get()) - location;
   2409     return true;
   2410 }
   2411 
   2412 // --------
   2413 
   2414 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior defaultBehavior)
   2415 {
   2416     UChar* result = 0;
   2417 
   2418     // Do this in pieces to avoid massive reallocations if there is a large amount of text.
   2419     // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS.
   2420     static const unsigned cMaxSegmentSize = 1 << 16;
   2421     bufferLength = 0;
   2422     typedef pair<UChar*, unsigned> TextSegment;
   2423     OwnPtr<Vector<TextSegment> > textSegments;
   2424     Vector<UChar> textBuffer;
   2425     textBuffer.reserveInitialCapacity(cMaxSegmentSize);
   2426     TextIteratorBehavior behavior = defaultBehavior;
   2427     if (!isDisplayString)
   2428         behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding);
   2429 
   2430     for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
   2431         if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) {
   2432             UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar)));
   2433             if (!newSegmentBuffer)
   2434                 goto exit;
   2435             memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar));
   2436             if (!textSegments)
   2437                 textSegments = adoptPtr(new Vector<TextSegment>);
   2438             textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size()));
   2439             textBuffer.clear();
   2440         }
   2441         textBuffer.append(it.characters(), it.length());
   2442         bufferLength += it.length();
   2443     }
   2444 
   2445     if (!bufferLength)
   2446         return 0;
   2447 
   2448     // Since we know the size now, we can make a single buffer out of the pieces with one big alloc
   2449     result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar)));
   2450     if (!result)
   2451         goto exit;
   2452 
   2453     {
   2454         UChar* resultPos = result;
   2455         if (textSegments) {
   2456             unsigned size = textSegments->size();
   2457             for (unsigned i = 0; i < size; ++i) {
   2458                 const TextSegment& segment = textSegments->at(i);
   2459                 memcpy(resultPos, segment.first, segment.second * sizeof(UChar));
   2460                 resultPos += segment.second;
   2461             }
   2462         }
   2463         memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar));
   2464     }
   2465 
   2466 exit:
   2467     if (textSegments) {
   2468         unsigned size = textSegments->size();
   2469         for (unsigned i = 0; i < size; ++i)
   2470             free(textSegments->at(i).first);
   2471     }
   2472 
   2473     if (isDisplayString && r->ownerDocument())
   2474         r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);
   2475 
   2476     return result;
   2477 }
   2478 
   2479 String plainText(const Range* r, TextIteratorBehavior defaultBehavior)
   2480 {
   2481     unsigned length;
   2482     UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false, defaultBehavior);
   2483     if (!buf)
   2484         return "";
   2485     String result(buf, length);
   2486     free(buf);
   2487     return result;
   2488 }
   2489 
   2490 static inline bool isAllCollapsibleWhitespace(const String& string)
   2491 {
   2492     const UChar* characters = string.characters();
   2493     unsigned length = string.length();
   2494     for (unsigned i = 0; i < length; ++i) {
   2495         if (!isCollapsibleWhitespace(characters[i]))
   2496             return false;
   2497     }
   2498     return true;
   2499 }
   2500 
   2501 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
   2502 {
   2503     ExceptionCode ec = 0;
   2504     RefPtr<Range> result = range->cloneRange(ec);
   2505     ASSERT(!ec);
   2506     result->collapse(!forward, ec);
   2507     ASSERT(!ec);
   2508     return result.release();
   2509 }
   2510 
   2511 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
   2512 {
   2513     matchStart = 0;
   2514     size_t matchLength = 0;
   2515 
   2516     SearchBuffer buffer(target, options);
   2517 
   2518     if (buffer.needsMoreContext()) {
   2519         RefPtr<Range> startRange = it.range();
   2520         RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange();
   2521         ExceptionCode ec = 0;
   2522         beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), ec);
   2523         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
   2524             buffer.prependContext(backwardsIterator.characters(), backwardsIterator.length());
   2525             if (!buffer.needsMoreContext())
   2526                 break;
   2527         }
   2528     }
   2529 
   2530     while (!it.atEnd()) {
   2531         it.advance(buffer.append(it.characters(), it.length()));
   2532 tryAgain:
   2533         size_t matchStartOffset;
   2534         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
   2535             // Note that we found a match, and where we found it.
   2536             size_t lastCharacterInBufferOffset = it.characterOffset();
   2537             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
   2538             matchStart = lastCharacterInBufferOffset - matchStartOffset;
   2539             matchLength = newMatchLength;
   2540             // If searching forward, stop on the first match.
   2541             // If searching backward, don't stop, so we end up with the last match.
   2542             if (!(options & Backwards))
   2543                 break;
   2544             goto tryAgain;
   2545         }
   2546         if (it.atBreak() && !buffer.atBreak()) {
   2547             buffer.reachedBreak();
   2548             goto tryAgain;
   2549         }
   2550     }
   2551 
   2552     return matchLength;
   2553 }
   2554 
   2555 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
   2556 {
   2557     // First, find the text.
   2558     size_t matchStart;
   2559     size_t matchLength;
   2560     {
   2561         CharacterIterator findIterator(range, TextIteratorEntersTextControls);
   2562         matchLength = findPlainText(findIterator, target, options, matchStart);
   2563         if (!matchLength)
   2564             return collapsedToBoundary(range, !(options & Backwards));
   2565     }
   2566 
   2567     // Then, find the document position of the start and the end of the text.
   2568     CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls);
   2569     return characterSubrange(computeRangeIterator, matchStart, matchLength);
   2570 }
   2571 
   2572 }
   2573