Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "core/dom/Range.h"
     30 #include "core/editing/FindOptions.h"
     31 #include "platform/heap/Handle.h"
     32 #include "wtf/Vector.h"
     33 
     34 namespace WebCore {
     35 
     36 class InlineTextBox;
     37 class RenderText;
     38 class RenderTextFragment;
     39 
     40 enum TextIteratorBehavior {
            // Bit flags selecting optional iterator behavior. Every non-default value is a
            // distinct single bit, so values can be OR-ed together and passed around as a
            // TextIteratorBehaviorFlags.
            // NOTE(review): the per-flag notes below mirror the comments on the like-named
            // TextIterator members; the flag-to-member wiring is not in this header, confirm it
            // in the implementation file.
     41     TextIteratorDefaultBehavior = 0,
            // Selection-preservation mode: one character is emitted between every
            // VisiblePosition in the range.
     42     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
     43     TextIteratorEntersTextControls = 1 << 1,
            // Style visibility should not affect which text is gathered.
     44     TextIteratorIgnoresStyleVisibility = 1 << 2,
            // Used when pasting inside a password field.
     45     TextIteratorEmitsOriginalText = 1 << 3,
            // Iteration should stop when form controls are reached.
     46     TextIteratorStopsOnFormControls = 1 << 4,
     47     TextIteratorEmitsImageAltText = 1 << 5,
     48     TextIteratorEntersAuthorShadowRoots = 1 << 6,
            // Emit an object replacement character for replaced elements.
     49     TextIteratorEmitsObjectReplacementCharacter = 1 << 7
     50 };
        // Plain unsigned type used to carry a combination of TextIteratorBehavior bits.
     51 typedef unsigned TextIteratorBehaviorFlags;
     52 
     53 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); // Presumably the text of the range as one String; definition not in this header.
        // Searches for a string using the given FindOptions. The first overload takes the
        // search scope as a Range and returns the match as a Range; the second takes the
        // scope as a pair of Positions and reports the match through the resultStart and
        // resultEnd out-parameters.
        // NOTE(review): what is returned or written when nothing matches is not visible
        // here; confirm in the implementation before relying on it.
     54 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions);
     55 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd);
     56 
     57 class BitStack {
            // A stack of bool values with push/pop/top/size. State is an entry count
            // (m_size) plus a vector of unsigned words (m_words).
            // NOTE(review): presumably each entry occupies one bit of m_words; the
            // implementation is not in this header, confirm there.
     58 public:
     59     BitStack();
     60     ~BitStack();
     61 
            // push() adds a value on top of the stack; pop() removes the top value.
            // Behavior of pop()/top() on an empty stack is not visible from this header.
     62     void push(bool);
     63     void pop();
     64 
            // The value currently on top of the stack.
     65     bool top() const;
            // NOTE(review): presumably the number of pushed values (m_size), not the number
            // of storage words; confirm in the implementation.
     66     unsigned size() const;
     67 
     68 private:
     69     unsigned m_size;
     70     Vector<unsigned, 1> m_words;
     71 };
     72 
     73 // Iterates through the DOM range, returning all the text, and 0-length boundaries
     74 // at points where replaced elements break up the text flow.  The text comes back in
     75 // chunks so as to optimize for performance of the iteration.
        //
        // The public interface is chunk-oriented: while !atEnd(), the accessors below
        // (length(), characterAt(), substring(), appendTextTo(), range(), node()) describe
        // the current chunk, and advance() moves on to the next one.
     76 
     77 class TextIterator {
     78     STACK_ALLOCATED();
     79 public:
            // Iterates over the given Range, with optional TextIteratorBehavior flags.
     80     explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
     81     // [start, end] indicates the document range that the iteration should take place within (both ends inclusive).
     82     TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
     83     ~TextIterator();
     84 
            // True when there is no current position node, or when the iterator has been
            // flagged to stop (m_shouldStop, which is used with m_stopsOnFormControls).
     85     bool atEnd() const { return !m_positionNode || m_shouldStop; }
            // Moves to the next chunk (defined out of line).
     86     void advance();
     87 
            // Length of the current chunk.
     88     int length() const { return m_textLength; }
            // Read access to the current chunk; all three are defined out of line.
            // NOTE(review): index/position are presumably relative to the start of the
            // current chunk, as they are in appendTextTo() below; confirm.
     89     UChar characterAt(unsigned index) const;
     90     String substring(unsigned position, unsigned length) const;
     91     void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
     92 
            // Appends the current chunk, from chunk-relative |position| to its end, to
            // |output|. BufferType must accept append(pointer-to-UChar, count) for the
            // single-character case and be usable with String::appendTo() otherwise.
     93     template<typename BufferType>
     94     void appendTextTo(BufferType& output, unsigned position = 0)
     95     {
                // position must not exceed the chunk length; otherwise the unsigned
                // subtraction below would wrap around.
     96         ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
     97         unsigned lengthToAppend = length() - position;
     98         if (!lengthToAppend)
     99             return;
                // A non-zero m_singleCharacterBuffer overrides m_text (see the member
                // comment), so the chunk is exactly that one character.
    100         if (m_singleCharacterBuffer) {
    101             ASSERT(!position);
    102             ASSERT(length() == 1);
    103             output.append(&m_singleCharacterBuffer, 1);
    104         } else {
                    // The chunk is a slice of m_text beginning at startOffset().
    105             string().appendTo(output, startOffset() + position, lengthToAppend);
    106         }
    107     }
    108 
            // NOTE(review): presumably the DOM range and node corresponding to the current
            // chunk; both are defined out of line, confirm there.
    109     PassRefPtrWillBeRawPtr<Range> range() const;
    110     Node* node() const;
    111 
    112     // Computes the length of the given range using a text iterator. The default
    113     // iteration behavior is to always emit object replacement characters for
    114     // replaced elements. When |forSelectionPreservation| is set to true, it
    115     // also emits spaces for other non-text nodes using the
    116     // |TextIteratorEmitsCharactersBetweenAllVisiblePositions| mode.
    117     static int rangeLength(const Range*, bool forSelectionPreservation = false);
            // NOTE(review): presumably returns the part of |entireRange| that starts
            // |characterOffset| characters in and spans |characterCount| characters; confirm.
    118     static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
    119 
    120 private:
            // Records how far processing of the current node (m_node) has progressed.
            // NOTE(review): the enumerators look ordered by progress; check whether the
            // implementation relies on that order before inserting or reordering values.
    121     enum IterationProgress {
    122         HandledNone,
    123         HandledAuthorShadowRoots,
    124         HandledUserAgentShadowRoot,
    125         HandledNode,
    126         HandledChildren
    127     };
    128 
            // Shared set-up taking the start and end positions (defined out of line).
    129     void initialize(const Position& start, const Position& end);
    130 
            // Offset within string() at which the current chunk begins, and the backing
            // text itself; appendTextTo() slices string() starting from startOffset().
    131     int startOffset() const { return m_positionStartOffset; }
    132     const String& string() const { return m_text; }
            // Internal traversal and emission helpers, all defined out of line.
    133     void exitNode();
    134     bool shouldRepresentNodeOffsetZero();
    135     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    136     void representNodeOffsetZero();
    137     bool handleTextNode();
    138     bool handleReplacedElement();
    139     bool handleNonTextNode();
    140     void handleTextBox();
    141     void handleTextNodeFirstLetter(RenderTextFragment*);
    142     bool hasVisibleTextNode(RenderText*);
    143     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    144     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
    145     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
    146 
    147     // Current position, not necessarily of the text being returned, but position
    148     // as we walk through the DOM tree.
    149     RawPtrWillBeMember<Node> m_node;
    150     int m_offset;
    151     IterationProgress m_iterationProgress;
    152     BitStack m_fullyClippedStack;
    153     int m_shadowDepth;
    154 
    155     // The range.
    156     RawPtrWillBeMember<Node> m_startContainer;
    157     int m_startOffset;
    158     RawPtrWillBeMember<Node> m_endContainer;
    159     int m_endOffset;
    160     RawPtrWillBeMember<Node> m_pastEndNode;
    161 
    162     // The current text and its position, in the form to be returned from the iterator.
            // atEnd() reports true once m_positionNode is null.
    163     RawPtrWillBeMember<Node> m_positionNode;
    164     mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode;
    165     mutable int m_positionStartOffset;
    166     mutable int m_positionEndOffset;
    167     int m_textLength;
    168     String m_text;
    169 
    170     // Used when there is still some pending text from the current node; when these
    171     // are false and 0, we go back to normal iterating.
    172     bool m_needsAnotherNewline;
    173     InlineTextBox* m_textBox;
    174     // Used when iterating over :first-letter text to save pointer to
    175     // remaining text box.
    176     InlineTextBox* m_remainingTextBox;
    177     // Used to point to RenderText object for :first-letter.
    178     RenderText *m_firstLetterText;
    179 
    180     // Used to do the whitespace collapsing logic.
    181     RawPtrWillBeMember<Node> m_lastTextNode;
    182     bool m_lastTextNodeEndedWithCollapsedSpace;
    183     UChar m_lastCharacter;
    184 
    185     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    186     // If non-zero, overrides m_text.
    187     UChar m_singleCharacterBuffer;
    188 
    189     // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    190     Vector<InlineTextBox*> m_sortedTextBoxes;
    191     size_t m_sortedTextBoxesPosition;
    192 
    193     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    194     bool m_hasEmitted;
    195 
    196     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    197     // in the Range used to create the TextIterator.
    198     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    199     // moveParagraphs to not clone/destroy moved content.
    200     bool m_emitsCharactersBetweenAllVisiblePositions;
    201     bool m_entersTextControls;
    202 
    203     // Used in pasting inside password field.
    204     bool m_emitsOriginalText;
    205     // Used when deciding text fragment created by :first-letter should be looked into.
    206     bool m_handledFirstLetter;
    207     // Used when the visibility of the style should not affect text gathering.
    208     bool m_ignoresStyleVisibility;
    209     // Used when the iteration should stop if form controls are reached.
    210     bool m_stopsOnFormControls;
    211     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    212     bool m_shouldStop;
    213 
            // NOTE(review): the three flags below share their names with TextIteratorBehavior
            // values and are presumably set from them in the constructor; confirm.
    214     bool m_emitsImageAltText;
    215 
    216     bool m_entersAuthorShadowRoots;
    217 
    218     bool m_emitsObjectReplacementCharacter;
    219 };
    220 
    221 // Iterates through the DOM range, returning all the text, and 0-length boundaries
    222 // at points where replaced elements break up the text flow. The text comes back in
    223 // chunks so as to optimize for performance of the iteration.
        //
        // NOTE(review): the description above is shared with the forward TextIterator. As the
        // class name, prependTextTo() and m_havePassedStartNode suggest, this variant
        // presumably walks from the end of the range toward its start; confirm in the
        // implementation file.
    224 class SimplifiedBackwardsTextIterator {
    225     STACK_ALLOCATED();
    226 public:
    227     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    228 
            // True when there is no current position node, or when the iterator has been
            // flagged to stop (m_shouldStop, which is used with m_stopsOnFormControls).
    229     bool atEnd() const { return !m_positionNode || m_shouldStop; }
            // Moves to the next chunk in iteration order (defined out of line).
    230     void advance();
    231 
            // Length of the current chunk.
    232     int length() const { return m_textLength; }
    233 
            // The node the DOM walk is currently at (m_node); per the member comment this is
            // not necessarily the node of the text being returned.
    234     Node* node() const { return m_node; }
    235 
            // Prepends the current chunk to |output|; does nothing for an empty chunk.
            // BufferType must accept prepend(pointer-to-UChar, count) for the
            // single-character case and be usable with String::prependTo() otherwise.
    236     template<typename BufferType>
    237     void prependTextTo(BufferType& output)
    238     {
    239         if (!m_textLength)
    240             return;
                // A non-zero single-character buffer takes precedence over m_textContainer.
    241         if (m_singleCharacterBuffer)
    242             output.prepend(&m_singleCharacterBuffer, 1);
    243         else
    244             m_textContainer.prependTo(output, m_textOffset, m_textLength);
    245     }
    246 
            // NOTE(review): presumably the DOM range corresponding to the current chunk;
            // defined out of line, confirm there.
    247     PassRefPtrWillBeRawPtr<Range> range() const;
    248 
    249 private:
            // Internal traversal and emission helpers, all defined out of line.
    250     void exitNode();
    251     bool handleTextNode();
    252     RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
    253     bool handleReplacedElement();
    254     bool handleNonTextNode();
    255     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    256     bool advanceRespectingRange(Node*);
    257 
    258     // Current position, not necessarily of the text being returned, but position
    259     // as we walk through the DOM tree.
    260     RawPtrWillBeMember<Node> m_node;
    261     int m_offset;
    262     bool m_handledNode;
    263     bool m_handledChildren;
    264     BitStack m_fullyClippedStack;
    265 
    266     // End of the range.
            // NOTE(review): "end" here presumably means where the backwards walk finishes,
            // i.e. the start boundary of the DOM range (compare m_havePassedStartNode); confirm.
    267     RawPtrWillBeMember<Node> m_startNode;
    268     int m_startOffset;
    269     // Start of the range.
            // NOTE(review): likewise, presumably where the backwards walk begins, i.e. the end
            // boundary of the DOM range; confirm.
    270     RawPtrWillBeMember<Node> m_endNode;
    271     int m_endOffset;
    272 
    273     // The current text and its position, in the form to be returned from the iterator.
            // atEnd() reports true once m_positionNode is null.
    274     RawPtrWillBeMember<Node> m_positionNode;
    275     int m_positionStartOffset;
    276     int m_positionEndOffset;
    277 
    278     String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
    279     int m_textOffset;
    280     int m_textLength;
    281 
    282     // Used to do the whitespace logic.
    283     RawPtrWillBeMember<Node> m_lastTextNode;
    284     UChar m_lastCharacter;
    285 
    286     // Used for whitespace characters that aren't in the DOM, so we can point at them.
            // When non-zero, prependTextTo() emits this character instead of m_textContainer.
    287     UChar m_singleCharacterBuffer;
    288 
    289     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    290     bool m_havePassedStartNode;
    291 
    292     // Should handle first-letter renderer in the next call to handleTextNode.
    293     bool m_shouldHandleFirstLetter;
    294 
    295     // Used when the iteration should stop if form controls are reached.
    296     bool m_stopsOnFormControls;
    297 
    298     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    299     bool m_shouldStop;
    300 
    301     // Used in pasting inside password field.
    302     bool m_emitsOriginalText;
    303 };
    304 
    305 // Builds on the text iterator, adding a character position so we can walk one
    306 // character at a time, or faster, as needed. Useful for searching.
        //
        // Wraps a TextIterator (m_textIterator) and tracks an offset into its current chunk
        // (m_runOffset); the accessors below expose only the part of the chunk from that
        // offset onward.
    307 class CharacterIterator {
    308     STACK_ALLOCATED();
    309 public:
            // Both constructors take the same arguments as the corresponding TextIterator
            // constructors.
    310     explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    311     CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    312 
            // Moves forward by numCharacters characters (defined out of line).
    313     void advance(int numCharacters);
    314 
            // NOTE(review): presumably true when the position sits on a boundary between
            // text-iterator chunks; m_atBreak is maintained out of line, confirm there.
    315     bool atBreak() const { return m_atBreak; }
            // At the end exactly when the underlying TextIterator is.
    316     bool atEnd() const { return m_textIterator.atEnd(); }
    317 
            // Characters left in the current chunk from the current run offset onward.
    318     int length() const { return m_textIterator.length() - m_runOffset; }
            // |index| is relative to the current run offset, not to the start of the chunk.
    319     UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
    320 
            // Appends the rest of the current chunk, starting at the run offset, to |output|.
    321     template<typename BufferType>
    322     void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
    323 
            // Returns m_offset. NOTE(review): presumably the number of characters advanced
            // since construction; confirm in the implementation.
    324     int characterOffset() const { return m_offset; }
    325     PassRefPtrWillBeRawPtr<Range> range() const;
    326 
    327 private:
    328     void initialize();
    329 
            // Value reported by characterOffset().
    330     int m_offset;
            // Offset of the current position within the text iterator's current chunk.
    331     int m_runOffset;
    332     bool m_atBreak;
    333 
    334     TextIterator m_textIterator;
    335 };
    336 
    337 class BackwardsCharacterIterator {
            // Character-level wrapper around a SimplifiedBackwardsTextIterator; its state
            // (m_offset, m_runOffset, m_atBreak) has the same shape as CharacterIterator's.
            // NOTE(review): presumably the backwards counterpart of CharacterIterator; the
            // implementation is not in this header, confirm there.
    338     STACK_ALLOCATED();
    339 public:
    340     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    341 
            // NOTE(review): the parameter is unnamed; presumably a character count as in
            // CharacterIterator::advance(int numCharacters). Confirm.
    342     void advance(int);
    343 
            // At the end exactly when the underlying backwards text iterator is.
    344     bool atEnd() const { return m_textIterator.atEnd(); }
    345 
    346     PassRefPtrWillBeRawPtr<Range> range() const;
    347 
    348 private:
    349     int m_offset;
    350     int m_runOffset;
    351     bool m_atBreak;
    352 
    353     SimplifiedBackwardsTextIterator m_textIterator;
    354 };
    355 
    356 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
    357 // meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
    358 class WordAwareIterator {
    359     STACK_ALLOCATED();
    360 public:
    361     explicit WordAwareIterator(const Range*);
    362     ~WordAwareIterator();
    363 
            // Not at the end while a look-ahead is still pending (m_didLookAhead), even if the
            // underlying TextIterator has already reached its end.
    364     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
            // Moves to the next chunk (defined out of line).
    365     void advance();
    366 
            // Read access to the current chunk; all three are defined out of line.
    367     String substring(unsigned position, unsigned length) const;
    368     UChar characterAt(unsigned index) const;
    369     int length() const;
    370 
    371 private:
            // NOTE(review): presumably accumulates text when a word spans more than one
            // TextIterator chunk; confirm in the implementation.
    372     Vector<UChar> m_buffer;
    373     // Did we have to look ahead in the textIterator to confirm the current chunk?
    374     bool m_didLookAhead;
    375     RefPtrWillBeMember<Range> m_range;
    376     TextIterator m_textIterator;
    377 };
    378 
    379 }
    380 
    381 #endif
    382