Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "core/dom/Range.h"
     30 #include "core/editing/FindOptions.h"
     31 #include "platform/heap/Handle.h"
     32 #include "wtf/Vector.h"
     33 
     34 namespace blink {
     35 
        // Forward declarations. Within this header these rendering types are only
        // named through raw pointers or RawPtrWillBeMember<> handles.
     36 class InlineTextBox;
     37 class RenderText;
     38 class RenderTextFragment;
     39 
        // Bit flags selecting optional behaviors of the text iterators declared
        // below. Every enumerator other than the default occupies its own bit
        // (1 << 0 through 1 << 7), so values can be OR-ed together and passed as a
        // TextIteratorBehaviorFlags.
        // Most flag names mirror a boolean member of TextIterator (for example
        // TextIteratorEntersTextControls / m_entersTextControls); presumably the
        // constructors decode the flags into those members -- confirm in the
        // implementation file.
     40 enum TextIteratorBehavior {
     41     TextIteratorDefaultBehavior = 0,
     42     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
     43     TextIteratorEntersTextControls = 1 << 1,
     44     TextIteratorIgnoresStyleVisibility = 1 << 2,
     45     TextIteratorEmitsOriginalText = 1 << 3,
     46     TextIteratorStopsOnFormControls = 1 << 4,
     47     TextIteratorEmitsImageAltText = 1 << 5,
     48     TextIteratorEntersAuthorShadowRoots = 1 << 6,
     49     TextIteratorEmitsObjectReplacementCharacter = 1 << 7
     50 };
        // Parameter type used wherever a (possibly combined) set of
        // TextIteratorBehavior bits is accepted.
     51 typedef unsigned TextIteratorBehaviorFlags;
     52 
        // Free-function helpers that work on either a Range or a pair of Positions.
        // Only the declarations are visible here; the implementations live elsewhere.
        //
        // plainText: returns a String for the given range / positions. The behavior
        // flags are optional and default to TextIteratorDefaultBehavior.
     53 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
     54 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
        // findPlainText: takes a String (presumably the text to search for) and
        // FindOptions. The Range overload hands the result back as a Range; the
        // Position overload reports it through the resultStart / resultEnd
        // out-parameters.
        // NOTE(review): what is returned when nothing matches is not visible from
        // this header -- check the implementation before relying on it.
     55 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions);
     56 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd);
     57 
        // A stack of boolean values offering push / pop / top / size. Both
        // TextIterator and SimplifiedBackwardsTextIterator below hold one as
        // m_fullyClippedStack.
        // State consists of an element count (m_size) and a Vector of unsigned
        // words; presumably the booleans are packed as bits into those words --
        // the implementation is not part of this header, so confirm there.
     58 class BitStack {
     59 public:
     60     BitStack();
     61     ~BitStack();
     62 
     63     void push(bool);
     64     void pop();
     65 
            // NOTE(review): behavior of top() / pop() on an empty stack is not
            // visible here.
     66     bool top() const;
     67     unsigned size() const;
     68 
     69 private:
     70     unsigned m_size;
     71     Vector<unsigned, 1> m_words;
     72 };
     73 
     74 // Iterates through the DOM range, returning all the text, and 0-length boundaries
     75 // at points where replaced elements break up the text flow.  The text comes back in
     76 // chunks so as to optimize for performance of the iteration.
        //
        // Going by the public interface, the intended use is presumably: construct
        // over a Range or a pair of Positions, loop while !atEnd(), read the current
        // chunk (length(), characterAt(), appendTextTo(), ...) and call advance() to
        // move on to the next chunk.
     77 
     78 class TextIterator {
     79     STACK_ALLOCATED();
     80 public:
     81     explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
     82     // [start, end] indicates the document range that the iteration should take place within (both ends inclusive).
     83     TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
     84     ~TextIterator();
     85 
            // True when there is no current position node or the stop flag has been set.
     86     bool atEnd() const { return !m_positionNode || m_shouldStop; }
     87     void advance();
     88     bool isInsideReplacedElement() const;
     89 
            // Accessors for the current chunk. length() is the stored m_textLength.
     90     int length() const { return m_textLength; }
     91     UChar characterAt(unsigned index) const;
     92     String substring(unsigned position, unsigned length) const;
     93     void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
     94 
            // Appends the current chunk, from |position| to its end, to |output|.
            // - Nothing is appended when |position| equals length().
            // - When m_singleCharacterBuffer is non-zero, that one character is the
            //   whole chunk (|position| == 0 and length() == 1 are asserted).
            // - Otherwise the characters are taken from m_text, starting at
            //   startOffset() + position.
            // BufferType must provide append(const UChar*, unsigned) and be accepted by
            // String::appendTo.
            // NOTE(review): |position| <= length() is only checked by the assertion; the
            // subtraction below is unsigned and would wrap if that check is compiled out
            // and a caller passes a larger position.
     95     template<typename BufferType>
     96     void appendTextTo(BufferType& output, unsigned position = 0)
     97     {
     98         ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
     99         unsigned lengthToAppend = length() - position;
    100         if (!lengthToAppend)
    101             return;
    102         if (m_singleCharacterBuffer) {
    103             ASSERT(!position);
    104             ASSERT(length() == 1);
    105             output.append(&m_singleCharacterBuffer, 1);
    106         } else {
    107             string().appendTo(output, startOffset() + position, lengthToAppend);
    108         }
    109     }
    110 
    111     PassRefPtrWillBeRawPtr<Range> createRange() const;
    112     Node* node() const;
    113 
            // DOM location accessors for the current chunk (defined out of line).
    114     Document* ownerDocument() const;
    115     Node* startContainer() const;
    116     Node* endContainer() const;
    117     int startOffset() const;
    118     int endOffset() const;
    119     Position startPosition() const;
    120     Position endPosition() const;
    121 
    122     // Computes the length of the given range using a text iterator. The default
    123     // iteration behavior is to always emit object replacement characters for
    124     // replaced elements. When |forSelectionPreservation| is set to true, it
    125     // also emits spaces for other non-text nodes using the
    126     // |TextIteratorEmitsCharactersBetweenAllVisiblePositions| mode.
    127     static int rangeLength(const Range*, bool forSelectionPreservation = false);
    128     static int rangeLength(const Position& start, const Position& end, bool forSelectionPreservation = false);
            // subrange: the Range overload returns a new Range; the Position overload
            // updates |start| and |end| in place. Presumably both select
            // |characterCount| characters beginning |characterOffset| characters into
            // the input -- confirm in the implementation.
    129     static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
    130     static void subrange(Position& start, Position& end, int characterOffset, int characterCount);
    131 
    132 private:
            // Values stored in m_iterationProgress; presumably they record how far the
            // handling of m_node has progressed.
    133     enum IterationProgress {
    134         HandledNone,
    135         HandledAuthorShadowRoots,
    136         HandledUserAgentShadowRoot,
    137         HandledNode,
    138         HandledChildren
    139     };
    140 
    141     void initialize(const Position& start, const Position& end);
    142 
            // Declared const, and m_positionOffsetBaseNode / m_positionStartOffset /
            // m_positionEndOffset are mutable, so this can presumably update them from
            // const accessors.
    143     void flushPositionOffsets() const;
    144     int positionStartOffset() const { return m_positionStartOffset; }
            // The backing text of the current chunk (m_text).
    145     const String& string() const { return m_text; }
    146     void exitNode();
    147     bool shouldRepresentNodeOffsetZero();
    148     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    149     void representNodeOffsetZero();
    150     bool handleTextNode();
    151     bool handleReplacedElement();
    152     bool handleNonTextNode();
    153     void handleTextBox();
    154     void handleTextNodeFirstLetter(RenderTextFragment*);
    155     bool hasVisibleTextNode(RenderText*);
    156     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    157     void emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset);
    158 
    159     // Current position, not necessarily of the text being returned, but position
    160     // as we walk through the DOM tree.
    161     RawPtrWillBeMember<Node> m_node;
    162     int m_offset;
    163     IterationProgress m_iterationProgress;
    164     BitStack m_fullyClippedStack;
    165     int m_shadowDepth;
    166 
    167     // The range.
    168     RawPtrWillBeMember<Node> m_startContainer;
    169     int m_startOffset;
    170     RawPtrWillBeMember<Node> m_endContainer;
    171     int m_endOffset;
    172     RawPtrWillBeMember<Node> m_pastEndNode;
    173 
    174     // The current text and its position, in the form to be returned from the iterator.
    175     RawPtrWillBeMember<Node> m_positionNode;
    176     mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode;
    177     mutable int m_positionStartOffset;
    178     mutable int m_positionEndOffset;
    179     int m_textLength;
    180     String m_text;
    181 
    182     // Used when there is still some pending text from the current node; when these
    183     // are false and 0, we go back to normal iterating.
    184     bool m_needsAnotherNewline;
    185     InlineTextBox* m_textBox;
    186     // Used when iterating over :first-letter text to save a pointer to the
    187     // remaining text box.
    188     InlineTextBox* m_remainingTextBox;
    189     // Used to point to RenderText object for :first-letter.
    190     RawPtrWillBeMember<RenderText> m_firstLetterText;
    191 
    192     // Used to do the whitespace collapsing logic.
    193     RawPtrWillBeMember<Text> m_lastTextNode;
    194     bool m_lastTextNodeEndedWithCollapsedSpace;
    195     UChar m_lastCharacter;
    196 
    197     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    198     // If non-zero, overrides m_text.
    199     UChar m_singleCharacterBuffer;
    200 
    201     // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    202     Vector<InlineTextBox*> m_sortedTextBoxes;
    203     size_t m_sortedTextBoxesPosition;
    204 
    205     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    206     bool m_hasEmitted;
    207 
    208     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    209     // in the Range used to create the TextIterator.
    210     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    211     // moveParagraphs to not clone/destroy moved content.
    212     bool m_emitsCharactersBetweenAllVisiblePositions;
    213     bool m_entersTextControls;
    214 
    215     // Used in pasting inside password field.
    216     bool m_emitsOriginalText;
    217     // Used when deciding whether the text fragment created by :first-letter should be looked into.
    218     bool m_handledFirstLetter;
    219     // Used when the visibility of the style should not affect text gathering.
    220     bool m_ignoresStyleVisibility;
    221     // Used when the iteration should stop if form controls are reached.
    222     bool m_stopsOnFormControls;
    223     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    224     bool m_shouldStop;
    225 
    226     bool m_emitsImageAltText;
    227 
    228     bool m_entersAuthorShadowRoots;
    229 
    230     bool m_emitsObjectReplacementCharacter;
    231 };
    232 
    233 // Iterates through the DOM range, returning all the text, and 0-length boundaries
    234 // at points where replaced elements break up the text flow. The text comes back in
    235 // chunks so as to optimize for performance of the iteration.
        // NOTE(review): the summary above is word-for-word the TextIterator one. Judging
        // by the class name, prependTextTo() and m_havePassedStartNode, this class
        // presumably walks the range from its end towards its start -- confirm in the
        // implementation and reword the summary accordingly.
    236 class SimplifiedBackwardsTextIterator {
    237     STACK_ALLOCATED();
    238 public:
    239     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    240     SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    241 
            // True when there is no current position node or the stop flag has been set.
    242     bool atEnd() const { return !m_positionNode || m_shouldStop; }
    243     void advance();
    244 
            // Length of the current chunk (m_textLength).
    245     int length() const { return m_textLength; }
    246 
            // Returns m_node, i.e. the walk position, which per its member comment is not
            // necessarily the node of the text currently being returned.
    247     Node* node() const { return m_node; }
    248 
            // Prepends the current chunk to |output|:
            // - nothing when m_textLength is 0;
            // - the single character in m_singleCharacterBuffer when that is non-zero;
            // - otherwise m_textLength characters of m_textContainer starting at
            //   m_textOffset.
            // BufferType must provide prepend(const UChar*, unsigned) and be accepted by
            // String::prependTo.
    249     template<typename BufferType>
    250     void prependTextTo(BufferType& output)
    251     {
    252         if (!m_textLength)
    253             return;
    254         if (m_singleCharacterBuffer)
    255             output.prepend(&m_singleCharacterBuffer, 1);
    256         else
    257             m_textContainer.prependTo(output, m_textOffset, m_textLength);
    258     }
    259 
    260     Node* startContainer() const;
    261     int endOffset() const;
    262     Position startPosition() const;
    263     Position endPosition() const;
    264 
    265 private:
    266     void init(Node* startNode, Node* endNode, int startOffset, int endOffset);
    267     void exitNode();
    268     bool handleTextNode();
    269     RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
    270     bool handleReplacedElement();
    271     bool handleNonTextNode();
    272     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    273     bool advanceRespectingRange(Node*);
    274 
    275     // Current position, not necessarily of the text being returned, but position
    276     // as we walk through the DOM tree.
    277     RawPtrWillBeMember<Node> m_node;
    278     int m_offset;
    279     bool m_handledNode;
    280     bool m_handledChildren;
    281     BitStack m_fullyClippedStack;
    282 
            // NOTE(review): "End" / "Start" in the two comments below look reversed
            // relative to the member names. They are presumably meant in iteration
            // order: m_havePassedStartNode's comment treats m_startNode as the bound
            // of the iteration range that the walk eventually moves beyond. Confirm.
    283     // End of the range.
    284     RawPtrWillBeMember<Node> m_startNode;
    285     int m_startOffset;
    286     // Start of the range.
    287     RawPtrWillBeMember<Node> m_endNode;
    288     int m_endOffset;
    289 
    290     // The current text and its position, in the form to be returned from the iterator.
    291     RawPtrWillBeMember<Node> m_positionNode;
    292     int m_positionStartOffset;
    293     int m_positionEndOffset;
    294 
    295     String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
    296     int m_textOffset;
    297     int m_textLength;
    298 
    299     // Used to do the whitespace logic.
    300     RawPtrWillBeMember<Text> m_lastTextNode;
    301     UChar m_lastCharacter;
    302 
    303     // Used for whitespace characters that aren't in the DOM, so we can point at them.
            // When non-zero, prependTextTo() emits this character instead of reading
            // m_textContainer.
    304     UChar m_singleCharacterBuffer;
    305 
    306     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    307     bool m_havePassedStartNode;
    308 
    309     // Should handle first-letter renderer in the next call to handleTextNode.
    310     bool m_shouldHandleFirstLetter;
    311 
    312     // Used when the iteration should stop if form controls are reached.
    313     bool m_stopsOnFormControls;
    314 
    315     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    316     bool m_shouldStop;
    317 
    318     // Used in pasting inside password field.
    319     bool m_emitsOriginalText;
    320 };
    321 
    322 // Builds on the text iterator, adding a character position so we can walk one
    323 // character at a time, or faster, as needed. Useful for searching.
        // All text access is forwarded to the wrapped TextIterator, shifted by
        // m_runOffset (the position inside the TextIterator's current chunk).
    324 class CharacterIterator {
    325     STACK_ALLOCATED();
    326 public:
    327     explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    328     CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    329 
    330     void advance(int numCharacters);
    331 
    332     bool atBreak() const { return m_atBreak; }
            // At end exactly when the wrapped TextIterator is at end.
    333     bool atEnd() const { return m_textIterator.atEnd(); }
    334 
            // Number of characters left in the current chunk after m_runOffset.
    335     int length() const { return m_textIterator.length() - m_runOffset; }
            // |index| is relative to the current position inside the chunk (m_runOffset).
    336     UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
    337 
            // Appends the rest of the current chunk, from m_runOffset on, to |output|.
    338     template<typename BufferType>
    339     void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
    340 
            // Returns m_offset; presumably the number of characters advanced since
            // construction -- confirm in the implementation.
    341     int characterOffset() const { return m_offset; }
    342     PassRefPtrWillBeRawPtr<Range> createRange() const;
    343 
    344     Document* ownerDocument() const;
    345     Node* startContainer() const;
    346     Node* endContainer() const;
    347     int startOffset() const;
    348     int endOffset() const;
    349     Position startPosition() const;
    350     Position endPosition() const;
    351 
    352 private:
    353     void initialize();
    354 
            // Value reported by characterOffset().
    355     int m_offset;
            // Offset into the wrapped iterator's current chunk; see length(),
            // characterAt() and appendTextTo().
    356     int m_runOffset;
            // Value reported by atBreak().
    357     bool m_atBreak;
    358 
    359     TextIterator m_textIterator;
    360 };
    361 
        // Wraps a SimplifiedBackwardsTextIterator and exposes advance(int), atEnd()
        // and endPosition(). It carries the same m_offset / m_runOffset / m_atBreak
        // bookkeeping members as CharacterIterator, so it is presumably the
        // backwards counterpart of that class -- confirm in the implementation.
    362 class BackwardsCharacterIterator {
    363     STACK_ALLOCATED();
    364 public:
    365     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    366     BackwardsCharacterIterator(const Position&, const Position&, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
    367 
            // The parameter is unnamed here; presumably a character count, as in
            // CharacterIterator::advance(int numCharacters).
    368     void advance(int);
    369 
            // At end exactly when the wrapped backwards iterator is at end.
    370     bool atEnd() const { return m_textIterator.atEnd(); }
    371 
    372     Position endPosition() const;
    373 
    374 private:
    375     int m_offset;
    376     int m_runOffset;
    377     bool m_atBreak;
    378 
    379     SimplifiedBackwardsTextIterator m_textIterator;
    380 };
    381 
    382 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
    383 // meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
    384 class WordAwareIterator {
    385     STACK_ALLOCATED();
    386 public:
    387     explicit WordAwareIterator(const Position& start, const Position& end);
    388     ~WordAwareIterator();
    389 
            // Not at end while a look-ahead is still pending (m_didLookAhead), even if
            // the wrapped TextIterator itself has reached its end.
    390     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    391     void advance();
    392 
    393     String substring(unsigned position, unsigned length) const;
    394     UChar characterAt(unsigned index) const;
    395     int length() const;
    396 
    397 private:
            // NOTE(review): presumably holds the text of chunks that had to be joined
            // so that a word is not split -- confirm in the implementation.
    398     Vector<UChar> m_buffer;
    399     // Did we have to look ahead in the textIterator to confirm the current chunk?
    400     bool m_didLookAhead;
    401     TextIterator m_textIterator;
    402 };
    403 
    404 }
    405 
    406 #endif
    407