Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "InlineTextBox.h"
     30 #include "Range.h"
     31 #include <wtf/Vector.h>
     32 
     33 namespace WebCore {
     34 
     35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
     36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
        // Returns true when |c| is a space (' ') or a line feed ('\n'), the only two
        // characters this helper classifies as collapsible; false for everything else.
     37 inline bool isCollapsibleWhitespace(UChar c)
     38 {
            return c == ' ' || c == '\n';
     46 }
     47 
        // Range-level text helpers. Only the declarations are visible in this header, so the
        // notes below are limited to what the signatures show; anything beyond that is marked
        // as an assumption to confirm against the definitions.
        //
        // plainText: takes a Range and returns a String.
     48 String plainText(const Range*);
        // plainTextToMallocAllocatedBuffer: returns a UChar* and reports a length through the
        // |bufferLength| out-parameter.
        // NOTE(review): the name suggests the buffer is malloc-allocated and therefore released
        // by the caller -- confirm before relying on it. The effect of |isDisplayString| is not
        // visible from here.
     49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString);
        // findPlainText: takes a Range and a String and returns a Range.
        // NOTE(review): presumably |forward| picks the search direction and |caseSensitive| the
        // comparison mode -- confirm against the definition.
     50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive);
     51 
        // Stack of boolean values with push/pop/top/size operations. Only the interface is
        // declared here; the member functions are defined outside this header.
        // NOTE(review): the unsigned-word storage suggests the booleans are packed as bits
        // into m_words -- confirm against the implementation.
     52 class BitStack {
     53 public:
     54     BitStack();
     55 
            // Mutators: add a value to the stack / remove one from it.
     56     void push(bool);
     57     void pop();
     58 
            // Read-only accessors; neither modifies the stack (both are const).
     59     bool top() const;
     60     unsigned size() const;
     61 
     62 private:
            // NOTE(review): presumably the number of values currently on the stack, as
            // reported by size() -- confirm.
     63     unsigned m_size;
            // Backing storage of unsigned words.
            // NOTE(review): the second template argument (1) is presumably Vector's inline
            // capacity -- confirm against wtf/Vector.h.
     64     Vector<unsigned, 1> m_words;
     65 };
     66 
     67 // Iterates through the DOM range, returning all the text, and 0-length boundaries
     68 // at points where replaced elements break up the text flow.  The text comes back in
     69 // chunks so as to optimize for performance of the iteration.
        //
        // The interface suggests the usual loop shape: construct from a Range, then read
        // characters()/length() and call advance() until atEnd() returns true.
        // Member functions without an inline body are defined outside this header.
     70 
     71 class TextIterator {
     72 public:
     73     TextIterator();
            // The two optional flags are stored in m_emitCharactersBetweenAllVisiblePositions and
            // m_enterTextControls (see the member comments at the bottom of the class).
     74     explicit TextIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
     75 
            // The iterator is exhausted once there is no current position node.
     76     bool atEnd() const { return !m_positionNode; }
     77     void advance();
     78 
            // The current chunk: a pointer to its characters and the number of characters.
            // A chunk can be a 0-length boundary, as described in the class comment.
     79     int length() const { return m_textLength; }
     80     const UChar* characters() const { return m_textCharacters; }
     81 
            // NOTE(review): presumably the Range / Node that the current chunk corresponds to,
            // derived from the m_position* members -- confirm against the definitions.
     82     PassRefPtr<Range> range() const;
     83     Node* node() const;
     84 
            // Static helpers that convert between Ranges and integer character locations/lengths.
            // NOTE(review): exact semantics (e.g. what spacesForReplacedElements changes) are not
            // visible in this header -- confirm against the definitions.
     85     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
     86     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
     87     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
     88 
     89 private:
            // Internal steps of the walk; all are defined outside this header.
     90     void exitNode();
     91     bool shouldRepresentNodeOffsetZero();
     92     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
     93     void representNodeOffsetZero();
     94     bool handleTextNode();
     95     bool handleReplacedElement();
     96     bool handleNonTextNode();
     97     void handleTextBox();
     98     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
     99     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
    100 
    101     // Current position, not necessarily of the text being returned, but position
    102     // as we walk through the DOM tree.
    103     Node* m_node;
    104     int m_offset;
    105     bool m_handledNode;
    106     bool m_handledChildren;
    107     BitStack m_fullyClippedStack;
    108 
    109     // The range.
    110     Node* m_startContainer;
    111     int m_startOffset;
    112     Node* m_endContainer;
    113     int m_endOffset;
    114     Node* m_pastEndNode;
    115 
    116     // The current text and its position, in the form to be returned from the iterator.
            // m_positionNode doubles as the "at end" flag: atEnd() is true when it is null.
            // The three members marked mutable can be modified from const member functions.
    117     Node* m_positionNode;
    118     mutable Node* m_positionOffsetBaseNode;
    119     mutable int m_positionStartOffset;
    120     mutable int m_positionEndOffset;
    121     const UChar* m_textCharacters;
    122     int m_textLength;
    123 
    124     // Used when there is still some pending text from the current node; when these
    125     // are false and 0, we go back to normal iterating.
    126     bool m_needAnotherNewline;
    127     InlineTextBox* m_textBox;
    128 
    129     // Used to do the whitespace collapsing logic.
    130     Node* m_lastTextNode;
    131     bool m_lastTextNodeEndedWithCollapsedSpace;
    132     UChar m_lastCharacter;
    133 
    134     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    135     UChar m_singleCharacterBuffer;
    136 
    137     // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    138     Vector<InlineTextBox*> m_sortedTextBoxes;
    139     size_t m_sortedTextBoxesPosition;
    140 
    141     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    142     bool m_haveEmitted;
    143 
    144     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    145     // in the Range used to create the TextIterator.
    146     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    147     // moveParagraphs to not clone/destroy moved content.
    148     bool m_emitCharactersBetweenAllVisiblePositions;
            // Mirrors the enterTextControls constructor argument.
            // NOTE(review): presumably lets the walk descend into text form controls -- confirm.
    149     bool m_enterTextControls;
    150 };
    151 
    152 // Iterates through the DOM range, returning all the text, and 0-length boundaries
    153 // at points where replaced elements break up the text flow. The text comes back in
    154 // chunks so as to optimize for performance of the iteration.
        //
        // NOTE(review): the description above is word-for-word the one used for TextIterator.
        // The class name indicates that this iterator walks the range backwards and is a
        // simplified variant -- confirm the differences against the implementation.
        // Member functions without an inline body are defined outside this header.
    155 class SimplifiedBackwardsTextIterator {
    156 public:
    157     SimplifiedBackwardsTextIterator();
    158     explicit SimplifiedBackwardsTextIterator(const Range*);
    159 
            // The iterator is exhausted once there is no current position node.
    160     bool atEnd() const { return !m_positionNode; }
    161     void advance();
    162 
            // The current chunk: a pointer to its characters and the number of characters.
    163     int length() const { return m_textLength; }
    164     const UChar* characters() const { return m_textCharacters; }
    165 
    166     PassRefPtr<Range> range() const;
    167 
    168 private:
            // Internal steps of the walk; all are defined outside this header.
    169     void exitNode();
    170     bool handleTextNode();
    171     bool handleReplacedElement();
    172     bool handleNonTextNode();
    173     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    174 
    175     // Current position, not necessarily of the text being returned, but position
    176     // as we walk through the DOM tree.
    177     Node* m_node;
    178     int m_offset;
    179     bool m_handledNode;
    180     bool m_handledChildren;
    181     BitStack m_fullyClippedStack;
    182 
            // NOTE(review): the next two comments read as swapped relative to the member names
            // ("End" above m_startNode, "Start" above m_endNode). Presumably they describe the
            // backwards walk, which would finish at the range's start and begin at its end --
            // confirm against the constructor.
    183     // End of the range.
    184     Node* m_startNode;
    185     int m_startOffset;
    186     // Start of the range.
    187     Node* m_endNode;
    188     int m_endOffset;
    189 
    190     // The current text and its position, in the form to be returned from the iterator.
            // m_positionNode doubles as the "at end" flag: atEnd() is true when it is null.
    191     Node* m_positionNode;
    192     int m_positionStartOffset;
    193     int m_positionEndOffset;
    194     const UChar* m_textCharacters;
    195     int m_textLength;
    196 
    197     // Used to do the whitespace logic.
    198     Node* m_lastTextNode;
    199     UChar m_lastCharacter;
    200 
    201     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    202     UChar m_singleCharacterBuffer;
    203 
    204     // The node after the last node this iterator should process.
    205     Node* m_pastStartNode;
    206 };
    207 
    208 // Builds on the text iterator, adding a character position so we can walk one
    209 // character at a time, or faster, as needed. Useful for searching.
        //
        // Wraps a TextIterator (m_textIterator) and tracks an offset into that iterator's
        // current chunk (m_runOffset). Member functions without an inline body are defined
        // outside this header.
    210 class CharacterIterator {
    211 public:
    212     CharacterIterator();
            // Takes the same arguments as TextIterator's Range constructor.
    213     explicit CharacterIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
    214 
    215     void advance(int numCharacters);
    216 
            // atBreak() exposes m_atBreak; atEnd() defers to the wrapped TextIterator.
    217     bool atBreak() const { return m_atBreak; }
    218     bool atEnd() const { return m_textIterator.atEnd(); }
    219 
            // The part of the wrapped iterator's current chunk that lies at or after m_runOffset:
            // length() is the chunk length minus m_runOffset, and characters() is the chunk
            // pointer advanced by m_runOffset.
    220     int length() const { return m_textIterator.length() - m_runOffset; }
    221     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
    222     String string(int numChars);
    223 
            // characterOffset() exposes m_offset.
    224     int characterOffset() const { return m_offset; }
    225     PassRefPtr<Range> range() const;
    226 
    227 private:
            // m_offset: value reported by characterOffset().
            // m_runOffset: offset into the wrapped iterator's current chunk (see length()).
            // m_atBreak: value reported by atBreak().
    228     int m_offset;
    229     int m_runOffset;
    230     bool m_atBreak;
    231 
    232     TextIterator m_textIterator;
    233 };
    234 
        // Character-level wrapper around SimplifiedBackwardsTextIterator (m_textIterator).
        // Its private state has the same three fields as CharacterIterator, but no accessors
        // for them are declared here. Member functions without an inline body are defined
        // outside this header.
    235 class BackwardsCharacterIterator {
    236 public:
    237     BackwardsCharacterIterator();
    238     explicit BackwardsCharacterIterator(const Range*);
    239 
            // NOTE(review): the int is presumably a character count, as in
            // CharacterIterator::advance(int numCharacters) -- confirm against the definition.
    240     void advance(int);
    241 
            // Defers to the wrapped backwards text iterator.
    242     bool atEnd() const { return m_textIterator.atEnd(); }
    243 
    244     PassRefPtr<Range> range() const;
    245 
    246 private:
            // NOTE(review): presumably the same roles as the identically named members of
            // CharacterIterator -- confirm against the implementation.
    247     int m_offset;
    248     int m_runOffset;
    249     bool m_atBreak;
    250 
    251     SimplifiedBackwardsTextIterator m_textIterator;
    252 };
    253 
    254 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
    255 // meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
        //
        // Wraps a TextIterator (m_textIterator). Member functions without an inline body are
        // defined outside this header.
    256 class WordAwareIterator {
    257 public:
    258     WordAwareIterator();
    259     explicit WordAwareIterator(const Range*);
    260 
            // Done only when no look-ahead is pending (m_didLookAhead is false) and the wrapped
            // TextIterator is itself at its end.
    261     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    262     void advance();
    263 
    264     int length() const;
    265     const UChar* characters() const;
    266 
    267     // Range of the text we're currently returning
    268     PassRefPtr<Range> range() const { return m_range; }
    269 
    270 private:
    271     // text from the previous chunk from the textIterator
    272     const UChar* m_previousText;
    273     int m_previousLength;
    274 
    275     // many chunks from textIterator concatenated
    276     Vector<UChar> m_buffer;
    277 
    278     // Did we have to look ahead in the textIterator to confirm the current chunk?
    279     bool m_didLookAhead;
    280 
            // Returned as-is by range().
    281     RefPtr<Range> m_range;
    282 
    283     TextIterator m_textIterator;
    284 };
    285 
    286 }
    287 
    288 #endif
    289