Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "FindOptions.h"
     30 #include "InlineTextBox.h"
     31 #include "Range.h"
     32 #include <wtf/Vector.h>
     33 
     34 namespace WebCore {
     35 
        // Forward declarations: this header only refers to these render classes through
        // pointers (parameters and one data member of TextIterator).
     36 class RenderText;
     37 class RenderTextFragment;
     38 
     39 enum TextIteratorBehavior {
     40     TextIteratorDefaultBehavior = 0,
     41     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
     42     TextIteratorEntersTextControls = 1 << 1,
     43     TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
     44     TextIteratorIgnoresStyleVisibility = 1 << 3,
     45     TextIteratorEmitsObjectReplacementCharacters = 1 << 4
     46 };
     47 
     48 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
     49 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
     50 inline bool isCollapsibleWhitespace(UChar c)
     51 {
     52     switch (c) {
     53         case ' ':
     54         case '\n':
     55             return true;
     56         default:
     57             return false;
     58     }
     59 }
     60 
        // Free functions declared here and defined outside this header.
        // NOTE(review): the descriptions below are inferred from names and signatures
        // only -- confirm against the definitions.
        //
        // plainText: returns a String for the given range; the behavior flags default to
        // TextIteratorDefaultBehavior.
     61 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
        // plainTextToMallocAllocatedBuffer: returns a UChar buffer and reports a length through
        // bufferLength. Going by the name the buffer is malloc-allocated, so the caller presumably
        // has to free it -- TODO confirm ownership and the meaning of isDisplayString.
     62 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
        // findPlainText: returns a Range, presumably the match for the given string searched within
        // the given range according to FindOptions -- TODO confirm what is returned when nothing matches.
     63 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
     64 
        // A stack of boolean values with push/pop/top/size operations. TextIterator and
        // SimplifiedBackwardsTextIterator below each hold one as m_fullyClippedStack.
        // NOTE(review): the m_words/m_size members suggest the values are packed as bits
        // into unsigned words -- confirm against the implementation.
     65 class BitStack {
     66 public:
     67     BitStack();
     68     ~BitStack();
     69 
            // Add one value to, or remove one value from, the stack.
            // NOTE(review): behavior of pop() on an empty stack is not visible here -- confirm.
     70     void push(bool);
     71     void pop();
     72 
            // top(): presumably the most recently pushed value that has not been popped.
            // size(): presumably the number of values currently held (see m_size).
     73     bool top() const;
     74     unsigned size() const;
     75 
     76 private:
            // Presumably the number of values on the stack (counted in bits, not words) -- confirm.
     77     unsigned m_size;
            // Storage for the values.
     78     Vector<unsigned, 1> m_words;
     79 };
     80 
     81 // Iterates through the DOM range, returning all the text, and 0-length boundaries
     82 // at points where replaced elements break up the text flow.  The text comes back in
     83 // chunks so as to optimize for performance of the iteration.
     84 
        // Interface summary, as far as this header shows: an iterator is constructed from a Range
        // (optionally with TextIteratorBehavior flags); the current chunk is exposed through
        // characters()/length(), advance() moves on, and atEnd() reports exhaustion.
        // NOTE(review): presumably used as "while (!it.atEnd()) { ...; it.advance(); }" -- confirm at call sites.
     85 class TextIterator {
     86 public:
     87     TextIterator();
     88     ~TextIterator();
     89     explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
     90 
            // True when there is no current position node (m_positionNode is null).
     91     bool atEnd() const { return !m_positionNode; }
     92     void advance();
     93 
            // Length of, and pointer to, the current chunk (m_textLength / m_textCharacters).
            // NOTE(review): the pointer presumably stays valid only until the next advance() -- confirm.
     94     int length() const { return m_textLength; }
     95     const UChar* characters() const { return m_textCharacters; }
     96 
            // Location of the current chunk in the document.
            // NOTE(review): presumably derived from m_positionNode and the m_position*Offset members -- confirm.
     97     PassRefPtr<Range> range() const;
     98     Node* node() const;
     99 
            // Static helpers converting between a Range and character location/length values.
            // NOTE(review): semantics inferred from names and signatures only -- confirm. Note that
            // these signatures mix int and size_t for locations and lengths.
    100     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
    101     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
    102     static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
    103     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
    104 
    105 private:
            // Internal traversal and emission steps; all are defined outside this header.
    106     void exitNode();
    107     bool shouldRepresentNodeOffsetZero();
    108     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    109     void representNodeOffsetZero();
    110     bool handleTextNode();
    111     bool handleReplacedElement();
    112     bool handleNonTextNode();
    113     void handleTextBox();
    114     void handleTextNodeFirstLetter(RenderTextFragment*);
    115     bool hasVisibleTextNode(RenderText*);
    116     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    117     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
    118     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
    119 
    120     // Current position, not necessarily of the text being returned, but position
    121     // as we walk through the DOM tree.
    122     Node* m_node;
    123     int m_offset;
    124     bool m_handledNode;
    125     bool m_handledChildren;
    126     BitStack m_fullyClippedStack;
    127 
    128     // The range.
    129     Node* m_startContainer;
    130     int m_startOffset;
    131     Node* m_endContainer;
    132     int m_endOffset;
    133     Node* m_pastEndNode;
    134 
    135     // The current text and its position, in the form to be returned from the iterator.
            // Three of these members are mutable, so const member functions may update them.
    136     Node* m_positionNode;
    137     mutable Node* m_positionOffsetBaseNode;
    138     mutable int m_positionStartOffset;
    139     mutable int m_positionEndOffset;
    140     const UChar* m_textCharacters;
    141     int m_textLength;
    142     // Holds the string that m_textCharacters points to, so we ensure it won't be deleted.
    143     String m_text;
    144 
    145     // Used when there is still some pending text from the current node; when these
    146     // are false and 0, we go back to normal iterating.
    147     bool m_needsAnotherNewline;
    148     InlineTextBox* m_textBox;
    149     // Used when iterating over :first-letter text to save a pointer to the
    150     // remaining text box.
    151     InlineTextBox* m_remainingTextBox;
    152     // Used to point to RenderText object for :first-letter.
    153     RenderText *m_firstLetterText;
    154 
    155     // Used to do the whitespace collapsing logic.
    156     Node* m_lastTextNode;
    157     bool m_lastTextNodeEndedWithCollapsedSpace;
    158     UChar m_lastCharacter;
    159 
    160     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    161     UChar m_singleCharacterBuffer;
    162 
    163     // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    164     Vector<InlineTextBox*> m_sortedTextBoxes;
    165     size_t m_sortedTextBoxesPosition;
    166 
    167     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    168     bool m_hasEmitted;
    169 
    170     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    171     // in the Range used to create the TextIterator.
    172     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    173     // moveParagraphs to not clone/destroy moved content.
    174     bool m_emitsCharactersBetweenAllVisiblePositions;
    175     bool m_entersTextControls;
    176 
    177     // Used when we want texts for copying, pasting, and transposing.
    178     bool m_emitsTextWithoutTranscoding;
    179     // Used when deciding whether the text fragment created by :first-letter should be looked into.
    180     bool m_handledFirstLetter;
    181     // Used when the visibility of the style should not affect text gathering.
    182     bool m_ignoresStyleVisibility;
    183     // Used when emitting the special 0xFFFC character is required.
    184     bool m_emitsObjectReplacementCharacters;
    185 };
    186 
    187 // Iterates backwards through the DOM range, returning all the text, and 0-length boundaries
    188 // at points where replaced elements break up the text flow. The text comes back in
    189 // chunks so as to optimize for performance of the iteration.
        // Exposes the same atEnd()/advance()/length()/characters()/range() surface as TextIterator,
        // but has no node() accessor and none of the static range helpers.
    190 class SimplifiedBackwardsTextIterator {
    191 public:
    192     SimplifiedBackwardsTextIterator();
    193     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    194 
            // True when there is no current position node (m_positionNode is null).
    195     bool atEnd() const { return !m_positionNode; }
    196     void advance();
    197 
            // Length of, and pointer to, the current chunk (m_textLength / m_textCharacters).
    198     int length() const { return m_textLength; }
    199     const UChar* characters() const { return m_textCharacters; }
    200 
            // Location of the current chunk.
            // NOTE(review): presumably built from m_positionNode and the m_position*Offset members -- confirm.
    201     PassRefPtr<Range> range() const;
    202 
    203 private:
            // Internal traversal and emission steps; all are defined outside this header.
    204     void exitNode();
    205     bool handleTextNode();
    206     bool handleReplacedElement();
    207     bool handleNonTextNode();
    208     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    209     bool advanceRespectingRange(Node*);
    210 
            // Behavior flags. NOTE(review): presumably the value passed to the constructor -- confirm.
    211     TextIteratorBehavior m_behavior;
    212     // Current position, not necessarily of the text being returned, but position
    213     // as we walk through the DOM tree.
    214     Node* m_node;
    215     int m_offset;
    216     bool m_handledNode;
    217     bool m_handledChildren;
    218     BitStack m_fullyClippedStack;
    219 
            // NOTE(review): the "End"/"Start" labels below read as reversed relative to the member
            // names; presumably they are meant in iteration order, since the walk is backwards
            // (see m_havePassedStartNode) -- confirm.
    220     // End of the range.
    221     Node* m_startNode;
    222     int m_startOffset;
    223     // Start of the range.
    224     Node* m_endNode;
    225     int m_endOffset;
    226 
    227     // The current text and its position, in the form to be returned from the iterator.
    228     Node* m_positionNode;
    229     int m_positionStartOffset;
    230     int m_positionEndOffset;
    231     const UChar* m_textCharacters;
    232     int m_textLength;
    233 
    234     // Used to do the whitespace logic.
    235     Node* m_lastTextNode;
    236     UChar m_lastCharacter;
    237 
    238     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    239     UChar m_singleCharacterBuffer;
    240 
    241     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    242     bool m_havePassedStartNode;
    243 };
    244 
    245 // Builds on the text iterator, adding a character position so we can walk one
    246 // character at a time, or faster, as needed. Useful for searching.
        // Wraps a TextIterator (m_textIterator) and exposes the tail of its current chunk,
        // starting m_runOffset characters in.
    247 class CharacterIterator {
    248 public:
    249     CharacterIterator();
    250     explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    251 
            // NOTE(review): presumably moves forward by numCharacters, stepping the wrapped
            // TextIterator when the current chunk is used up -- confirm in the .cpp.
    252     void advance(int numCharacters);
    253 
            // atBreak() reports m_atBreak; atEnd() forwards to the wrapped TextIterator.
    254     bool atBreak() const { return m_atBreak; }
    255     bool atEnd() const { return m_textIterator.atEnd(); }
    256 
            // Remaining part of the wrapped iterator's current chunk: its length minus m_runOffset,
            // and its character pointer advanced by m_runOffset.
    257     int length() const { return m_textIterator.length() - m_runOffset; }
    258     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
            // NOTE(review): presumably collects the next numChars characters into a String -- confirm
            // whether this also advances the iterator.
    259     String string(int numChars);
    260 
            // Returns m_offset. NOTE(review): presumably the number of characters advanced so far -- confirm.
    261     int characterOffset() const { return m_offset; }
    262     PassRefPtr<Range> range() const;
    263 
    264 private:
    265     int m_offset;
            // Offset into the wrapped iterator's current chunk (see length() and characters()).
    266     int m_runOffset;
    267     bool m_atBreak;
    268 
    269     TextIterator m_textIterator;
    270 };
    271 
        // Counterpart of CharacterIterator built on SimplifiedBackwardsTextIterator. The public
        // surface visible here is smaller: advance(), atEnd() and range() only.
    272 class BackwardsCharacterIterator {
    273 public:
    274     BackwardsCharacterIterator();
    275     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    276 
            // NOTE(review): presumably moves by the given number of characters -- confirm in the .cpp.
    277     void advance(int);
    278 
            // Forwards to the wrapped SimplifiedBackwardsTextIterator.
    279     bool atEnd() const { return m_textIterator.atEnd(); }
    280 
    281     PassRefPtr<Range> range() const;
    282 
    283 private:
            // No accessor for these four members is declared in this header.
    284     TextIteratorBehavior m_behavior;
    285     int m_offset;
    286     int m_runOffset;
    287     bool m_atBreak;
    288 
    289     SimplifiedBackwardsTextIterator m_textIterator;
    290 };
    291 
    292 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
    293 // meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
        // Wraps a TextIterator (m_textIterator). Unlike the other iterators in this header, the
        // constructor takes only a Range and no TextIteratorBehavior argument.
    294 class WordAwareIterator {
    295 public:
    296     WordAwareIterator();
    297     explicit WordAwareIterator(const Range*);
    298     ~WordAwareIterator();
    299 
            // Not at the end while a look-ahead is recorded (m_didLookAhead), even if the wrapped
            // TextIterator is already exhausted.
    300     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    301     void advance();
    302 
            // Current chunk. NOTE(review): presumably served from m_buffer, m_previousText or the
            // wrapped iterator depending on state -- confirm in the .cpp.
    303     int length() const;
    304     const UChar* characters() const;
    305 
    306     // Range of the text we're currently returning
    307     PassRefPtr<Range> range() const { return m_range; }
    308 
    309 private:
    310     // text from the previous chunk from the textIterator
    311     const UChar* m_previousText;
    312     int m_previousLength;
    313 
    314     // many chunks from textIterator concatenated
    315     Vector<UChar> m_buffer;
    316 
    317     // Did we have to look ahead in the textIterator to confirm the current chunk?
    318     bool m_didLookAhead;
    319 
    320     RefPtr<Range> m_range;
    321 
    322     TextIterator m_textIterator;
    323 };
    324 
    325 }
    326 
    327 #endif
    328