/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "FindOptions.h"
     30 #include "InlineTextBox.h"
     31 #include "Range.h"
     32 #include <wtf/Vector.h>
     33 
     34 namespace WebCore {
     35 
// Forward declarations: within this header these rendering types are only
// used through pointers (method parameters and a data member of TextIterator).
class RenderText;
class RenderTextFragment;
     38 
// Option flags accepted by the iterator constructors and by plainText().
// Every non-zero value occupies its own bit, presumably so that several
// options can be OR-ed together -- the code that decodes them is not in this
// header, so confirm there before relying on combinations.
enum TextIteratorBehavior {
    // No special behavior requested.
    TextIteratorDefaultBehavior = 0,
    // Used by selection preservation code: one character should be emitted
    // between every VisiblePosition in the Range.
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    // Going by the name, lets the iterator descend into text controls
    // (TODO confirm against the implementation).
    TextIteratorEntersTextControls = 1 << 1,
    // For callers that want texts for copying, pasting, and transposing.
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
    // The visibility of the style should not affect text gathering.
    TextIteratorIgnoresStyleVisibility = 1 << 3,
    // Emit the special 0xFFFC character.
    TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
#if OS(ANDROID)
    // Android only: iteration should stop if form controls are reached.
    // NOTE(review): bit 5 is skipped here; presumably left free to avoid
    // clashing with flags defined elsewhere -- confirm before reusing it.
    TextIteratorStopsOnFormControls = 1 << 6
#endif
};
     50 
     51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
     52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
     53 inline bool isCollapsibleWhitespace(UChar c)
     54 {
     55     switch (c) {
     56         case ' ':
     57         case '\n':
     58             return true;
     59         default:
     60             return false;
     61     }
     62 }
     63 
// Free helper functions operating on a whole Range. Only the declarations
// live in this header; the notes below are limited to what the signatures show.

// Returns the text of the given range as a String; the second argument
// selects TextIteratorBehavior options (defaults to none).
String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
// Variant that returns a raw UChar buffer and reports its length through
// |bufferLength|. Going by the name the buffer is malloc-allocated, so the
// caller presumably owns it and must free() it -- TODO confirm in the .cpp.
// The effect of |isDisplayString| is not visible from this header.
UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
// Looks for the given string inside the range, subject to FindOptions, and
// returns the result as a Range. NOTE(review): what is returned when nothing
// matches is decided by the implementation -- verify before null-checking.
PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
     67 
// A stack of boolean values. The iterators below keep one as
// m_fullyClippedStack. State consists of an element count plus a Vector of
// unsigned words; presumably each entry is packed as a single bit in those
// words (the packing itself is implemented outside this header).
class BitStack {
public:
    BitStack();
    ~BitStack();

    // Adds a value on top of the stack / removes the topmost value.
    void push(bool);
    void pop();

    // Value currently on top of the stack, and the number of stored entries.
    bool top() const;
    unsigned size() const;

private:
    // Number of entries currently stored (presumably what size() reports).
    unsigned m_size;
    // Backing storage, with inline capacity for a single word.
    Vector<unsigned, 1> m_words;
};
     83 
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow.  The text comes back in
// chunks so as to optimize for performance of the iteration.
//
// The public interface is an atEnd()/advance() pair plus accessors for the
// current chunk (characters(), length(), range(), node()).

class TextIterator {
public:
    TextIterator();
    ~TextIterator();
    // Iterates over the given range; the flags select optional behaviors
    // (see TextIteratorBehavior).
    explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    // Iteration protocol: atEnd() reports whether iteration has finished,
    // advance() moves on to the next chunk.
    bool atEnd() const;
    void advance();

    // The current chunk of text: length() returns m_textLength and
    // characters() returns m_textCharacters.
    int length() const { return m_textLength; }
    const UChar* characters() const { return m_textCharacters; }

    // Position information for the current chunk, as a Range and as a Node.
    PassRefPtr<Range> range() const;
    Node* node() const;

    // Helpers converting between Ranges and character offsets/lengths.
    // NOTE(review): |spacesForReplacedElements| presumably makes replaced
    // elements count as a space -- confirm in the implementation. Also note
    // that rangeFromLocationAndLength() takes int while
    // locationAndLengthFromRange() reports size_t.
    static int rangeLength(const Range*, bool spacesForReplacedElements = false);
    static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
    static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
    // Presumably the part of |entireRange| that starts |characterOffset|
    // characters in and spans |characterCount| characters -- TODO confirm.
    static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);

private:
    // Traversal and emission helpers; all of them are defined outside this header.
    void exitNode();
    bool shouldRepresentNodeOffsetZero();
    bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    void representNodeOffsetZero();
    bool handleTextNode();
    bool handleReplacedElement();
    bool handleNonTextNode();
    void handleTextBox();
    void handleTextNodeFirstLetter(RenderTextFragment*);
    bool hasVisibleTextNode(RenderText*);
    void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
    void emitText(Node* textNode, int textStartOffset, int textEndOffset);

    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
    Node* m_node;
    int m_offset;
    bool m_handledNode;
    bool m_handledChildren;
    BitStack m_fullyClippedStack;

    // The range.
    Node* m_startContainer;
    int m_startOffset;
    Node* m_endContainer;
    int m_endOffset;
    Node* m_pastEndNode;

    // The current text and its position, in the form to be returned from the iterator.
    // NOTE(review): three of these are mutable, presumably so that a const
    // accessor can update them -- confirm which one in the .cpp.
    Node* m_positionNode;
    mutable Node* m_positionOffsetBaseNode;
    mutable int m_positionStartOffset;
    mutable int m_positionEndOffset;
    const UChar* m_textCharacters;
    int m_textLength;
    // Hold string m_textCharacters points to so we ensure it won't be deleted.
    String m_text;

    // Used when there is still some pending text from the current node; when these
    // are false and 0, we go back to normal iterating.
    bool m_needsAnotherNewline;
    InlineTextBox* m_textBox;
    // Used when iterating over :first-letter text to save a pointer to the
    // remaining text box.
    InlineTextBox* m_remainingTextBox;
    // Used to point to RenderText object for :first-letter.
    RenderText *m_firstLetterText;

    // Used to do the whitespace collapsing logic.
    Node* m_lastTextNode;
    bool m_lastTextNodeEndedWithCollapsedSpace;
    UChar m_lastCharacter;

    // Used for whitespace characters that aren't in the DOM, so we can point at them.
    UChar m_singleCharacterBuffer;

    // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    Vector<InlineTextBox*> m_sortedTextBoxes;
    size_t m_sortedTextBoxesPosition;

    // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    bool m_hasEmitted;

    // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    // in the Range used to create the TextIterator.
    // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    // moveParagraphs to not clone/destroy moved content.
    bool m_emitsCharactersBetweenAllVisiblePositions;
    bool m_entersTextControls;

    // Used when we want texts for copying, pasting, and transposing.
    bool m_emitsTextWithoutTranscoding;
    // Used when deciding text fragment created by :first-letter should be looked into.
    bool m_handledFirstLetter;
    // Used when the visibility of the style should not affect text gathering.
    bool m_ignoresStyleVisibility;
    // Used when emitting the special 0xFFFC character is required.
    bool m_emitsObjectReplacementCharacters;
#if OS(ANDROID)
    // Used when the iteration should stop if form controls are reached.
    bool m_stopsOnFormControls;
    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    bool m_shouldStop;
#endif
};
    195 
// Backwards counterpart of TextIterator (going by the class name; the walk
// itself is implemented outside this header).
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow. The text comes back in
// chunks so as to optimize for performance of the iteration.
class SimplifiedBackwardsTextIterator {
public:
    SimplifiedBackwardsTextIterator();
    // Iterates over the given range; the flags select optional behaviors
    // (see TextIteratorBehavior).
    explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    // Iteration protocol: atEnd() reports whether iteration has finished,
    // advance() moves on to the next chunk.
    bool atEnd() const;
    void advance();

    // The current chunk of text: length() returns m_textLength and
    // characters() returns m_textCharacters.
    int length() const { return m_textLength; }
    const UChar* characters() const { return m_textCharacters; }

    // Position of the current chunk, expressed as a Range.
    PassRefPtr<Range> range() const;

private:
    // Traversal and emission helpers; all of them are defined outside this header.
    void exitNode();
    bool handleTextNode();
    bool handleReplacedElement();
    bool handleNonTextNode();
    void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    bool advanceRespectingRange(Node*);

    // Behavior flags this iterator was given (presumably the constructor
    // argument -- the assignment is not visible here).
    TextIteratorBehavior m_behavior;
    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
    Node* m_node;
    int m_offset;
    bool m_handledNode;
    bool m_handledChildren;
    BitStack m_fullyClippedStack;

    // NOTE(review): the two labels below read as swapped relative to the
    // member names. They appear to be written from the point of view of the
    // backwards walk (m_havePassedStartNode further down treats m_startNode
    // as the edge of the iteration range) -- confirm against the constructor.
    // End of the range.
    Node* m_startNode;
    int m_startOffset;
    // Start of the range.
    Node* m_endNode;
    int m_endOffset;

    // The current text and its position, in the form to be returned from the iterator.
    Node* m_positionNode;
    int m_positionStartOffset;
    int m_positionEndOffset;
    const UChar* m_textCharacters;
    int m_textLength;

    // Used to do the whitespace logic.
    Node* m_lastTextNode;
    UChar m_lastCharacter;

    // Used for whitespace characters that aren't in the DOM, so we can point at them.
    UChar m_singleCharacterBuffer;

    // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    bool m_havePassedStartNode;

#if OS(ANDROID)
    // Used when the iteration should stop if form controls are reached.
    bool m_stopsOnFormControls;
    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    bool m_shouldStop;
#endif
};
    260 
// Builds on the text iterator, adding a character position so we can walk one
// character at a time, or faster, as needed. Useful for searching.
class CharacterIterator {
public:
    CharacterIterator();
    // Iterates over the given range; the flags select optional behaviors
    // (see TextIteratorBehavior).
    explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    // Moves forward by the given number of characters.
    void advance(int numCharacters);

    // atBreak() exposes m_atBreak (its exact meaning is set by the
    // implementation); atEnd() simply forwards to the wrapped TextIterator.
    bool atBreak() const { return m_atBreak; }
    bool atEnd() const { return m_textIterator.atEnd(); }

    // View of the wrapped iterator's current chunk with the first m_runOffset
    // characters skipped: the pointer is moved forward and the length reduced
    // by the same amount.
    int length() const { return m_textIterator.length() - m_runOffset; }
    const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
    // Returns text as a String; presumably the next |numChars| characters
    // starting at the current position -- TODO confirm in the .cpp.
    String string(int numChars);

    // characterOffset() returns m_offset; range() is defined outside this header.
    int characterOffset() const { return m_offset; }
    PassRefPtr<Range> range() const;

private:
    // Value reported by characterOffset().
    int m_offset;
    // Offset into the wrapped iterator's current chunk (see length()/characters()).
    int m_runOffset;
    bool m_atBreak;

    // The chunk-level iterator this class refines.
    TextIterator m_textIterator;
};
    287 
// Character-position layer on top of SimplifiedBackwardsTextIterator; it
// carries the same offset/run-offset/break state as CharacterIterator but
// exposes only advance(), atEnd() and range().
class BackwardsCharacterIterator {
public:
    BackwardsCharacterIterator();
    // Iterates over the given range; the flags select optional behaviors
    // (see TextIteratorBehavior).
    explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    // Moves the position by the given number of characters (presumably
    // towards the start of the range, given the class name -- TODO confirm).
    void advance(int);

    // Forwards to the wrapped backwards text iterator.
    bool atEnd() const { return m_textIterator.atEnd(); }

    PassRefPtr<Range> range() const;

private:
    // Behavior flags this iterator was given (presumably the constructor
    // argument -- the assignment is not visible here).
    TextIteratorBehavior m_behavior;
    int m_offset;
    int m_runOffset;
    bool m_atBreak;

    // The chunk-level iterator this class refines.
    SimplifiedBackwardsTextIterator m_textIterator;
};
    307 
// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
// meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
class WordAwareIterator {
public:
    WordAwareIterator();
    explicit WordAwareIterator(const Range*);
    ~WordAwareIterator();

    // Finished only when the wrapped TextIterator is at its end AND no
    // looked-ahead chunk is still pending (m_didLookAhead is false).
    bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    void advance();

    // The current chunk of text; both accessors are defined outside this header.
    int length() const;
    const UChar* characters() const;

    // Range of the text we're currently returning
    PassRefPtr<Range> range() const { return m_range; }

private:
    // text from the previous chunk from the textIterator
    const UChar* m_previousText;
    int m_previousLength;

    // many chunks from textIterator concatenated
    Vector<UChar> m_buffer;

    // Did we have to look ahead in the textIterator to confirm the current chunk?
    bool m_didLookAhead;

    // Backing store for range().
    RefPtr<Range> m_range;

    // The chunk-level iterator whose output is regrouped by this class.
    TextIterator m_textIterator;
};
    340 
    341 }
    342 
    343 #endif
    344