Home | History | Annotate | Download | only in editing
      1 /*
      2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef TextIterator_h
     27 #define TextIterator_h
     28 
     29 #include "core/dom/Range.h"
     30 #include "core/editing/FindOptions.h"
     31 #include "wtf/Vector.h"
     32 
     33 namespace WebCore {
     34 
     35 class InlineTextBox;
     36 class RenderText;
     37 class RenderTextFragment;
     38 
     39 enum TextIteratorBehavior {
     40     TextIteratorDefaultBehavior = 0,
     41     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
     42     TextIteratorEntersTextControls = 1 << 1,
     43     TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
     44     TextIteratorIgnoresStyleVisibility = 1 << 3,
     45     TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
     46     TextIteratorEmitsOriginalText = 1 << 5,
     47     TextIteratorStopsOnFormControls = 1 << 6,
     48     TextIteratorEmitsImageAltText = 1 << 7,
     49 };
     50 
     51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
     52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
     53 inline bool isCollapsibleWhitespace(UChar c)
     54 {
     55     switch (c) {
     56         case ' ':
     57         case '\n':
     58             return true;
     59         default:
     60             return false;
     61     }
     62 }
     63 
     64 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
     65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
     66 
     67 class BitStack {
     68 public:
     69     BitStack();
     70     ~BitStack();
     71 
     72     void push(bool);
     73     void pop();
     74 
     75     bool top() const;
     76     unsigned size() const;
     77 
     78 private:
     79     unsigned m_size;
     80     Vector<unsigned, 1> m_words;
     81 };
     82 
     83 // Iterates through the DOM range, returning all the text, and 0-length boundaries
     84 // at points where replaced elements break up the text flow.  The text comes back in
     85 // chunks so as to optimize for performance of the iteration.
     86 
     87 class TextIterator {
     88 public:
     89     explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
     90     ~TextIterator();
     91 
     92     bool atEnd() const { return !m_positionNode || m_shouldStop; }
     93     void advance();
     94 
     95     int length() const { return m_textLength; }
     96     UChar characterAt(unsigned index) const;
     97     String substring(unsigned position, unsigned length) const;
     98     void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
     99 
    100     template<typename BufferType>
    101     void appendTextTo(BufferType& output, unsigned position = 0)
    102     {
    103         ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
    104         unsigned lengthToAppend = length() - position;
    105         if (!lengthToAppend)
    106             return;
    107         if (m_singleCharacterBuffer) {
    108             ASSERT(!position);
    109             ASSERT(length() == 1);
    110             output.append(&m_singleCharacterBuffer, 1);
    111         } else {
    112             string().appendTo(output, startOffset() + position, lengthToAppend);
    113         }
    114     }
    115 
    116     PassRefPtr<Range> range() const;
    117     Node* node() const;
    118 
    119     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
    120     static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
    121     static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
    122     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
    123 
    124 private:
    125     int startOffset() const { return m_positionStartOffset; }
    126     const String& string() const { return m_text; }
    127     void exitNode();
    128     bool shouldRepresentNodeOffsetZero();
    129     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    130     void representNodeOffsetZero();
    131     bool handleTextNode();
    132     bool handleReplacedElement();
    133     bool handleNonTextNode();
    134     void handleTextBox();
    135     void handleTextNodeFirstLetter(RenderTextFragment*);
    136     bool hasVisibleTextNode(RenderText*);
    137     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    138     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
    139     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
    140 
    141     // Current position, not necessarily of the text being returned, but position
    142     // as we walk through the DOM tree.
    143     Node* m_node;
    144     int m_offset;
    145     bool m_handledNode;
    146     bool m_handledChildren;
    147     BitStack m_fullyClippedStack;
    148 
    149     // The range.
    150     Node* m_startContainer;
    151     int m_startOffset;
    152     Node* m_endContainer;
    153     int m_endOffset;
    154     Node* m_pastEndNode;
    155 
    156     // The current text and its position, in the form to be returned from the iterator.
    157     Node* m_positionNode;
    158     mutable Node* m_positionOffsetBaseNode;
    159     mutable int m_positionStartOffset;
    160     mutable int m_positionEndOffset;
    161     int m_textLength;
    162     String m_text;
    163 
    164     // Used when there is still some pending text from the current node; when these
    165     // are false and 0, we go back to normal iterating.
    166     bool m_needsAnotherNewline;
    167     InlineTextBox* m_textBox;
    168     // Used when iteration over :first-letter text to save pointer to
    169     // remaining text box.
    170     InlineTextBox* m_remainingTextBox;
    171     // Used to point to RenderText object for :first-letter.
    172     RenderText *m_firstLetterText;
    173 
    174     // Used to do the whitespace collapsing logic.
    175     Node* m_lastTextNode;
    176     bool m_lastTextNodeEndedWithCollapsedSpace;
    177     UChar m_lastCharacter;
    178 
    179     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    180     // If non-zero, overrides m_text.
    181     UChar m_singleCharacterBuffer;
    182 
    183     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
    184     Vector<InlineTextBox*> m_sortedTextBoxes;
    185     size_t m_sortedTextBoxesPosition;
    186 
    187     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    188     bool m_hasEmitted;
    189 
    190     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
    191     // in the Range used to create the TextIterator.
    192     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    193     // moveParagraphs to not clone/destroy moved content.
    194     bool m_emitsCharactersBetweenAllVisiblePositions;
    195     bool m_entersTextControls;
    196 
    197     // Used when we want texts for copying, pasting, and transposing.
    198     bool m_emitsTextWithoutTranscoding;
    199     // Used in pasting inside password field.
    200     bool m_emitsOriginalText;
    201     // Used when deciding text fragment created by :first-letter should be looked into.
    202     bool m_handledFirstLetter;
    203     // Used when the visibility of the style should not affect text gathering.
    204     bool m_ignoresStyleVisibility;
    205     // Used when emitting the special 0xFFFC character is required.
    206     bool m_emitsObjectReplacementCharacters;
    207     // Used when the iteration should stop if form controls are reached.
    208     bool m_stopsOnFormControls;
    209     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    210     bool m_shouldStop;
    211 
    212     bool m_emitsImageAltText;
    213 };
    214 
    215 // Iterates through the DOM range, returning all the text, and 0-length boundaries
    216 // at points where replaced elements break up the text flow. The text comes back in
    217 // chunks so as to optimize for performance of the iteration.
    218 class SimplifiedBackwardsTextIterator {
    219 public:
    220     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    221 
    222     bool atEnd() const { return !m_positionNode || m_shouldStop; }
    223     void advance();
    224 
    225     int length() const { return m_textLength; }
    226 
    227     template<typename BufferType>
    228     void prependTextTo(BufferType& output)
    229     {
    230         if (!m_textLength)
    231             return;
    232         if (m_singleCharacterBuffer)
    233             output.prepend(&m_singleCharacterBuffer, 1);
    234         else
    235             m_textContainer.prependTo(output, m_textOffset, m_textLength);
    236     }
    237 
    238     PassRefPtr<Range> range() const;
    239 
    240 private:
    241     void exitNode();
    242     bool handleTextNode();
    243     RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
    244     bool handleReplacedElement();
    245     bool handleNonTextNode();
    246     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    247     bool advanceRespectingRange(Node*);
    248 
    249     // Current position, not necessarily of the text being returned, but position
    250     // as we walk through the DOM tree.
    251     Node* m_node;
    252     int m_offset;
    253     bool m_handledNode;
    254     bool m_handledChildren;
    255     BitStack m_fullyClippedStack;
    256 
    257     // End of the range.
    258     Node* m_startNode;
    259     int m_startOffset;
    260     // Start of the range.
    261     Node* m_endNode;
    262     int m_endOffset;
    263 
    264     // The current text and its position, in the form to be returned from the iterator.
    265     Node* m_positionNode;
    266     int m_positionStartOffset;
    267     int m_positionEndOffset;
    268 
    269     String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
    270     int m_textOffset;
    271     int m_textLength;
    272 
    273     // Used to do the whitespace logic.
    274     Node* m_lastTextNode;
    275     UChar m_lastCharacter;
    276 
    277     // Used for whitespace characters that aren't in the DOM, so we can point at them.
    278     UChar m_singleCharacterBuffer;
    279 
    280     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    281     bool m_havePassedStartNode;
    282 
    283     // Should handle first-letter renderer in the next call to handleTextNode.
    284     bool m_shouldHandleFirstLetter;
    285 
    286     // Used when the iteration should stop if form controls are reached.
    287     bool m_stopsOnFormControls;
    288 
    289     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    290     bool m_shouldStop;
    291 
    292     // Used in pasting inside password field.
    293     bool m_emitsOriginalText;
    294 };
    295 
    296 // Builds on the text iterator, adding a character position so we can walk one
    297 // character at a time, or faster, as needed. Useful for searching.
    298 class CharacterIterator {
    299 public:
    300     explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    301 
    302     void advance(int numCharacters);
    303 
    304     bool atBreak() const { return m_atBreak; }
    305     bool atEnd() const { return m_textIterator.atEnd(); }
    306 
    307     int length() const { return m_textIterator.length() - m_runOffset; }
    308     UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
    309 
    310     template<typename BufferType>
    311     void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
    312 
    313     String string(int numChars);
    314 
    315     int characterOffset() const { return m_offset; }
    316     PassRefPtr<Range> range() const;
    317 
    318 private:
    319     int m_offset;
    320     int m_runOffset;
    321     bool m_atBreak;
    322 
    323     TextIterator m_textIterator;
    324 };
    325 
    326 class BackwardsCharacterIterator {
    327 public:
    328     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    329 
    330     void advance(int);
    331 
    332     bool atEnd() const { return m_textIterator.atEnd(); }
    333 
    334     PassRefPtr<Range> range() const;
    335 
    336 private:
    337     int m_offset;
    338     int m_runOffset;
    339     bool m_atBreak;
    340 
    341     SimplifiedBackwardsTextIterator m_textIterator;
    342 };
    343 
    344 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
    345 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
    346 class WordAwareIterator {
    347 public:
    348     explicit WordAwareIterator(const Range*);
    349     ~WordAwareIterator();
    350 
    351     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    352     void advance();
    353 
    354     String substring(unsigned position, unsigned length) const;
    355     UChar characterAt(unsigned index) const;
    356     int length() const;
    357 
    358     // Range of the text we're currently returning
    359     PassRefPtr<Range> range() const { return m_range; }
    360 
    361 private:
    362     Vector<UChar> m_buffer;
    363     // Did we have to look ahead in the textIterator to confirm the current chunk?
    364     bool m_didLookAhead;
    365     RefPtr<Range> m_range;
    366     TextIterator m_textIterator;
    367 };
    368 
    369 }
    370 
    371 #endif
    372