1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "InlineTextBox.h" 30 #include "Range.h" 31 #include <wtf/Vector.h> 32 33 namespace WebCore { 34 35 // FIXME: Can't really answer this question correctly without knowing the white-space mode. 36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 37 inline bool isCollapsibleWhitespace(UChar c) 38 { 39 switch (c) { 40 case ' ': 41 case '\n': 42 return true; 43 default: 44 return false; 45 } 46 } 47 48 String plainText(const Range*); 49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString); 50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive); 51 52 class BitStack { 53 public: 54 BitStack(); 55 56 void push(bool); 57 void pop(); 58 59 bool top() const; 60 unsigned size() const; 61 62 private: 63 unsigned m_size; 64 Vector<unsigned, 1> m_words; 65 }; 66 67 // Iterates through the DOM range, returning all the text, and 0-length boundaries 68 // at points where replaced elements break up the text flow. The text comes back in 69 // chunks so as to optimize for performance of the iteration. 70 71 class TextIterator { 72 public: 73 TextIterator(); 74 explicit TextIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false); 75 76 bool atEnd() const { return !m_positionNode; } 77 void advance(); 78 79 int length() const { return m_textLength; } 80 const UChar* characters() const { return m_textCharacters; } 81 82 PassRefPtr<Range> range() const; 83 Node* node() const; 84 85 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 86 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); 87 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 88 89 private: 90 void exitNode(); 91 bool shouldRepresentNodeOffsetZero(); 92 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 93 void representNodeOffsetZero(); 94 bool handleTextNode(); 95 bool handleReplacedElement(); 96 bool handleNonTextNode(); 97 void handleTextBox(); 98 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 99 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 100 101 // Current position, not necessarily of the text being returned, but position 102 // as we walk through the DOM tree. 103 Node* m_node; 104 int m_offset; 105 bool m_handledNode; 106 bool m_handledChildren; 107 BitStack m_fullyClippedStack; 108 109 // The range. 110 Node* m_startContainer; 111 int m_startOffset; 112 Node* m_endContainer; 113 int m_endOffset; 114 Node* m_pastEndNode; 115 116 // The current text and its position, in the form to be returned from the iterator. 117 Node* m_positionNode; 118 mutable Node* m_positionOffsetBaseNode; 119 mutable int m_positionStartOffset; 120 mutable int m_positionEndOffset; 121 const UChar* m_textCharacters; 122 int m_textLength; 123 124 // Used when there is still some pending text from the current node; when these 125 // are false and 0, we go back to normal iterating. 126 bool m_needAnotherNewline; 127 InlineTextBox* m_textBox; 128 129 // Used to do the whitespace collapsing logic. 130 Node* m_lastTextNode; 131 bool m_lastTextNodeEndedWithCollapsedSpace; 132 UChar m_lastCharacter; 133 134 // Used for whitespace characters that aren't in the DOM, so we can point at them. 135 UChar m_singleCharacterBuffer; 136 137 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 138 Vector<InlineTextBox*> m_sortedTextBoxes; 139 size_t m_sortedTextBoxesPosition; 140 141 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 142 bool m_haveEmitted; 143 144 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 145 // in the Range used to create the TextIterator. 146 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 147 // moveParagraphs to not clone/destroy moved content. 148 bool m_emitCharactersBetweenAllVisiblePositions; 149 bool m_enterTextControls; 150 }; 151 152 // Iterates through the DOM range, returning all the text, and 0-length boundaries 153 // at points where replaced elements break up the text flow. The text comes back in 154 // chunks so as to optimize for performance of the iteration. 155 class SimplifiedBackwardsTextIterator { 156 public: 157 SimplifiedBackwardsTextIterator(); 158 explicit SimplifiedBackwardsTextIterator(const Range*); 159 160 bool atEnd() const { return !m_positionNode; } 161 void advance(); 162 163 int length() const { return m_textLength; } 164 const UChar* characters() const { return m_textCharacters; } 165 166 PassRefPtr<Range> range() const; 167 168 private: 169 void exitNode(); 170 bool handleTextNode(); 171 bool handleReplacedElement(); 172 bool handleNonTextNode(); 173 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 174 175 // Current position, not necessarily of the text being returned, but position 176 // as we walk through the DOM tree. 177 Node* m_node; 178 int m_offset; 179 bool m_handledNode; 180 bool m_handledChildren; 181 BitStack m_fullyClippedStack; 182 183 // End of the range. 184 Node* m_startNode; 185 int m_startOffset; 186 // Start of the range. 187 Node* m_endNode; 188 int m_endOffset; 189 190 // The current text and its position, in the form to be returned from the iterator. 191 Node* m_positionNode; 192 int m_positionStartOffset; 193 int m_positionEndOffset; 194 const UChar* m_textCharacters; 195 int m_textLength; 196 197 // Used to do the whitespace logic. 198 Node* m_lastTextNode; 199 UChar m_lastCharacter; 200 201 // Used for whitespace characters that aren't in the DOM, so we can point at them. 202 UChar m_singleCharacterBuffer; 203 204 // The node after the last node this iterator should process. 205 Node* m_pastStartNode; 206 }; 207 208 // Builds on the text iterator, adding a character position so we can walk one 209 // character at a time, or faster, as needed. Useful for searching. 210 class CharacterIterator { 211 public: 212 CharacterIterator(); 213 explicit CharacterIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false); 214 215 void advance(int numCharacters); 216 217 bool atBreak() const { return m_atBreak; } 218 bool atEnd() const { return m_textIterator.atEnd(); } 219 220 int length() const { return m_textIterator.length() - m_runOffset; } 221 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; } 222 String string(int numChars); 223 224 int characterOffset() const { return m_offset; } 225 PassRefPtr<Range> range() const; 226 227 private: 228 int m_offset; 229 int m_runOffset; 230 bool m_atBreak; 231 232 TextIterator m_textIterator; 233 }; 234 235 class BackwardsCharacterIterator { 236 public: 237 BackwardsCharacterIterator(); 238 explicit BackwardsCharacterIterator(const Range*); 239 240 void advance(int); 241 242 bool atEnd() const { return m_textIterator.atEnd(); } 243 244 PassRefPtr<Range> range() const; 245 246 private: 247 int m_offset; 248 int m_runOffset; 249 bool m_atBreak; 250 251 SimplifiedBackwardsTextIterator m_textIterator; 252 }; 253 254 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 255 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 256 class WordAwareIterator { 257 public: 258 WordAwareIterator(); 259 explicit WordAwareIterator(const Range*); 260 261 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 262 void advance(); 263 264 int length() const; 265 const UChar* characters() const; 266 267 // Range of the text we're currently returning 268 PassRefPtr<Range> range() const { return m_range; } 269 270 private: 271 // text from the previous chunk from the textIterator 272 const UChar* m_previousText; 273 int m_previousLength; 274 275 // many chunks from textIterator concatenated 276 Vector<UChar> m_buffer; 277 278 // Did we have to look ahead in the textIterator to confirm the current chunk? 279 bool m_didLookAhead; 280 281 RefPtr<Range> m_range; 282 283 TextIterator m_textIterator; 284 }; 285 286 } 287 288 #endif 289