1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "FindOptions.h" 30 #include "InlineTextBox.h" 31 #include "Range.h" 32 #include <wtf/Vector.h> 33 34 namespace WebCore { 35 36 class RenderText; 37 class RenderTextFragment; 38 39 enum TextIteratorBehavior { 40 TextIteratorDefaultBehavior = 0, 41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 42 TextIteratorEntersTextControls = 1 << 1, 43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2, 44 TextIteratorIgnoresStyleVisibility = 1 << 3, 45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4 46 }; 47 48 // FIXME: Can't really answer this question correctly without knowing the white-space mode. 49 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 50 inline bool isCollapsibleWhitespace(UChar c) 51 { 52 switch (c) { 53 case ' ': 54 case '\n': 55 return true; 56 default: 57 return false; 58 } 59 } 60 61 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior); 62 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior); 63 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions); 64 65 class BitStack { 66 public: 67 BitStack(); 68 ~BitStack(); 69 70 void push(bool); 71 void pop(); 72 73 bool top() const; 74 unsigned size() const; 75 76 private: 77 unsigned m_size; 78 Vector<unsigned, 1> m_words; 79 }; 80 81 // Iterates through the DOM range, returning all the text, and 0-length boundaries 82 // at points where replaced elements break up the text flow. The text comes back in 83 // chunks so as to optimize for performance of the iteration. 84 85 class TextIterator { 86 public: 87 TextIterator(); 88 ~TextIterator(); 89 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 90 91 bool atEnd() const { return !m_positionNode; } 92 void advance(); 93 94 int length() const { return m_textLength; } 95 const UChar* characters() const { return m_textCharacters; } 96 97 PassRefPtr<Range> range() const; 98 Node* node() const; 99 100 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 101 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); 102 static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length); 103 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 104 105 private: 106 void exitNode(); 107 bool shouldRepresentNodeOffsetZero(); 108 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 109 void representNodeOffsetZero(); 110 bool handleTextNode(); 111 bool handleReplacedElement(); 112 bool handleNonTextNode(); 113 void handleTextBox(); 114 void handleTextNodeFirstLetter(RenderTextFragment*); 115 bool hasVisibleTextNode(RenderText*); 116 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 117 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset); 118 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 119 120 // Current position, not necessarily of the text being returned, but position 121 // as we walk through the DOM tree. 122 Node* m_node; 123 int m_offset; 124 bool m_handledNode; 125 bool m_handledChildren; 126 BitStack m_fullyClippedStack; 127 128 // The range. 129 Node* m_startContainer; 130 int m_startOffset; 131 Node* m_endContainer; 132 int m_endOffset; 133 Node* m_pastEndNode; 134 135 // The current text and its position, in the form to be returned from the iterator. 136 Node* m_positionNode; 137 mutable Node* m_positionOffsetBaseNode; 138 mutable int m_positionStartOffset; 139 mutable int m_positionEndOffset; 140 const UChar* m_textCharacters; 141 int m_textLength; 142 // Hold string m_textCharacters points to so we ensure it won't be deleted. 143 String m_text; 144 145 // Used when there is still some pending text from the current node; when these 146 // are false and 0, we go back to normal iterating. 147 bool m_needsAnotherNewline; 148 InlineTextBox* m_textBox; 149 // Used when iteration over :first-letter text to save pointer to 150 // remaining text box. 151 InlineTextBox* m_remainingTextBox; 152 // Used to point to RenderText object for :first-letter. 153 RenderText *m_firstLetterText; 154 155 // Used to do the whitespace collapsing logic. 156 Node* m_lastTextNode; 157 bool m_lastTextNodeEndedWithCollapsedSpace; 158 UChar m_lastCharacter; 159 160 // Used for whitespace characters that aren't in the DOM, so we can point at them. 161 UChar m_singleCharacterBuffer; 162 163 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 164 Vector<InlineTextBox*> m_sortedTextBoxes; 165 size_t m_sortedTextBoxesPosition; 166 167 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 168 bool m_hasEmitted; 169 170 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 171 // in the Range used to create the TextIterator. 172 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 173 // moveParagraphs to not clone/destroy moved content. 174 bool m_emitsCharactersBetweenAllVisiblePositions; 175 bool m_entersTextControls; 176 177 // Used when we want texts for copying, pasting, and transposing. 178 bool m_emitsTextWithoutTranscoding; 179 // Used when deciding text fragment created by :first-letter should be looked into. 180 bool m_handledFirstLetter; 181 // Used when the visibility of the style should not affect text gathering. 182 bool m_ignoresStyleVisibility; 183 // Used when emitting the special 0xFFFC character is required. 184 bool m_emitsObjectReplacementCharacters; 185 }; 186 187 // Iterates through the DOM range, returning all the text, and 0-length boundaries 188 // at points where replaced elements break up the text flow. The text comes back in 189 // chunks so as to optimize for performance of the iteration. 190 class SimplifiedBackwardsTextIterator { 191 public: 192 SimplifiedBackwardsTextIterator(); 193 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 194 195 bool atEnd() const { return !m_positionNode; } 196 void advance(); 197 198 int length() const { return m_textLength; } 199 const UChar* characters() const { return m_textCharacters; } 200 201 PassRefPtr<Range> range() const; 202 203 private: 204 void exitNode(); 205 bool handleTextNode(); 206 bool handleReplacedElement(); 207 bool handleNonTextNode(); 208 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 209 bool advanceRespectingRange(Node*); 210 211 TextIteratorBehavior m_behavior; 212 // Current position, not necessarily of the text being returned, but position 213 // as we walk through the DOM tree. 214 Node* m_node; 215 int m_offset; 216 bool m_handledNode; 217 bool m_handledChildren; 218 BitStack m_fullyClippedStack; 219 220 // End of the range. 221 Node* m_startNode; 222 int m_startOffset; 223 // Start of the range. 224 Node* m_endNode; 225 int m_endOffset; 226 227 // The current text and its position, in the form to be returned from the iterator. 228 Node* m_positionNode; 229 int m_positionStartOffset; 230 int m_positionEndOffset; 231 const UChar* m_textCharacters; 232 int m_textLength; 233 234 // Used to do the whitespace logic. 235 Node* m_lastTextNode; 236 UChar m_lastCharacter; 237 238 // Used for whitespace characters that aren't in the DOM, so we can point at them. 239 UChar m_singleCharacterBuffer; 240 241 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 242 bool m_havePassedStartNode; 243 }; 244 245 // Builds on the text iterator, adding a character position so we can walk one 246 // character at a time, or faster, as needed. Useful for searching. 247 class CharacterIterator { 248 public: 249 CharacterIterator(); 250 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 251 252 void advance(int numCharacters); 253 254 bool atBreak() const { return m_atBreak; } 255 bool atEnd() const { return m_textIterator.atEnd(); } 256 257 int length() const { return m_textIterator.length() - m_runOffset; } 258 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; } 259 String string(int numChars); 260 261 int characterOffset() const { return m_offset; } 262 PassRefPtr<Range> range() const; 263 264 private: 265 int m_offset; 266 int m_runOffset; 267 bool m_atBreak; 268 269 TextIterator m_textIterator; 270 }; 271 272 class BackwardsCharacterIterator { 273 public: 274 BackwardsCharacterIterator(); 275 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 276 277 void advance(int); 278 279 bool atEnd() const { return m_textIterator.atEnd(); } 280 281 PassRefPtr<Range> range() const; 282 283 private: 284 TextIteratorBehavior m_behavior; 285 int m_offset; 286 int m_runOffset; 287 bool m_atBreak; 288 289 SimplifiedBackwardsTextIterator m_textIterator; 290 }; 291 292 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 293 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 294 class WordAwareIterator { 295 public: 296 WordAwareIterator(); 297 explicit WordAwareIterator(const Range*); 298 ~WordAwareIterator(); 299 300 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 301 void advance(); 302 303 int length() const; 304 const UChar* characters() const; 305 306 // Range of the text we're currently returning 307 PassRefPtr<Range> range() const { return m_range; } 308 309 private: 310 // text from the previous chunk from the textIterator 311 const UChar* m_previousText; 312 int m_previousLength; 313 314 // many chunks from textIterator concatenated 315 Vector<UChar> m_buffer; 316 317 // Did we have to look ahead in the textIterator to confirm the current chunk? 318 bool m_didLookAhead; 319 320 RefPtr<Range> m_range; 321 322 TextIterator m_textIterator; 323 }; 324 325 } 326 327 #endif 328