1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "FindOptions.h" 30 #include "InlineTextBox.h" 31 #include "Range.h" 32 #include <wtf/Vector.h> 33 34 namespace WebCore { 35 36 class RenderText; 37 class RenderTextFragment; 38 39 enum TextIteratorBehavior { 40 TextIteratorDefaultBehavior = 0, 41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 42 TextIteratorEntersTextControls = 1 << 1, 43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2, 44 TextIteratorIgnoresStyleVisibility = 1 << 3, 45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4, 46 #if OS(ANDROID) 47 TextIteratorStopsOnFormControls = 1 << 6 48 #endif 49 }; 50 51 // FIXME: Can't really answer this question correctly without knowing the white-space mode. 52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 53 inline bool isCollapsibleWhitespace(UChar c) 54 { 55 switch (c) { 56 case ' ': 57 case '\n': 58 return true; 59 default: 60 return false; 61 } 62 } 63 64 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior); 65 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior); 66 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions); 67 68 class BitStack { 69 public: 70 BitStack(); 71 ~BitStack(); 72 73 void push(bool); 74 void pop(); 75 76 bool top() const; 77 unsigned size() const; 78 79 private: 80 unsigned m_size; 81 Vector<unsigned, 1> m_words; 82 }; 83 84 // Iterates through the DOM range, returning all the text, and 0-length boundaries 85 // at points where replaced elements break up the text flow. The text comes back in 86 // chunks so as to optimize for performance of the iteration. 87 88 class TextIterator { 89 public: 90 TextIterator(); 91 ~TextIterator(); 92 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 93 94 bool atEnd() const; 95 void advance(); 96 97 int length() const { return m_textLength; } 98 const UChar* characters() const { return m_textCharacters; } 99 100 PassRefPtr<Range> range() const; 101 Node* node() const; 102 103 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 104 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); 105 static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length); 106 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 107 108 private: 109 void exitNode(); 110 bool shouldRepresentNodeOffsetZero(); 111 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 112 void representNodeOffsetZero(); 113 bool handleTextNode(); 114 bool handleReplacedElement(); 115 bool handleNonTextNode(); 116 void handleTextBox(); 117 void handleTextNodeFirstLetter(RenderTextFragment*); 118 bool hasVisibleTextNode(RenderText*); 119 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 120 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset); 121 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 122 123 // Current position, not necessarily of the text being returned, but position 124 // as we walk through the DOM tree. 125 Node* m_node; 126 int m_offset; 127 bool m_handledNode; 128 bool m_handledChildren; 129 BitStack m_fullyClippedStack; 130 131 // The range. 132 Node* m_startContainer; 133 int m_startOffset; 134 Node* m_endContainer; 135 int m_endOffset; 136 Node* m_pastEndNode; 137 138 // The current text and its position, in the form to be returned from the iterator. 139 Node* m_positionNode; 140 mutable Node* m_positionOffsetBaseNode; 141 mutable int m_positionStartOffset; 142 mutable int m_positionEndOffset; 143 const UChar* m_textCharacters; 144 int m_textLength; 145 // Hold string m_textCharacters points to so we ensure it won't be deleted. 146 String m_text; 147 148 // Used when there is still some pending text from the current node; when these 149 // are false and 0, we go back to normal iterating. 150 bool m_needsAnotherNewline; 151 InlineTextBox* m_textBox; 152 // Used when iteration over :first-letter text to save pointer to 153 // remaining text box. 154 InlineTextBox* m_remainingTextBox; 155 // Used to point to RenderText object for :first-letter. 156 RenderText *m_firstLetterText; 157 158 // Used to do the whitespace collapsing logic. 159 Node* m_lastTextNode; 160 bool m_lastTextNodeEndedWithCollapsedSpace; 161 UChar m_lastCharacter; 162 163 // Used for whitespace characters that aren't in the DOM, so we can point at them. 164 UChar m_singleCharacterBuffer; 165 166 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 167 Vector<InlineTextBox*> m_sortedTextBoxes; 168 size_t m_sortedTextBoxesPosition; 169 170 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 171 bool m_hasEmitted; 172 173 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 174 // in the Range used to create the TextIterator. 175 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 176 // moveParagraphs to not clone/destroy moved content. 177 bool m_emitsCharactersBetweenAllVisiblePositions; 178 bool m_entersTextControls; 179 180 // Used when we want texts for copying, pasting, and transposing. 181 bool m_emitsTextWithoutTranscoding; 182 // Used when deciding text fragment created by :first-letter should be looked into. 183 bool m_handledFirstLetter; 184 // Used when the visibility of the style should not affect text gathering. 185 bool m_ignoresStyleVisibility; 186 // Used when emitting the special 0xFFFC character is required. 187 bool m_emitsObjectReplacementCharacters; 188 #if OS(ANDROID) 189 // Used when the iteration should stop if form controls are reached. 190 bool m_stopsOnFormControls; 191 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 192 bool m_shouldStop; 193 #endif 194 }; 195 196 // Iterates through the DOM range, returning all the text, and 0-length boundaries 197 // at points where replaced elements break up the text flow. The text comes back in 198 // chunks so as to optimize for performance of the iteration. 199 class SimplifiedBackwardsTextIterator { 200 public: 201 SimplifiedBackwardsTextIterator(); 202 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 203 204 bool atEnd() const; 205 void advance(); 206 207 int length() const { return m_textLength; } 208 const UChar* characters() const { return m_textCharacters; } 209 210 PassRefPtr<Range> range() const; 211 212 private: 213 void exitNode(); 214 bool handleTextNode(); 215 bool handleReplacedElement(); 216 bool handleNonTextNode(); 217 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 218 bool advanceRespectingRange(Node*); 219 220 TextIteratorBehavior m_behavior; 221 // Current position, not necessarily of the text being returned, but position 222 // as we walk through the DOM tree. 223 Node* m_node; 224 int m_offset; 225 bool m_handledNode; 226 bool m_handledChildren; 227 BitStack m_fullyClippedStack; 228 229 // End of the range. 230 Node* m_startNode; 231 int m_startOffset; 232 // Start of the range. 233 Node* m_endNode; 234 int m_endOffset; 235 236 // The current text and its position, in the form to be returned from the iterator. 237 Node* m_positionNode; 238 int m_positionStartOffset; 239 int m_positionEndOffset; 240 const UChar* m_textCharacters; 241 int m_textLength; 242 243 // Used to do the whitespace logic. 244 Node* m_lastTextNode; 245 UChar m_lastCharacter; 246 247 // Used for whitespace characters that aren't in the DOM, so we can point at them. 248 UChar m_singleCharacterBuffer; 249 250 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 251 bool m_havePassedStartNode; 252 253 #if OS(ANDROID) 254 // Used when the iteration should stop if form controls are reached. 255 bool m_stopsOnFormControls; 256 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 257 bool m_shouldStop; 258 #endif 259 }; 260 261 // Builds on the text iterator, adding a character position so we can walk one 262 // character at a time, or faster, as needed. Useful for searching. 263 class CharacterIterator { 264 public: 265 CharacterIterator(); 266 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 267 268 void advance(int numCharacters); 269 270 bool atBreak() const { return m_atBreak; } 271 bool atEnd() const { return m_textIterator.atEnd(); } 272 273 int length() const { return m_textIterator.length() - m_runOffset; } 274 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; } 275 String string(int numChars); 276 277 int characterOffset() const { return m_offset; } 278 PassRefPtr<Range> range() const; 279 280 private: 281 int m_offset; 282 int m_runOffset; 283 bool m_atBreak; 284 285 TextIterator m_textIterator; 286 }; 287 288 class BackwardsCharacterIterator { 289 public: 290 BackwardsCharacterIterator(); 291 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 292 293 void advance(int); 294 295 bool atEnd() const { return m_textIterator.atEnd(); } 296 297 PassRefPtr<Range> range() const; 298 299 private: 300 TextIteratorBehavior m_behavior; 301 int m_offset; 302 int m_runOffset; 303 bool m_atBreak; 304 305 SimplifiedBackwardsTextIterator m_textIterator; 306 }; 307 308 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 309 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 310 class WordAwareIterator { 311 public: 312 WordAwareIterator(); 313 explicit WordAwareIterator(const Range*); 314 ~WordAwareIterator(); 315 316 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 317 void advance(); 318 319 int length() const; 320 const UChar* characters() const; 321 322 // Range of the text we're currently returning 323 PassRefPtr<Range> range() const { return m_range; } 324 325 private: 326 // text from the previous chunk from the textIterator 327 const UChar* m_previousText; 328 int m_previousLength; 329 330 // many chunks from textIterator concatenated 331 Vector<UChar> m_buffer; 332 333 // Did we have to look ahead in the textIterator to confirm the current chunk? 334 bool m_didLookAhead; 335 336 RefPtr<Range> m_range; 337 338 TextIterator m_textIterator; 339 }; 340 341 } 342 343 #endif 344