1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "core/dom/Range.h" 30 #include "core/editing/FindOptions.h" 31 #include "wtf/Vector.h" 32 33 namespace WebCore { 34 35 class InlineTextBox; 36 class RenderText; 37 class RenderTextFragment; 38 39 enum TextIteratorBehavior { 40 TextIteratorDefaultBehavior = 0, 41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 42 TextIteratorEntersTextControls = 1 << 1, 43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2, 44 TextIteratorIgnoresStyleVisibility = 1 << 3, 45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4, 46 TextIteratorEmitsOriginalText = 1 << 5, 47 TextIteratorStopsOnFormControls = 1 << 6, 48 TextIteratorEmitsImageAltText = 1 << 7, 49 }; 50 51 // FIXME: Can't really answer this question correctly without knowing the white-space mode. 52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 53 inline bool isCollapsibleWhitespace(UChar c) 54 { 55 switch (c) { 56 case ' ': 57 case '\n': 58 return true; 59 default: 60 return false; 61 } 62 } 63 64 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); 65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions); 66 67 class BitStack { 68 public: 69 BitStack(); 70 ~BitStack(); 71 72 void push(bool); 73 void pop(); 74 75 bool top() const; 76 unsigned size() const; 77 78 private: 79 unsigned m_size; 80 Vector<unsigned, 1> m_words; 81 }; 82 83 // Iterates through the DOM range, returning all the text, and 0-length boundaries 84 // at points where replaced elements break up the text flow. The text comes back in 85 // chunks so as to optimize for performance of the iteration. 86 87 class TextIterator { 88 public: 89 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 90 ~TextIterator(); 91 92 bool atEnd() const { return !m_positionNode || m_shouldStop; } 93 void advance(); 94 95 int length() const { return m_textLength; } 96 UChar characterAt(unsigned index) const; 97 String substring(unsigned position, unsigned length) const; 98 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const; 99 100 template<typename BufferType> 101 void appendTextTo(BufferType& output, unsigned position = 0) 102 { 103 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length())); 104 unsigned lengthToAppend = length() - position; 105 if (!lengthToAppend) 106 return; 107 if (m_singleCharacterBuffer) { 108 ASSERT(!position); 109 ASSERT(length() == 1); 110 output.append(&m_singleCharacterBuffer, 1); 111 } else { 112 string().appendTo(output, startOffset() + position, lengthToAppend); 113 } 114 } 115 116 PassRefPtr<Range> range() const; 117 Node* node() const; 118 119 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 120 static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); 121 static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length); 122 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 123 124 private: 125 int startOffset() const { return m_positionStartOffset; } 126 const String& string() const { return m_text; } 127 void exitNode(); 128 bool shouldRepresentNodeOffsetZero(); 129 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 130 void representNodeOffsetZero(); 131 bool handleTextNode(); 132 bool handleReplacedElement(); 133 bool handleNonTextNode(); 134 void handleTextBox(); 135 void handleTextNodeFirstLetter(RenderTextFragment*); 136 bool hasVisibleTextNode(RenderText*); 137 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 138 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset); 139 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 140 141 // Current position, not necessarily of the text being returned, but position 142 // as we walk through the DOM tree. 143 Node* m_node; 144 int m_offset; 145 bool m_handledNode; 146 bool m_handledChildren; 147 BitStack m_fullyClippedStack; 148 149 // The range. 150 Node* m_startContainer; 151 int m_startOffset; 152 Node* m_endContainer; 153 int m_endOffset; 154 Node* m_pastEndNode; 155 156 // The current text and its position, in the form to be returned from the iterator. 157 Node* m_positionNode; 158 mutable Node* m_positionOffsetBaseNode; 159 mutable int m_positionStartOffset; 160 mutable int m_positionEndOffset; 161 int m_textLength; 162 String m_text; 163 164 // Used when there is still some pending text from the current node; when these 165 // are false and 0, we go back to normal iterating. 166 bool m_needsAnotherNewline; 167 InlineTextBox* m_textBox; 168 // Used when iteration over :first-letter text to save pointer to 169 // remaining text box. 170 InlineTextBox* m_remainingTextBox; 171 // Used to point to RenderText object for :first-letter. 172 RenderText *m_firstLetterText; 173 174 // Used to do the whitespace collapsing logic. 175 Node* m_lastTextNode; 176 bool m_lastTextNodeEndedWithCollapsedSpace; 177 UChar m_lastCharacter; 178 179 // Used for whitespace characters that aren't in the DOM, so we can point at them. 180 // If non-zero, overrides m_text. 181 UChar m_singleCharacterBuffer; 182 183 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 184 Vector<InlineTextBox*> m_sortedTextBoxes; 185 size_t m_sortedTextBoxesPosition; 186 187 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 188 bool m_hasEmitted; 189 190 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 191 // in the Range used to create the TextIterator. 192 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 193 // moveParagraphs to not clone/destroy moved content. 194 bool m_emitsCharactersBetweenAllVisiblePositions; 195 bool m_entersTextControls; 196 197 // Used when we want texts for copying, pasting, and transposing. 198 bool m_emitsTextWithoutTranscoding; 199 // Used in pasting inside password field. 200 bool m_emitsOriginalText; 201 // Used when deciding text fragment created by :first-letter should be looked into. 202 bool m_handledFirstLetter; 203 // Used when the visibility of the style should not affect text gathering. 204 bool m_ignoresStyleVisibility; 205 // Used when emitting the special 0xFFFC character is required. 206 bool m_emitsObjectReplacementCharacters; 207 // Used when the iteration should stop if form controls are reached. 208 bool m_stopsOnFormControls; 209 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 210 bool m_shouldStop; 211 212 bool m_emitsImageAltText; 213 }; 214 215 // Iterates through the DOM range, returning all the text, and 0-length boundaries 216 // at points where replaced elements break up the text flow. The text comes back in 217 // chunks so as to optimize for performance of the iteration. 218 class SimplifiedBackwardsTextIterator { 219 public: 220 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 221 222 bool atEnd() const { return !m_positionNode || m_shouldStop; } 223 void advance(); 224 225 int length() const { return m_textLength; } 226 227 template<typename BufferType> 228 void prependTextTo(BufferType& output) 229 { 230 if (!m_textLength) 231 return; 232 if (m_singleCharacterBuffer) 233 output.prepend(&m_singleCharacterBuffer, 1); 234 else 235 m_textContainer.prependTo(output, m_textOffset, m_textLength); 236 } 237 238 PassRefPtr<Range> range() const; 239 240 private: 241 void exitNode(); 242 bool handleTextNode(); 243 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); 244 bool handleReplacedElement(); 245 bool handleNonTextNode(); 246 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 247 bool advanceRespectingRange(Node*); 248 249 // Current position, not necessarily of the text being returned, but position 250 // as we walk through the DOM tree. 251 Node* m_node; 252 int m_offset; 253 bool m_handledNode; 254 bool m_handledChildren; 255 BitStack m_fullyClippedStack; 256 257 // End of the range. 258 Node* m_startNode; 259 int m_startOffset; 260 // Start of the range. 261 Node* m_endNode; 262 int m_endOffset; 263 264 // The current text and its position, in the form to be returned from the iterator. 265 Node* m_positionNode; 266 int m_positionStartOffset; 267 int m_positionEndOffset; 268 269 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer. 270 int m_textOffset; 271 int m_textLength; 272 273 // Used to do the whitespace logic. 274 Node* m_lastTextNode; 275 UChar m_lastCharacter; 276 277 // Used for whitespace characters that aren't in the DOM, so we can point at them. 278 UChar m_singleCharacterBuffer; 279 280 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 281 bool m_havePassedStartNode; 282 283 // Should handle first-letter renderer in the next call to handleTextNode. 284 bool m_shouldHandleFirstLetter; 285 286 // Used when the iteration should stop if form controls are reached. 287 bool m_stopsOnFormControls; 288 289 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 290 bool m_shouldStop; 291 292 // Used in pasting inside password field. 293 bool m_emitsOriginalText; 294 }; 295 296 // Builds on the text iterator, adding a character position so we can walk one 297 // character at a time, or faster, as needed. Useful for searching. 298 class CharacterIterator { 299 public: 300 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 301 302 void advance(int numCharacters); 303 304 bool atBreak() const { return m_atBreak; } 305 bool atEnd() const { return m_textIterator.atEnd(); } 306 307 int length() const { return m_textIterator.length() - m_runOffset; } 308 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); } 309 310 template<typename BufferType> 311 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); } 312 313 String string(int numChars); 314 315 int characterOffset() const { return m_offset; } 316 PassRefPtr<Range> range() const; 317 318 private: 319 int m_offset; 320 int m_runOffset; 321 bool m_atBreak; 322 323 TextIterator m_textIterator; 324 }; 325 326 class BackwardsCharacterIterator { 327 public: 328 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 329 330 void advance(int); 331 332 bool atEnd() const { return m_textIterator.atEnd(); } 333 334 PassRefPtr<Range> range() const; 335 336 private: 337 int m_offset; 338 int m_runOffset; 339 bool m_atBreak; 340 341 SimplifiedBackwardsTextIterator m_textIterator; 342 }; 343 344 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 345 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 346 class WordAwareIterator { 347 public: 348 explicit WordAwareIterator(const Range*); 349 ~WordAwareIterator(); 350 351 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 352 void advance(); 353 354 String substring(unsigned position, unsigned length) const; 355 UChar characterAt(unsigned index) const; 356 int length() const; 357 358 // Range of the text we're currently returning 359 PassRefPtr<Range> range() const { return m_range; } 360 361 private: 362 Vector<UChar> m_buffer; 363 // Did we have to look ahead in the textIterator to confirm the current chunk? 364 bool m_didLookAhead; 365 RefPtr<Range> m_range; 366 TextIterator m_textIterator; 367 }; 368 369 } 370 371 #endif 372