1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "core/dom/Range.h" 30 #include "core/editing/FindOptions.h" 31 #include "wtf/Vector.h" 32 33 namespace WebCore { 34 35 class InlineTextBox; 36 class RenderText; 37 class RenderTextFragment; 38 39 enum TextIteratorBehavior { 40 TextIteratorDefaultBehavior = 0, 41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 42 TextIteratorEntersTextControls = 1 << 1, 43 TextIteratorIgnoresStyleVisibility = 1 << 2, 44 TextIteratorEmitsOriginalText = 1 << 3, 45 TextIteratorStopsOnFormControls = 1 << 4, 46 TextIteratorEmitsImageAltText = 1 << 5, 47 TextIteratorEntersAuthorShadowRoots = 1 << 6 48 }; 49 typedef unsigned TextIteratorBehaviorFlags; 50 51 // FIXME: Can't really answer this question correctly without knowing the white-space mode. 52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 53 inline bool isCollapsibleWhitespace(UChar c) 54 { 55 switch (c) { 56 case ' ': 57 case '\n': 58 return true; 59 default: 60 return false; 61 } 62 } 63 64 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions); 66 67 class BitStack { 68 public: 69 BitStack(); 70 ~BitStack(); 71 72 void push(bool); 73 void pop(); 74 75 bool top() const; 76 unsigned size() const; 77 78 private: 79 unsigned m_size; 80 Vector<unsigned, 1> m_words; 81 }; 82 83 // Iterates through the DOM range, returning all the text, and 0-length boundaries 84 // at points where replaced elements break up the text flow. The text comes back in 85 // chunks so as to optimize for performance of the iteration. 86 87 class TextIterator { 88 public: 89 explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 90 ~TextIterator(); 91 92 bool atEnd() const { return !m_positionNode || m_shouldStop; } 93 void advance(); 94 95 int length() const { return m_textLength; } 96 UChar characterAt(unsigned index) const; 97 String substring(unsigned position, unsigned length) const; 98 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const; 99 100 template<typename BufferType> 101 void appendTextTo(BufferType& output, unsigned position = 0) 102 { 103 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length())); 104 unsigned lengthToAppend = length() - position; 105 if (!lengthToAppend) 106 return; 107 if (m_singleCharacterBuffer) { 108 ASSERT(!position); 109 ASSERT(length() == 1); 110 output.append(&m_singleCharacterBuffer, 1); 111 } else { 112 string().appendTo(output, startOffset() + position, lengthToAppend); 113 } 114 } 115 116 PassRefPtr<Range> range() const; 117 Node* node() const; 118 119 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 120 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 121 122 private: 123 enum IterationProgress { 124 HandledNone, 125 HandledAuthorShadowRoots, 126 HandledUserAgentShadowRoot, 127 HandledNode, 128 HandledChildren 129 }; 130 131 int startOffset() const { return m_positionStartOffset; } 132 const String& string() const { return m_text; } 133 void exitNode(); 134 bool shouldRepresentNodeOffsetZero(); 135 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 136 void representNodeOffsetZero(); 137 bool handleTextNode(); 138 bool handleReplacedElement(); 139 bool handleNonTextNode(); 140 void handleTextBox(); 141 void handleTextNodeFirstLetter(RenderTextFragment*); 142 bool hasVisibleTextNode(RenderText*); 143 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 144 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset); 145 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 146 147 // Current position, not necessarily of the text being returned, but position 148 // as we walk through the DOM tree. 149 Node* m_node; 150 int m_offset; 151 IterationProgress m_iterationProgress; 152 BitStack m_fullyClippedStack; 153 int m_shadowDepth; 154 155 // The range. 156 Node* m_startContainer; 157 int m_startOffset; 158 Node* m_endContainer; 159 int m_endOffset; 160 Node* m_pastEndNode; 161 162 // The current text and its position, in the form to be returned from the iterator. 163 Node* m_positionNode; 164 mutable Node* m_positionOffsetBaseNode; 165 mutable int m_positionStartOffset; 166 mutable int m_positionEndOffset; 167 int m_textLength; 168 String m_text; 169 170 // Used when there is still some pending text from the current node; when these 171 // are false and 0, we go back to normal iterating. 172 bool m_needsAnotherNewline; 173 InlineTextBox* m_textBox; 174 // Used when iteration over :first-letter text to save pointer to 175 // remaining text box. 176 InlineTextBox* m_remainingTextBox; 177 // Used to point to RenderText object for :first-letter. 178 RenderText *m_firstLetterText; 179 180 // Used to do the whitespace collapsing logic. 181 Node* m_lastTextNode; 182 bool m_lastTextNodeEndedWithCollapsedSpace; 183 UChar m_lastCharacter; 184 185 // Used for whitespace characters that aren't in the DOM, so we can point at them. 186 // If non-zero, overrides m_text. 187 UChar m_singleCharacterBuffer; 188 189 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 190 Vector<InlineTextBox*> m_sortedTextBoxes; 191 size_t m_sortedTextBoxesPosition; 192 193 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 194 bool m_hasEmitted; 195 196 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 197 // in the Range used to create the TextIterator. 198 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 199 // moveParagraphs to not clone/destroy moved content. 200 bool m_emitsCharactersBetweenAllVisiblePositions; 201 bool m_entersTextControls; 202 203 // Used in pasting inside password field. 204 bool m_emitsOriginalText; 205 // Used when deciding text fragment created by :first-letter should be looked into. 206 bool m_handledFirstLetter; 207 // Used when the visibility of the style should not affect text gathering. 208 bool m_ignoresStyleVisibility; 209 // Used when the iteration should stop if form controls are reached. 210 bool m_stopsOnFormControls; 211 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 212 bool m_shouldStop; 213 214 bool m_emitsImageAltText; 215 216 bool m_entersAuthorShadowRoots; 217 }; 218 219 // Iterates through the DOM range, returning all the text, and 0-length boundaries 220 // at points where replaced elements break up the text flow. The text comes back in 221 // chunks so as to optimize for performance of the iteration. 222 class SimplifiedBackwardsTextIterator { 223 public: 224 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 225 226 bool atEnd() const { return !m_positionNode || m_shouldStop; } 227 void advance(); 228 229 int length() const { return m_textLength; } 230 231 Node* node() const { return m_node; } 232 233 template<typename BufferType> 234 void prependTextTo(BufferType& output) 235 { 236 if (!m_textLength) 237 return; 238 if (m_singleCharacterBuffer) 239 output.prepend(&m_singleCharacterBuffer, 1); 240 else 241 m_textContainer.prependTo(output, m_textOffset, m_textLength); 242 } 243 244 PassRefPtr<Range> range() const; 245 246 private: 247 void exitNode(); 248 bool handleTextNode(); 249 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); 250 bool handleReplacedElement(); 251 bool handleNonTextNode(); 252 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 253 bool advanceRespectingRange(Node*); 254 255 // Current position, not necessarily of the text being returned, but position 256 // as we walk through the DOM tree. 257 Node* m_node; 258 int m_offset; 259 bool m_handledNode; 260 bool m_handledChildren; 261 BitStack m_fullyClippedStack; 262 263 // End of the range. 264 Node* m_startNode; 265 int m_startOffset; 266 // Start of the range. 267 Node* m_endNode; 268 int m_endOffset; 269 270 // The current text and its position, in the form to be returned from the iterator. 271 Node* m_positionNode; 272 int m_positionStartOffset; 273 int m_positionEndOffset; 274 275 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer. 276 int m_textOffset; 277 int m_textLength; 278 279 // Used to do the whitespace logic. 280 Node* m_lastTextNode; 281 UChar m_lastCharacter; 282 283 // Used for whitespace characters that aren't in the DOM, so we can point at them. 284 UChar m_singleCharacterBuffer; 285 286 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 287 bool m_havePassedStartNode; 288 289 // Should handle first-letter renderer in the next call to handleTextNode. 290 bool m_shouldHandleFirstLetter; 291 292 // Used when the iteration should stop if form controls are reached. 293 bool m_stopsOnFormControls; 294 295 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 296 bool m_shouldStop; 297 298 // Used in pasting inside password field. 299 bool m_emitsOriginalText; 300 }; 301 302 // Builds on the text iterator, adding a character position so we can walk one 303 // character at a time, or faster, as needed. Useful for searching. 304 class CharacterIterator { 305 public: 306 explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 307 308 void advance(int numCharacters); 309 310 bool atBreak() const { return m_atBreak; } 311 bool atEnd() const { return m_textIterator.atEnd(); } 312 313 int length() const { return m_textIterator.length() - m_runOffset; } 314 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); } 315 316 template<typename BufferType> 317 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); } 318 319 String string(int numChars); 320 321 int characterOffset() const { return m_offset; } 322 PassRefPtr<Range> range() const; 323 324 private: 325 int m_offset; 326 int m_runOffset; 327 bool m_atBreak; 328 329 TextIterator m_textIterator; 330 }; 331 332 class BackwardsCharacterIterator { 333 public: 334 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 335 336 void advance(int); 337 338 bool atEnd() const { return m_textIterator.atEnd(); } 339 340 PassRefPtr<Range> range() const; 341 342 private: 343 int m_offset; 344 int m_runOffset; 345 bool m_atBreak; 346 347 SimplifiedBackwardsTextIterator m_textIterator; 348 }; 349 350 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 351 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 352 class WordAwareIterator { 353 public: 354 explicit WordAwareIterator(const Range*); 355 ~WordAwareIterator(); 356 357 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 358 void advance(); 359 360 String substring(unsigned position, unsigned length) const; 361 UChar characterAt(unsigned index) const; 362 int length() const; 363 364 // Range of the text we're currently returning 365 PassRefPtr<Range> range() const { return m_range; } 366 367 private: 368 Vector<UChar> m_buffer; 369 // Did we have to look ahead in the textIterator to confirm the current chunk? 370 bool m_didLookAhead; 371 RefPtr<Range> m_range; 372 TextIterator m_textIterator; 373 }; 374 375 } 376 377 #endif 378