1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "TextIterator.h" 29 30 #include "Document.h" 31 #include "Frame.h" 32 #include "HTMLElement.h" 33 #include "HTMLNames.h" 34 #include "htmlediting.h" 35 #include "InlineTextBox.h" 36 #include "Range.h" 37 #include "RenderTableCell.h" 38 #include "RenderTableRow.h" 39 #include "RenderTextControl.h" 40 #include "RenderTextFragment.h" 41 #include "TextBoundaries.h" 42 #include "TextBreakIterator.h" 43 #include "VisiblePosition.h" 44 #include "visible_units.h" 45 #include <wtf/unicode/CharacterNames.h> 46 47 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 48 #include "TextBreakIteratorInternalICU.h" 49 #include <unicode/usearch.h> 50 #endif 51 52 using namespace WTF::Unicode; 53 using namespace std; 54 55 namespace WebCore { 56 57 using namespace HTMLNames; 58 59 // Buffer that knows how to compare with a search target. 60 // Keeps enough of the previous text to be able to search in the future, but no more. 61 // Non-breaking spaces are always equal to normal spaces. 62 // Case folding is also done if the CaseInsensitive option is specified. 63 // Matches are further filtered if the AtWordStarts option is specified, although some 64 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 65 class SearchBuffer { 66 WTF_MAKE_NONCOPYABLE(SearchBuffer); 67 public: 68 SearchBuffer(const String& target, FindOptions); 69 ~SearchBuffer(); 70 71 // Returns number of characters appended; guaranteed to be in the range [1, length]. 72 size_t append(const UChar*, size_t length); 73 bool needsMoreContext() const; 74 void prependContext(const UChar*, size_t length); 75 void reachedBreak(); 76 77 // Result is the size in characters of what was found. 78 // And <startOffset> is the number of characters back to the start of what was found. 79 size_t search(size_t& startOffset); 80 bool atBreak() const; 81 82 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 83 84 private: 85 bool isBadMatch(const UChar*, size_t length) const; 86 bool isWordStartMatch(size_t start, size_t length) const; 87 88 String m_target; 89 FindOptions m_options; 90 91 Vector<UChar> m_buffer; 92 size_t m_overlap; 93 size_t m_prefixLength; 94 bool m_atBreak; 95 bool m_needsMoreContext; 96 97 bool m_targetRequiresKanaWorkaround; 98 Vector<UChar> m_normalizedTarget; 99 mutable Vector<UChar> m_normalizedMatch; 100 101 #else 102 103 private: 104 void append(UChar, bool isCharacterStart); 105 size_t length() const; 106 107 String m_target; 108 FindOptions m_options; 109 110 Vector<UChar> m_buffer; 111 Vector<bool> m_isCharacterStartBuffer; 112 bool m_isBufferFull; 113 size_t m_cursor; 114 115 #endif 116 }; 117 118 // -------- 119 120 static const unsigned bitsInWord = sizeof(unsigned) * 8; 121 static const unsigned bitInWordMask = bitsInWord - 1; 122 123 BitStack::BitStack() 124 : m_size(0) 125 { 126 } 127 128 BitStack::~BitStack() 129 { 130 } 131 132 void BitStack::push(bool bit) 133 { 134 unsigned index = m_size / bitsInWord; 135 unsigned shift = m_size & bitInWordMask; 136 if (!shift && index == m_words.size()) { 137 m_words.grow(index + 1); 138 m_words[index] = 0; 139 } 140 unsigned& word = m_words[index]; 141 unsigned mask = 1U << shift; 142 if (bit) 143 word |= mask; 144 else 145 word &= ~mask; 146 ++m_size; 147 } 148 149 void BitStack::pop() 150 { 151 if (m_size) 152 --m_size; 153 } 154 155 bool BitStack::top() const 156 { 157 if (!m_size) 158 return false; 159 unsigned shift = (m_size - 1) & bitInWordMask; 160 return m_words.last() & (1U << shift); 161 } 162 163 unsigned BitStack::size() const 164 { 165 return m_size; 166 } 167 168 // -------- 169 170 #if !ASSERT_DISABLED 171 172 static unsigned depthCrossingShadowBoundaries(Node* node) 173 { 174 unsigned depth = 0; 175 for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode()) 176 ++depth; 177 return depth; 178 } 179 180 #endif 181 182 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 183 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 184 { 185 if (!rangeEndContainer) 186 return 0; 187 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 188 if (Node* next = rangeEndContainer->childNode(rangeEndOffset)) 189 return next; 190 } 191 for (Node* node = rangeEndContainer; node; node = node->parentOrHostNode()) { 192 if (Node* next = node->nextSibling()) 193 return next; 194 } 195 return 0; 196 } 197 198 // -------- 199 200 static inline bool fullyClipsContents(Node* node) 201 { 202 RenderObject* renderer = node->renderer(); 203 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 204 return false; 205 return toRenderBox(renderer)->size().isEmpty(); 206 } 207 208 static inline bool ignoresContainerClip(Node* node) 209 { 210 RenderObject* renderer = node->renderer(); 211 if (!renderer || renderer->isText()) 212 return false; 213 EPosition position = renderer->style()->position(); 214 return position == AbsolutePosition || position == FixedPosition; 215 } 216 217 static void pushFullyClippedState(BitStack& stack, Node* node) 218 { 219 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 220 221 // Push true if this node full clips its contents, or if a parent already has fully 222 // clipped and this is not a node that ignores its container's clip. 223 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 224 } 225 226 static void setUpFullyClippedStack(BitStack& stack, Node* node) 227 { 228 // Put the nodes in a vector so we can iterate in reverse order. 229 Vector<Node*, 100> ancestry; 230 for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode()) 231 ancestry.append(parent); 232 233 // Call pushFullyClippedState on each node starting with the earliest ancestor. 234 size_t size = ancestry.size(); 235 for (size_t i = 0; i < size; ++i) 236 pushFullyClippedState(stack, ancestry[size - i - 1]); 237 pushFullyClippedState(stack, node); 238 239 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 240 } 241 242 #if OS(ANDROID) 243 static bool checkFormControlElement(Node* startNode) 244 { 245 Node* node = startNode; 246 while (node) { 247 if (node->isElementNode() && static_cast<Element*>(node)->isFormControlElement()) 248 return true; 249 node = node->parentNode(); 250 } 251 return false; 252 } 253 #endif 254 255 256 // -------- 257 258 TextIterator::TextIterator() 259 : m_startContainer(0) 260 , m_startOffset(0) 261 , m_endContainer(0) 262 , m_endOffset(0) 263 , m_positionNode(0) 264 , m_textCharacters(0) 265 , m_textLength(0) 266 , m_remainingTextBox(0) 267 , m_firstLetterText(0) 268 , m_lastCharacter(0) 269 , m_emitsCharactersBetweenAllVisiblePositions(false) 270 , m_entersTextControls(false) 271 , m_emitsTextWithoutTranscoding(false) 272 , m_handledFirstLetter(false) 273 , m_ignoresStyleVisibility(false) 274 , m_emitsObjectReplacementCharacters(false) 275 #if OS(ANDROID) 276 , m_stopsOnFormControls(false) 277 , m_shouldStop(false) 278 #endif 279 { 280 } 281 282 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior) 283 : m_startContainer(0) 284 , m_startOffset(0) 285 , m_endContainer(0) 286 , m_endOffset(0) 287 , m_positionNode(0) 288 , m_textCharacters(0) 289 , m_textLength(0) 290 , m_remainingTextBox(0) 291 , m_firstLetterText(0) 292 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 293 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 294 , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding) 295 , m_handledFirstLetter(false) 296 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 297 , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters) 298 #if OS(ANDROID) 299 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 300 , m_shouldStop(false) 301 #endif 302 { 303 if (!r) 304 return; 305 306 // get and validate the range endpoints 307 Node* startContainer = r->startContainer(); 308 if (!startContainer) 309 return; 310 int startOffset = r->startOffset(); 311 Node* endContainer = r->endContainer(); 312 int endOffset = r->endOffset(); 313 314 // Callers should be handing us well-formed ranges. If we discover that this isn't 315 // the case, we could consider changing this assertion to an early return. 316 ASSERT(r->boundaryPointsValid()); 317 318 // remember range - this does not change 319 m_startContainer = startContainer; 320 m_startOffset = startOffset; 321 m_endContainer = endContainer; 322 m_endOffset = endOffset; 323 324 // set up the current node for processing 325 m_node = r->firstNode(); 326 if (!m_node) 327 return; 328 setUpFullyClippedStack(m_fullyClippedStack, m_node); 329 m_offset = m_node == m_startContainer ? m_startOffset : 0; 330 m_handledNode = false; 331 m_handledChildren = false; 332 333 // calculate first out of bounds node 334 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 335 336 // initialize node processing state 337 m_needsAnotherNewline = false; 338 m_textBox = 0; 339 340 // initialize record of previous node processing 341 m_hasEmitted = false; 342 m_lastTextNode = 0; 343 m_lastTextNodeEndedWithCollapsedSpace = false; 344 m_lastCharacter = 0; 345 346 #ifndef NDEBUG 347 // need this just because of the assert in advance() 348 m_positionNode = m_node; 349 #endif 350 351 // identify the first run 352 advance(); 353 } 354 355 TextIterator::~TextIterator() 356 { 357 } 358 359 bool TextIterator::atEnd() const 360 { 361 #if OS(ANDROID) 362 return !m_positionNode || m_shouldStop; 363 #else 364 return !m_positionNode; 365 #endif 366 } 367 368 void TextIterator::advance() 369 { 370 #if OS(ANDROID) 371 if (m_shouldStop) 372 return; 373 #endif 374 // reset the run information 375 m_positionNode = 0; 376 m_textLength = 0; 377 378 // handle remembered node that needed a newline after the text node's newline 379 if (m_needsAnotherNewline) { 380 // Emit the extra newline, and position it *inside* m_node, after m_node's 381 // contents, in case it's a block, in the same way that we position the first 382 // newline. The range for the emitted newline should start where the line 383 // break begins. 384 // FIXME: It would be cleaner if we emitted two newlines during the last 385 // iteration, instead of using m_needsAnotherNewline. 386 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 387 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 388 m_needsAnotherNewline = false; 389 return; 390 } 391 392 if (!m_textBox && m_remainingTextBox) { 393 m_textBox = m_remainingTextBox; 394 m_remainingTextBox = 0; 395 m_firstLetterText = 0; 396 m_offset = 0; 397 } 398 // handle remembered text box 399 if (m_textBox) { 400 handleTextBox(); 401 if (m_positionNode) 402 return; 403 } 404 405 while (m_node && m_node != m_pastEndNode) { 406 #if OS(ANDROID) 407 if (!m_shouldStop && m_stopsOnFormControls && checkFormControlElement(m_node)) 408 m_shouldStop = true; 409 #endif 410 // if the range ends at offset 0 of an element, represent the 411 // position, but not the content, of that element e.g. if the 412 // node is a blockflow element, emit a newline that 413 // precedes the element 414 if (m_node == m_endContainer && m_endOffset == 0) { 415 representNodeOffsetZero(); 416 m_node = 0; 417 return; 418 } 419 420 RenderObject* renderer = m_node->renderer(); 421 if (!renderer) { 422 m_handledNode = true; 423 m_handledChildren = true; 424 } else { 425 // handle current node according to its type 426 if (!m_handledNode) { 427 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE? 428 m_handledNode = handleTextNode(); 429 else if (renderer && (renderer->isImage() || renderer->isWidget() || 430 (renderer->node() && renderer->node()->isElementNode() && 431 static_cast<Element*>(renderer->node())->isFormControlElement()))) 432 m_handledNode = handleReplacedElement(); 433 else 434 m_handledNode = handleNonTextNode(); 435 if (m_positionNode) 436 return; 437 } 438 } 439 440 // find a new current node to handle in depth-first manner, 441 // calling exitNode() as we come back thru a parent node 442 Node* next = m_handledChildren ? 0 : m_node->firstChild(); 443 m_offset = 0; 444 if (!next) { 445 next = m_node->nextSibling(); 446 if (!next) { 447 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode; 448 Node* parentNode = m_node->parentOrHostNode(); 449 while (!next && parentNode) { 450 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 451 return; 452 bool haveRenderer = m_node->renderer(); 453 m_node = parentNode; 454 m_fullyClippedStack.pop(); 455 parentNode = m_node->parentOrHostNode(); 456 if (haveRenderer) 457 exitNode(); 458 if (m_positionNode) { 459 m_handledNode = true; 460 m_handledChildren = true; 461 return; 462 } 463 next = m_node->nextSibling(); 464 } 465 } 466 m_fullyClippedStack.pop(); 467 } 468 469 // set the new current node 470 m_node = next; 471 if (m_node) 472 pushFullyClippedState(m_fullyClippedStack, m_node); 473 m_handledNode = false; 474 m_handledChildren = false; 475 m_handledFirstLetter = false; 476 m_firstLetterText = 0; 477 478 // how would this ever be? 479 if (m_positionNode) 480 return; 481 } 482 } 483 484 bool TextIterator::handleTextNode() 485 { 486 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 487 return false; 488 489 RenderText* renderer = toRenderText(m_node->renderer()); 490 491 m_lastTextNode = m_node; 492 String str = renderer->text(); 493 494 // handle pre-formatted text 495 if (!renderer->style()->collapseWhiteSpace()) { 496 int runStart = m_offset; 497 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 498 emitCharacter(' ', m_node, 0, runStart, runStart); 499 return false; 500 } 501 if (!m_handledFirstLetter && renderer->isTextFragment()) { 502 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 503 if (m_firstLetterText) { 504 String firstLetter = m_firstLetterText->text(); 505 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 506 m_firstLetterText = 0; 507 m_textBox = 0; 508 return false; 509 } 510 } 511 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 512 return false; 513 int strLength = str.length(); 514 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; 515 int runEnd = min(strLength, end); 516 517 if (runStart >= runEnd) 518 return true; 519 520 emitText(m_node, runStart, runEnd); 521 return true; 522 } 523 524 if (!renderer->firstTextBox() && str.length() > 0) { 525 if (!m_handledFirstLetter && renderer->isTextFragment()) { 526 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 527 if (m_firstLetterText) { 528 handleTextBox(); 529 return false; 530 } 531 } 532 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 533 return false; 534 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 535 return true; 536 } 537 538 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 539 if (renderer->containsReversedText()) { 540 m_sortedTextBoxes.clear(); 541 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 542 m_sortedTextBoxes.append(textBox); 543 } 544 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 545 m_sortedTextBoxesPosition = 0; 546 } 547 548 m_textBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 549 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) 550 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 551 handleTextBox(); 552 return true; 553 } 554 555 void TextIterator::handleTextBox() 556 { 557 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); 558 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 559 m_textBox = 0; 560 return; 561 } 562 String str = renderer->text(); 563 unsigned start = m_offset; 564 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX; 565 while (m_textBox) { 566 unsigned textBoxStart = m_textBox->start(); 567 unsigned runStart = max(textBoxStart, start); 568 569 // Check for collapsed space at the start of this run. 570 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 571 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 572 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 573 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { 574 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 575 unsigned spaceRunStart = runStart - 1; 576 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 577 --spaceRunStart; 578 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 579 } else 580 emitCharacter(' ', m_node, 0, runStart, runStart); 581 return; 582 } 583 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 584 unsigned runEnd = min(textBoxEnd, end); 585 586 // Determine what the next text box will be, but don't advance yet 587 InlineTextBox* nextTextBox = 0; 588 if (renderer->containsReversedText()) { 589 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 590 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 591 } else 592 nextTextBox = m_textBox->nextTextBox(); 593 594 if (runStart < runEnd) { 595 // Handle either a single newline character (which becomes a space), 596 // or a run of characters that does not include a newline. 597 // This effectively translates newlines to spaces without copying the text. 598 if (str[runStart] == '\n') { 599 emitCharacter(' ', m_node, 0, runStart, runStart + 1); 600 m_offset = runStart + 1; 601 } else { 602 size_t subrunEnd = str.find('\n', runStart); 603 if (subrunEnd == notFound || subrunEnd > runEnd) 604 subrunEnd = runEnd; 605 606 m_offset = subrunEnd; 607 emitText(m_node, renderer, runStart, subrunEnd); 608 } 609 610 // If we are doing a subrun that doesn't go to the end of the text box, 611 // come back again to finish handling this text box; don't advance to the next one. 612 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 613 return; 614 615 // Advance and return 616 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 617 if (nextRunStart > runEnd) 618 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 619 m_textBox = nextTextBox; 620 if (renderer->containsReversedText()) 621 ++m_sortedTextBoxesPosition; 622 return; 623 } 624 // Advance and continue 625 m_textBox = nextTextBox; 626 if (renderer->containsReversedText()) 627 ++m_sortedTextBoxesPosition; 628 } 629 if (!m_textBox && m_remainingTextBox) { 630 m_textBox = m_remainingTextBox; 631 m_remainingTextBox = 0; 632 m_firstLetterText = 0; 633 m_offset = 0; 634 handleTextBox(); 635 } 636 } 637 638 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 639 { 640 if (renderer->firstLetter()) { 641 RenderObject* r = renderer->firstLetter(); 642 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 643 return; 644 for (RenderObject *currChild = r->firstChild(); currChild; currChild->nextSibling()) { 645 if (currChild->isText()) { 646 RenderText* firstLetter = toRenderText(currChild); 647 m_handledFirstLetter = true; 648 m_remainingTextBox = m_textBox; 649 m_textBox = firstLetter->firstTextBox(); 650 m_firstLetterText = firstLetter; 651 return; 652 } 653 } 654 } 655 m_handledFirstLetter = true; 656 } 657 658 bool TextIterator::handleReplacedElement() 659 { 660 if (m_fullyClippedStack.top()) 661 return false; 662 663 RenderObject* renderer = m_node->renderer(); 664 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 665 return false; 666 667 if (m_lastTextNodeEndedWithCollapsedSpace) { 668 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 669 return false; 670 } 671 672 if (m_entersTextControls && renderer->isTextControl()) { 673 if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) { 674 m_node = innerTextElement->shadowTreeRootNode(); 675 pushFullyClippedState(m_fullyClippedStack, m_node); 676 m_offset = 0; 677 return false; 678 } 679 } 680 681 m_hasEmitted = true; 682 683 if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) { 684 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); 685 return true; 686 } 687 688 if (m_emitsCharactersBetweenAllVisiblePositions) { 689 // We want replaced elements to behave like punctuation for boundary 690 // finding, and to simply take up space for the selection preservation 691 // code in moveParagraphs, so we use a comma. 692 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 693 return true; 694 } 695 696 m_positionNode = m_node->parentNode(); 697 m_positionOffsetBaseNode = m_node; 698 m_positionStartOffset = 0; 699 m_positionEndOffset = 1; 700 701 m_textCharacters = 0; 702 m_textLength = 0; 703 704 m_lastCharacter = 0; 705 706 return true; 707 } 708 709 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 710 { 711 if (renderer->style()->visibility() == VISIBLE) 712 return true; 713 if (renderer->isTextFragment()) { 714 RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer); 715 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 716 return true; 717 } 718 return false; 719 } 720 721 static bool shouldEmitTabBeforeNode(Node* node) 722 { 723 RenderObject* r = node->renderer(); 724 725 // Table cells are delimited by tabs. 726 if (!r || !isTableCell(node)) 727 return false; 728 729 // Want a tab before every cell other than the first one 730 RenderTableCell* rc = toRenderTableCell(r); 731 RenderTable* t = rc->table(); 732 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 733 } 734 735 static bool shouldEmitNewlineForNode(Node* node) 736 { 737 // br elements are represented by a single newline. 738 RenderObject* r = node->renderer(); 739 if (!r) 740 return node->hasTagName(brTag); 741 742 return r->isBR(); 743 } 744 745 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node) 746 { 747 // Block flow (versus inline flow) is represented by having 748 // a newline both before and after the element. 749 RenderObject* r = node->renderer(); 750 if (!r) { 751 return (node->hasTagName(blockquoteTag) 752 || node->hasTagName(ddTag) 753 || node->hasTagName(divTag) 754 || node->hasTagName(dlTag) 755 || node->hasTagName(dtTag) 756 || node->hasTagName(h1Tag) 757 || node->hasTagName(h2Tag) 758 || node->hasTagName(h3Tag) 759 || node->hasTagName(h4Tag) 760 || node->hasTagName(h5Tag) 761 || node->hasTagName(h6Tag) 762 || node->hasTagName(hrTag) 763 || node->hasTagName(liTag) 764 || node->hasTagName(listingTag) 765 || node->hasTagName(olTag) 766 || node->hasTagName(pTag) 767 || node->hasTagName(preTag) 768 || node->hasTagName(trTag) 769 || node->hasTagName(ulTag)); 770 } 771 772 // Need to make an exception for table cells, because they are blocks, but we 773 // want them tab-delimited rather than having newlines before and after. 774 if (isTableCell(node)) 775 return false; 776 777 // Need to make an exception for table row elements, because they are neither 778 // "inline" or "RenderBlock", but we want newlines for them. 779 if (r->isTableRow()) { 780 RenderTable* t = toRenderTableRow(r)->table(); 781 if (t && !t->isInline()) 782 return true; 783 } 784 785 return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody(); 786 } 787 788 static bool shouldEmitNewlineAfterNode(Node* node) 789 { 790 // FIXME: It should be better but slower to create a VisiblePosition here. 791 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 792 return false; 793 // Check if this is the very last renderer in the document. 794 // If so, then we should not emit a newline. 795 while ((node = node->traverseNextSibling())) 796 if (node->renderer()) 797 return true; 798 return false; 799 } 800 801 static bool shouldEmitNewlineBeforeNode(Node* node) 802 { 803 return shouldEmitNewlinesBeforeAndAfterNode(node); 804 } 805 806 static bool shouldEmitExtraNewlineForNode(Node* node) 807 { 808 // When there is a significant collapsed bottom margin, emit an extra 809 // newline for a more realistic result. We end up getting the right 810 // result even without margin collapsing. For example: <div><p>text</p></div> 811 // will work right even if both the <div> and the <p> have bottom margins. 812 RenderObject* r = node->renderer(); 813 if (!r || !r->isBox()) 814 return false; 815 816 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 817 // not to do this at all 818 if (node->hasTagName(h1Tag) 819 || node->hasTagName(h2Tag) 820 || node->hasTagName(h3Tag) 821 || node->hasTagName(h4Tag) 822 || node->hasTagName(h5Tag) 823 || node->hasTagName(h6Tag) 824 || node->hasTagName(pTag)) { 825 RenderStyle* style = r->style(); 826 if (style) { 827 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 828 int fontSize = style->fontDescription().computedPixelSize(); 829 if (bottomMargin * 2 >= fontSize) 830 return true; 831 } 832 } 833 834 return false; 835 } 836 837 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 838 { 839 const UChar* characters = renderer->text()->characters(); 840 int length = renderer->text()->length(); 841 for (int i = textEnd; i < length; ++i) { 842 if (!renderer->style()->isCollapsibleWhiteSpace(characters[i])) 843 return i - textEnd; 844 } 845 846 return length - textEnd; 847 } 848 849 static int maxOffsetIncludingCollapsedSpaces(Node* node) 850 { 851 int offset = caretMaxOffset(node); 852 853 if (node->renderer() && node->renderer()->isText()) 854 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 855 856 return offset; 857 } 858 859 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 860 bool TextIterator::shouldRepresentNodeOffsetZero() 861 { 862 if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable()) 863 return true; 864 865 // Leave element positioned flush with start of a paragraph 866 // (e.g. do not insert tab before a table cell at the start of a paragraph) 867 if (m_lastCharacter == '\n') 868 return false; 869 870 // Otherwise, show the position if we have emitted any characters 871 if (m_hasEmitted) 872 return true; 873 874 // We've not emitted anything yet. Generally, there is no need for any positioning then. 875 // The only exception is when the element is visually not in the same line as 876 // the start of the range (e.g. the range starts at the end of the previous paragraph). 877 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 878 // make quicker checks to possibly avoid that. Another check that we could make is 879 // is whether the inline vs block flow changed since the previous visible element. 880 // I think we're already in a special enough case that that won't be needed, tho. 881 882 // No character needed if this is the first node in the range. 883 if (m_node == m_startContainer) 884 return false; 885 886 // If we are outside the start container's subtree, assume we need to emit. 887 // FIXME: m_startContainer could be an inline block 888 if (!m_node->isDescendantOf(m_startContainer)) 889 return true; 890 891 // If we started as m_startContainer offset 0 and the current node is a descendant of 892 // the start container, we already had enough context to correctly decide whether to 893 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 894 // so don't second guess that now. 895 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 896 // immaterial since we likely would have already emitted something by now. 897 if (m_startOffset == 0) 898 return false; 899 900 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 901 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 902 // we would create VisiblePositions on every call to this function without this check. 903 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE) 904 return false; 905 906 // The startPos.isNotNull() check is needed because the start could be before the body, 907 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 908 // The currPos.isNotNull() check is needed because positions in non-HTML content 909 // (like SVG) do not have visible positions, and we don't want to emit for them either. 910 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 911 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 912 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 913 } 914 915 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 916 { 917 return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 918 } 919 920 void TextIterator::representNodeOffsetZero() 921 { 922 // Emit a character to show the positioning of m_node. 923 924 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 925 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 926 // on m_node to see if it necessitates emitting a character first and will early return 927 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 928 if (shouldEmitTabBeforeNode(m_node)) { 929 if (shouldRepresentNodeOffsetZero()) 930 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 931 } else if (shouldEmitNewlineBeforeNode(m_node)) { 932 if (shouldRepresentNodeOffsetZero()) 933 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 934 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 935 if (shouldRepresentNodeOffsetZero()) 936 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); 937 } 938 } 939 940 bool TextIterator::handleNonTextNode() 941 { 942 if (shouldEmitNewlineForNode(m_node)) 943 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 944 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 945 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); 946 else 947 representNodeOffsetZero(); 948 949 return true; 950 } 951 952 void TextIterator::exitNode() 953 { 954 // prevent emitting a newline when exiting a collapsed block at beginning of the range 955 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 956 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 957 // therefore look like a blank line. 958 if (!m_hasEmitted) 959 return; 960 961 // Emit with a position *inside* m_node, after m_node's contents, in 962 // case it is a block, because the run should start where the 963 // emitted character is positioned visually. 964 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 965 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 966 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 967 // TextIterator in _web_attributedStringFromRange. 968 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 969 if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) { 970 // use extra newline to represent margin bottom, as needed 971 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 972 973 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 974 // contain a VisiblePosition when doing selection preservation. 975 if (m_lastCharacter != '\n') { 976 // insert a newline with a position following this block's contents. 977 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 978 // remember whether to later add a newline for the current node 979 ASSERT(!m_needsAnotherNewline); 980 m_needsAnotherNewline = addNewline; 981 } else if (addNewline) 982 // insert a newline with a position following this block's contents. 983 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 984 } 985 986 // If nothing was emitted, see if we need to emit a space. 987 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 988 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); 989 } 990 991 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 992 { 993 m_hasEmitted = true; 994 995 // remember information with which to construct the TextIterator::range() 996 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 997 m_positionNode = textNode; 998 m_positionOffsetBaseNode = offsetBaseNode; 999 m_positionStartOffset = textStartOffset; 1000 m_positionEndOffset = textEndOffset; 1001 1002 // remember information with which to construct the TextIterator::characters() and length() 1003 m_singleCharacterBuffer = c; 1004 m_textCharacters = &m_singleCharacterBuffer; 1005 m_textLength = 1; 1006 1007 // remember some iteration state 1008 m_lastTextNodeEndedWithCollapsedSpace = false; 1009 m_lastCharacter = c; 1010 } 1011 1012 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) 1013 { 1014 RenderText* renderer = toRenderText(renderObject); 1015 m_text = m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text(); 1016 ASSERT(m_text.characters()); 1017 1018 m_positionNode = textNode; 1019 m_positionOffsetBaseNode = 0; 1020 m_positionStartOffset = textStartOffset; 1021 m_positionEndOffset = textEndOffset; 1022 m_textCharacters = m_text.characters() + textStartOffset; 1023 m_textLength = textEndOffset - textStartOffset; 1024 m_lastCharacter = m_text[textEndOffset - 1]; 1025 1026 m_lastTextNodeEndedWithCollapsedSpace = false; 1027 m_hasEmitted = true; 1028 } 1029 1030 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) 1031 { 1032 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); 1033 } 1034 1035 PassRefPtr<Range> TextIterator::range() const 1036 { 1037 // use the current run information, if we have it 1038 if (m_positionNode) { 1039 if (m_positionOffsetBaseNode) { 1040 int index = m_positionOffsetBaseNode->nodeIndex(); 1041 m_positionStartOffset += index; 1042 m_positionEndOffset += index; 1043 m_positionOffsetBaseNode = 0; 1044 } 1045 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1046 } 1047 1048 // otherwise, return the end of the overall range we were given 1049 if (m_endContainer) 1050 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1051 1052 return 0; 1053 } 1054 1055 Node* TextIterator::node() const 1056 { 1057 RefPtr<Range> textRange = range(); 1058 if (!textRange) 1059 return 0; 1060 1061 Node* node = textRange->startContainer(); 1062 if (!node) 1063 return 0; 1064 if (node->offsetInCharacters()) 1065 return node; 1066 1067 return node->childNode(textRange->startOffset()); 1068 } 1069 1070 // -------- 1071 1072 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator() 1073 : m_behavior(TextIteratorDefaultBehavior) 1074 , m_node(0) 1075 , m_positionNode(0) 1076 #if OS(ANDROID) 1077 , m_stopsOnFormControls(false) 1078 , m_shouldStop(false) 1079 #endif 1080 { 1081 } 1082 1083 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior) 1084 : m_behavior(behavior) 1085 , m_node(0) 1086 , m_positionNode(0) 1087 #if OS(ANDROID) 1088 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1089 , m_shouldStop(false) 1090 #endif 1091 { 1092 #if OS(ANDROID) 1093 ASSERT(m_behavior == TextIteratorDefaultBehavior || m_behavior == TextIteratorStopsOnFormControls); 1094 #else 1095 ASSERT(m_behavior == TextIteratorDefaultBehavior); 1096 #endif 1097 1098 if (!r) 1099 return; 1100 1101 Node* startNode = r->startContainer(); 1102 if (!startNode) 1103 return; 1104 Node* endNode = r->endContainer(); 1105 int startOffset = r->startOffset(); 1106 int endOffset = r->endOffset(); 1107 1108 if (!startNode->offsetInCharacters()) { 1109 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { 1110 startNode = startNode->childNode(startOffset); 1111 startOffset = 0; 1112 } 1113 } 1114 if (!endNode->offsetInCharacters()) { 1115 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { 1116 endNode = endNode->childNode(endOffset - 1); 1117 endOffset = lastOffsetInNode(endNode); 1118 } 1119 } 1120 1121 m_node = endNode; 1122 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1123 m_offset = endOffset; 1124 m_handledNode = false; 1125 m_handledChildren = endOffset == 0; 1126 1127 m_startNode = startNode; 1128 m_startOffset = startOffset; 1129 m_endNode = endNode; 1130 m_endOffset = endOffset; 1131 1132 #ifndef NDEBUG 1133 // Need this just because of the assert. 1134 m_positionNode = endNode; 1135 #endif 1136 1137 m_lastTextNode = 0; 1138 m_lastCharacter = '\n'; 1139 1140 m_havePassedStartNode = false; 1141 1142 advance(); 1143 } 1144 1145 bool SimplifiedBackwardsTextIterator::atEnd() const 1146 { 1147 #if OS(ANDROID) 1148 return !m_positionNode || m_shouldStop; 1149 #else 1150 return !m_positionNode; 1151 #endif 1152 } 1153 1154 void SimplifiedBackwardsTextIterator::advance() 1155 { 1156 ASSERT(m_positionNode); 1157 1158 #if OS(ANDROID) 1159 if (m_shouldStop) 1160 return; 1161 1162 // Prevent changing the iterator position if a form control element was found and advance should stop on it. 1163 if (m_stopsOnFormControls && checkFormControlElement(m_node)) { 1164 m_shouldStop = true; 1165 return; 1166 } 1167 #endif 1168 1169 m_positionNode = 0; 1170 m_textLength = 0; 1171 1172 while (m_node && !m_havePassedStartNode) { 1173 // Don't handle node if we start iterating at [node, 0]. 1174 if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) { 1175 RenderObject* renderer = m_node->renderer(); 1176 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1177 // FIXME: What about CDATA_SECTION_NODE? 1178 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1179 m_handledNode = handleTextNode(); 1180 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1181 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1182 m_handledNode = handleReplacedElement(); 1183 } else 1184 m_handledNode = handleNonTextNode(); 1185 if (m_positionNode) 1186 return; 1187 } 1188 1189 if (!m_handledChildren && m_node->hasChildNodes()) { 1190 m_node = m_node->lastChild(); 1191 pushFullyClippedState(m_fullyClippedStack, m_node); 1192 } else { 1193 // Exit empty containers as we pass over them or containers 1194 // where [container, 0] is where we started iterating. 1195 if (!m_handledNode 1196 && canHaveChildrenForEditing(m_node) 1197 && m_node->parentNode() 1198 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1199 exitNode(); 1200 if (m_positionNode) { 1201 m_handledNode = true; 1202 m_handledChildren = true; 1203 return; 1204 } 1205 } 1206 1207 // Exit all other containers. 1208 while (!m_node->previousSibling()) { 1209 if (!advanceRespectingRange(m_node->parentOrHostNode())) 1210 break; 1211 m_fullyClippedStack.pop(); 1212 exitNode(); 1213 if (m_positionNode) { 1214 m_handledNode = true; 1215 m_handledChildren = true; 1216 return; 1217 } 1218 } 1219 1220 m_fullyClippedStack.pop(); 1221 if (advanceRespectingRange(m_node->previousSibling())) 1222 pushFullyClippedState(m_fullyClippedStack, m_node); 1223 else 1224 m_node = 0; 1225 } 1226 1227 // For the purpose of word boundary detection, 1228 // we should iterate all visible text and trailing (collapsed) whitespaces. 1229 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1230 m_handledNode = false; 1231 m_handledChildren = false; 1232 1233 if (m_positionNode) 1234 return; 1235 } 1236 } 1237 1238 bool SimplifiedBackwardsTextIterator::handleTextNode() 1239 { 1240 m_lastTextNode = m_node; 1241 1242 RenderText* renderer = toRenderText(m_node->renderer()); 1243 String str = renderer->text(); 1244 1245 if (!renderer->firstTextBox() && str.length() > 0) 1246 return true; 1247 1248 m_positionEndOffset = m_offset; 1249 1250 m_offset = (m_node == m_startNode) ? m_startOffset : 0; 1251 m_positionNode = m_node; 1252 m_positionStartOffset = m_offset; 1253 m_textLength = m_positionEndOffset - m_positionStartOffset; 1254 m_textCharacters = str.characters() + m_positionStartOffset; 1255 1256 m_lastCharacter = str[m_positionEndOffset - 1]; 1257 1258 return true; 1259 } 1260 1261 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1262 { 1263 unsigned index = m_node->nodeIndex(); 1264 // We want replaced elements to behave like punctuation for boundary 1265 // finding, and to simply take up space for the selection preservation 1266 // code in moveParagraphs, so we use a comma. Unconditionally emit 1267 // here because this iterator is only used for boundary finding. 1268 emitCharacter(',', m_node->parentNode(), index, index + 1); 1269 return true; 1270 } 1271 1272 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1273 { 1274 // We can use a linefeed in place of a tab because this simple iterator is only used to 1275 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1276 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1277 unsigned index = m_node->nodeIndex(); 1278 // The start of this emitted range is wrong. Ensuring correctness would require 1279 // VisiblePositions and so would be slow. previousBoundary expects this. 1280 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1281 } 1282 return true; 1283 } 1284 1285 void SimplifiedBackwardsTextIterator::exitNode() 1286 { 1287 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1288 // The start of this emitted range is wrong. Ensuring correctness would require 1289 // VisiblePositions and so would be slow. previousBoundary expects this. 1290 emitCharacter('\n', m_node, 0, 0); 1291 } 1292 } 1293 1294 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1295 { 1296 m_singleCharacterBuffer = c; 1297 m_positionNode = node; 1298 m_positionStartOffset = startOffset; 1299 m_positionEndOffset = endOffset; 1300 m_textCharacters = &m_singleCharacterBuffer; 1301 m_textLength = 1; 1302 m_lastCharacter = c; 1303 } 1304 1305 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1306 { 1307 if (!next) 1308 return false; 1309 m_havePassedStartNode |= m_node == m_startNode; 1310 if (m_havePassedStartNode) 1311 return false; 1312 m_node = next; 1313 return true; 1314 } 1315 1316 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const 1317 { 1318 if (m_positionNode) 1319 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1320 1321 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); 1322 } 1323 1324 // -------- 1325 1326 CharacterIterator::CharacterIterator() 1327 : m_offset(0) 1328 , m_runOffset(0) 1329 , m_atBreak(true) 1330 { 1331 } 1332 1333 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior) 1334 : m_offset(0) 1335 , m_runOffset(0) 1336 , m_atBreak(true) 1337 , m_textIterator(r, behavior) 1338 { 1339 while (!atEnd() && m_textIterator.length() == 0) 1340 m_textIterator.advance(); 1341 } 1342 1343 PassRefPtr<Range> CharacterIterator::range() const 1344 { 1345 RefPtr<Range> r = m_textIterator.range(); 1346 if (!m_textIterator.atEnd()) { 1347 if (m_textIterator.length() <= 1) { 1348 ASSERT(m_runOffset == 0); 1349 } else { 1350 Node* n = r->startContainer(); 1351 ASSERT(n == r->endContainer()); 1352 int offset = r->startOffset() + m_runOffset; 1353 ExceptionCode ec = 0; 1354 r->setStart(n, offset, ec); 1355 r->setEnd(n, offset + 1, ec); 1356 ASSERT(!ec); 1357 } 1358 } 1359 return r.release(); 1360 } 1361 1362 void CharacterIterator::advance(int count) 1363 { 1364 if (count <= 0) { 1365 ASSERT(count == 0); 1366 return; 1367 } 1368 1369 m_atBreak = false; 1370 1371 // easy if there is enough left in the current m_textIterator run 1372 int remaining = m_textIterator.length() - m_runOffset; 1373 if (count < remaining) { 1374 m_runOffset += count; 1375 m_offset += count; 1376 return; 1377 } 1378 1379 // exhaust the current m_textIterator run 1380 count -= remaining; 1381 m_offset += remaining; 1382 1383 // move to a subsequent m_textIterator run 1384 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1385 int runLength = m_textIterator.length(); 1386 if (runLength == 0) 1387 m_atBreak = true; 1388 else { 1389 // see whether this is m_textIterator to use 1390 if (count < runLength) { 1391 m_runOffset = count; 1392 m_offset += count; 1393 return; 1394 } 1395 1396 // exhaust this m_textIterator run 1397 count -= runLength; 1398 m_offset += runLength; 1399 } 1400 } 1401 1402 // ran to the end of the m_textIterator... no more runs left 1403 m_atBreak = true; 1404 m_runOffset = 0; 1405 } 1406 1407 String CharacterIterator::string(int numChars) 1408 { 1409 Vector<UChar> result; 1410 result.reserveInitialCapacity(numChars); 1411 while (numChars > 0 && !atEnd()) { 1412 int runSize = min(numChars, length()); 1413 result.append(characters(), runSize); 1414 numChars -= runSize; 1415 advance(runSize); 1416 } 1417 return String::adopt(result); 1418 } 1419 1420 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) 1421 { 1422 it.advance(offset); 1423 RefPtr<Range> start = it.range(); 1424 1425 if (length > 1) 1426 it.advance(length - 1); 1427 RefPtr<Range> end = it.range(); 1428 1429 return Range::create(start->startContainer()->document(), 1430 start->startContainer(), start->startOffset(), 1431 end->endContainer(), end->endOffset()); 1432 } 1433 1434 BackwardsCharacterIterator::BackwardsCharacterIterator() 1435 : m_offset(0) 1436 , m_runOffset(0) 1437 , m_atBreak(true) 1438 { 1439 } 1440 1441 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior) 1442 : m_offset(0) 1443 , m_runOffset(0) 1444 , m_atBreak(true) 1445 , m_textIterator(range, behavior) 1446 { 1447 while (!atEnd() && !m_textIterator.length()) 1448 m_textIterator.advance(); 1449 } 1450 1451 PassRefPtr<Range> BackwardsCharacterIterator::range() const 1452 { 1453 RefPtr<Range> r = m_textIterator.range(); 1454 if (!m_textIterator.atEnd()) { 1455 if (m_textIterator.length() <= 1) 1456 ASSERT(m_runOffset == 0); 1457 else { 1458 Node* n = r->startContainer(); 1459 ASSERT(n == r->endContainer()); 1460 int offset = r->endOffset() - m_runOffset; 1461 ExceptionCode ec = 0; 1462 r->setStart(n, offset - 1, ec); 1463 r->setEnd(n, offset, ec); 1464 ASSERT(!ec); 1465 } 1466 } 1467 return r.release(); 1468 } 1469 1470 void BackwardsCharacterIterator::advance(int count) 1471 { 1472 if (count <= 0) { 1473 ASSERT(!count); 1474 return; 1475 } 1476 1477 m_atBreak = false; 1478 1479 int remaining = m_textIterator.length() - m_runOffset; 1480 if (count < remaining) { 1481 m_runOffset += count; 1482 m_offset += count; 1483 return; 1484 } 1485 1486 count -= remaining; 1487 m_offset += remaining; 1488 1489 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1490 int runLength = m_textIterator.length(); 1491 if (runLength == 0) 1492 m_atBreak = true; 1493 else { 1494 if (count < runLength) { 1495 m_runOffset = count; 1496 m_offset += count; 1497 return; 1498 } 1499 1500 count -= runLength; 1501 m_offset += runLength; 1502 } 1503 } 1504 1505 m_atBreak = true; 1506 m_runOffset = 0; 1507 } 1508 1509 // -------- 1510 1511 WordAwareIterator::WordAwareIterator() 1512 : m_previousText(0) 1513 , m_didLookAhead(false) 1514 { 1515 } 1516 1517 WordAwareIterator::WordAwareIterator(const Range* r) 1518 : m_previousText(0) 1519 , m_didLookAhead(true) // so we consider the first chunk from the text iterator 1520 , m_textIterator(r) 1521 { 1522 advance(); // get in position over the first chunk of text 1523 } 1524 1525 WordAwareIterator::~WordAwareIterator() 1526 { 1527 } 1528 1529 // We're always in one of these modes: 1530 // - The current chunk in the text iterator is our current chunk 1531 // (typically its a piece of whitespace, or text that ended with whitespace) 1532 // - The previous chunk in the text iterator is our current chunk 1533 // (we looked ahead to the next chunk and found a word boundary) 1534 // - We built up our own chunk of text from many chunks from the text iterator 1535 1536 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1537 1538 void WordAwareIterator::advance() 1539 { 1540 m_previousText = 0; 1541 m_buffer.clear(); // toss any old buffer we built up 1542 1543 // If last time we did a look-ahead, start with that looked-ahead chunk now 1544 if (!m_didLookAhead) { 1545 ASSERT(!m_textIterator.atEnd()); 1546 m_textIterator.advance(); 1547 } 1548 m_didLookAhead = false; 1549 1550 // Go to next non-empty chunk 1551 while (!m_textIterator.atEnd() && m_textIterator.length() == 0) 1552 m_textIterator.advance(); 1553 m_range = m_textIterator.range(); 1554 1555 if (m_textIterator.atEnd()) 1556 return; 1557 1558 while (1) { 1559 // If this chunk ends in whitespace we can just use it as our chunk. 1560 if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1])) 1561 return; 1562 1563 // If this is the first chunk that failed, save it in previousText before look ahead 1564 if (m_buffer.isEmpty()) { 1565 m_previousText = m_textIterator.characters(); 1566 m_previousLength = m_textIterator.length(); 1567 } 1568 1569 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1570 m_textIterator.advance(); 1571 if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) { 1572 m_didLookAhead = true; 1573 return; 1574 } 1575 1576 if (m_buffer.isEmpty()) { 1577 // Start gobbling chunks until we get to a suitable stopping point 1578 m_buffer.append(m_previousText, m_previousLength); 1579 m_previousText = 0; 1580 } 1581 m_buffer.append(m_textIterator.characters(), m_textIterator.length()); 1582 int exception = 0; 1583 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception); 1584 } 1585 } 1586 1587 int WordAwareIterator::length() const 1588 { 1589 if (!m_buffer.isEmpty()) 1590 return m_buffer.size(); 1591 if (m_previousText) 1592 return m_previousLength; 1593 return m_textIterator.length(); 1594 } 1595 1596 const UChar* WordAwareIterator::characters() const 1597 { 1598 if (!m_buffer.isEmpty()) 1599 return m_buffer.data(); 1600 if (m_previousText) 1601 return m_previousText; 1602 return m_textIterator.characters(); 1603 } 1604 1605 // -------- 1606 1607 static inline UChar foldQuoteMarkOrSoftHyphen(UChar c) 1608 { 1609 switch (c) { 1610 case hebrewPunctuationGershayim: 1611 case leftDoubleQuotationMark: 1612 case rightDoubleQuotationMark: 1613 return '"'; 1614 case hebrewPunctuationGeresh: 1615 case leftSingleQuotationMark: 1616 case rightSingleQuotationMark: 1617 return '\''; 1618 case softHyphen: 1619 // Replace soft hyphen with an ignorable character so that their presence or absence will 1620 // not affect string comparison. 1621 return 0; 1622 default: 1623 return c; 1624 } 1625 } 1626 1627 static inline void foldQuoteMarksAndSoftHyphens(String& s) 1628 { 1629 s.replace(hebrewPunctuationGeresh, '\''); 1630 s.replace(hebrewPunctuationGershayim, '"'); 1631 s.replace(leftDoubleQuotationMark, '"'); 1632 s.replace(leftSingleQuotationMark, '\''); 1633 s.replace(rightDoubleQuotationMark, '"'); 1634 s.replace(rightSingleQuotationMark, '\''); 1635 // Replace soft hyphen with an ignorable character so that their presence or absence will 1636 // not affect string comparison. 1637 s.replace(softHyphen, 0); 1638 } 1639 1640 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 1641 1642 static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) 1643 { 1644 for (size_t i = 0; i < length; ++i) 1645 data[i] = foldQuoteMarkOrSoftHyphen(data[i]); 1646 } 1647 1648 static const size_t minimumSearchBufferSize = 8192; 1649 1650 #ifndef NDEBUG 1651 static bool searcherInUse; 1652 #endif 1653 1654 static UStringSearch* createSearcher() 1655 { 1656 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1657 // but it doesn't matter exactly what it is, since we don't perform any searches 1658 // without setting both the pattern and the text. 1659 UErrorCode status = U_ZERO_ERROR; 1660 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status); 1661 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1662 return searcher; 1663 } 1664 1665 static UStringSearch* searcher() 1666 { 1667 static UStringSearch* searcher = createSearcher(); 1668 return searcher; 1669 } 1670 1671 static inline void lockSearcher() 1672 { 1673 #ifndef NDEBUG 1674 ASSERT(!searcherInUse); 1675 searcherInUse = true; 1676 #endif 1677 } 1678 1679 static inline void unlockSearcher() 1680 { 1681 #ifndef NDEBUG 1682 ASSERT(searcherInUse); 1683 searcherInUse = false; 1684 #endif 1685 } 1686 1687 // ICU's search ignores the distinction between small kana letters and ones 1688 // that are not small, and also characters that differ only in the voicing 1689 // marks when considering only primary collation strength diffrences. 1690 // This is not helpful for end users, since these differences make words 1691 // distinct, so for our purposes we need these to be considered. 1692 // The Unicode folks do not think the collation algorithm should be 1693 // changed. To work around this, we would like to tailor the ICU searcher, 1694 // but we can't get that to work yet. So instead, we check for cases where 1695 // these differences occur, and skip those matches. 1696 1697 // We refer to the above technique as the "kana workaround". The next few 1698 // functions are helper functinos for the kana workaround. 1699 1700 static inline bool isKanaLetter(UChar character) 1701 { 1702 // Hiragana letters. 1703 if (character >= 0x3041 && character <= 0x3096) 1704 return true; 1705 1706 // Katakana letters. 1707 if (character >= 0x30A1 && character <= 0x30FA) 1708 return true; 1709 if (character >= 0x31F0 && character <= 0x31FF) 1710 return true; 1711 1712 // Halfwidth katakana letters. 1713 if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70) 1714 return true; 1715 1716 return false; 1717 } 1718 1719 static inline bool isSmallKanaLetter(UChar character) 1720 { 1721 ASSERT(isKanaLetter(character)); 1722 1723 switch (character) { 1724 case 0x3041: // HIRAGANA LETTER SMALL A 1725 case 0x3043: // HIRAGANA LETTER SMALL I 1726 case 0x3045: // HIRAGANA LETTER SMALL U 1727 case 0x3047: // HIRAGANA LETTER SMALL E 1728 case 0x3049: // HIRAGANA LETTER SMALL O 1729 case 0x3063: // HIRAGANA LETTER SMALL TU 1730 case 0x3083: // HIRAGANA LETTER SMALL YA 1731 case 0x3085: // HIRAGANA LETTER SMALL YU 1732 case 0x3087: // HIRAGANA LETTER SMALL YO 1733 case 0x308E: // HIRAGANA LETTER SMALL WA 1734 case 0x3095: // HIRAGANA LETTER SMALL KA 1735 case 0x3096: // HIRAGANA LETTER SMALL KE 1736 case 0x30A1: // KATAKANA LETTER SMALL A 1737 case 0x30A3: // KATAKANA LETTER SMALL I 1738 case 0x30A5: // KATAKANA LETTER SMALL U 1739 case 0x30A7: // KATAKANA LETTER SMALL E 1740 case 0x30A9: // KATAKANA LETTER SMALL O 1741 case 0x30C3: // KATAKANA LETTER SMALL TU 1742 case 0x30E3: // KATAKANA LETTER SMALL YA 1743 case 0x30E5: // KATAKANA LETTER SMALL YU 1744 case 0x30E7: // KATAKANA LETTER SMALL YO 1745 case 0x30EE: // KATAKANA LETTER SMALL WA 1746 case 0x30F5: // KATAKANA LETTER SMALL KA 1747 case 0x30F6: // KATAKANA LETTER SMALL KE 1748 case 0x31F0: // KATAKANA LETTER SMALL KU 1749 case 0x31F1: // KATAKANA LETTER SMALL SI 1750 case 0x31F2: // KATAKANA LETTER SMALL SU 1751 case 0x31F3: // KATAKANA LETTER SMALL TO 1752 case 0x31F4: // KATAKANA LETTER SMALL NU 1753 case 0x31F5: // KATAKANA LETTER SMALL HA 1754 case 0x31F6: // KATAKANA LETTER SMALL HI 1755 case 0x31F7: // KATAKANA LETTER SMALL HU 1756 case 0x31F8: // KATAKANA LETTER SMALL HE 1757 case 0x31F9: // KATAKANA LETTER SMALL HO 1758 case 0x31FA: // KATAKANA LETTER SMALL MU 1759 case 0x31FB: // KATAKANA LETTER SMALL RA 1760 case 0x31FC: // KATAKANA LETTER SMALL RI 1761 case 0x31FD: // KATAKANA LETTER SMALL RU 1762 case 0x31FE: // KATAKANA LETTER SMALL RE 1763 case 0x31FF: // KATAKANA LETTER SMALL RO 1764 case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A 1765 case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I 1766 case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U 1767 case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E 1768 case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O 1769 case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA 1770 case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU 1771 case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO 1772 case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU 1773 return true; 1774 } 1775 return false; 1776 } 1777 1778 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark }; 1779 1780 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character) 1781 { 1782 ASSERT(isKanaLetter(character)); 1783 1784 switch (character) { 1785 case 0x304C: // HIRAGANA LETTER GA 1786 case 0x304E: // HIRAGANA LETTER GI 1787 case 0x3050: // HIRAGANA LETTER GU 1788 case 0x3052: // HIRAGANA LETTER GE 1789 case 0x3054: // HIRAGANA LETTER GO 1790 case 0x3056: // HIRAGANA LETTER ZA 1791 case 0x3058: // HIRAGANA LETTER ZI 1792 case 0x305A: // HIRAGANA LETTER ZU 1793 case 0x305C: // HIRAGANA LETTER ZE 1794 case 0x305E: // HIRAGANA LETTER ZO 1795 case 0x3060: // HIRAGANA LETTER DA 1796 case 0x3062: // HIRAGANA LETTER DI 1797 case 0x3065: // HIRAGANA LETTER DU 1798 case 0x3067: // HIRAGANA LETTER DE 1799 case 0x3069: // HIRAGANA LETTER DO 1800 case 0x3070: // HIRAGANA LETTER BA 1801 case 0x3073: // HIRAGANA LETTER BI 1802 case 0x3076: // HIRAGANA LETTER BU 1803 case 0x3079: // HIRAGANA LETTER BE 1804 case 0x307C: // HIRAGANA LETTER BO 1805 case 0x3094: // HIRAGANA LETTER VU 1806 case 0x30AC: // KATAKANA LETTER GA 1807 case 0x30AE: // KATAKANA LETTER GI 1808 case 0x30B0: // KATAKANA LETTER GU 1809 case 0x30B2: // KATAKANA LETTER GE 1810 case 0x30B4: // KATAKANA LETTER GO 1811 case 0x30B6: // KATAKANA LETTER ZA 1812 case 0x30B8: // KATAKANA LETTER ZI 1813 case 0x30BA: // KATAKANA LETTER ZU 1814 case 0x30BC: // KATAKANA LETTER ZE 1815 case 0x30BE: // KATAKANA LETTER ZO 1816 case 0x30C0: // KATAKANA LETTER DA 1817 case 0x30C2: // KATAKANA LETTER DI 1818 case 0x30C5: // KATAKANA LETTER DU 1819 case 0x30C7: // KATAKANA LETTER DE 1820 case 0x30C9: // KATAKANA LETTER DO 1821 case 0x30D0: // KATAKANA LETTER BA 1822 case 0x30D3: // KATAKANA LETTER BI 1823 case 0x30D6: // KATAKANA LETTER BU 1824 case 0x30D9: // KATAKANA LETTER BE 1825 case 0x30DC: // KATAKANA LETTER BO 1826 case 0x30F4: // KATAKANA LETTER VU 1827 case 0x30F7: // KATAKANA LETTER VA 1828 case 0x30F8: // KATAKANA LETTER VI 1829 case 0x30F9: // KATAKANA LETTER VE 1830 case 0x30FA: // KATAKANA LETTER VO 1831 return VoicedSoundMark; 1832 case 0x3071: // HIRAGANA LETTER PA 1833 case 0x3074: // HIRAGANA LETTER PI 1834 case 0x3077: // HIRAGANA LETTER PU 1835 case 0x307A: // HIRAGANA LETTER PE 1836 case 0x307D: // HIRAGANA LETTER PO 1837 case 0x30D1: // KATAKANA LETTER PA 1838 case 0x30D4: // KATAKANA LETTER PI 1839 case 0x30D7: // KATAKANA LETTER PU 1840 case 0x30DA: // KATAKANA LETTER PE 1841 case 0x30DD: // KATAKANA LETTER PO 1842 return SemiVoicedSoundMark; 1843 } 1844 return NoVoicedSoundMark; 1845 } 1846 1847 static inline bool isCombiningVoicedSoundMark(UChar character) 1848 { 1849 switch (character) { 1850 case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 1851 case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 1852 return true; 1853 } 1854 return false; 1855 } 1856 1857 static inline bool containsKanaLetters(const String& pattern) 1858 { 1859 const UChar* characters = pattern.characters(); 1860 unsigned length = pattern.length(); 1861 for (unsigned i = 0; i < length; ++i) { 1862 if (isKanaLetter(characters[i])) 1863 return true; 1864 } 1865 return false; 1866 } 1867 1868 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer) 1869 { 1870 ASSERT(length); 1871 1872 buffer.resize(length); 1873 1874 UErrorCode status = U_ZERO_ERROR; 1875 size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status); 1876 ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR); 1877 ASSERT(bufferSize); 1878 1879 buffer.resize(bufferSize); 1880 1881 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) 1882 return; 1883 1884 status = U_ZERO_ERROR; 1885 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status); 1886 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 1887 } 1888 1889 static bool isNonLatin1Separator(UChar32 character) 1890 { 1891 ASSERT_ARG(character, character >= 256); 1892 1893 return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK); 1894 } 1895 1896 static inline bool isSeparator(UChar32 character) 1897 { 1898 static const bool latin1SeparatorTable[256] = { 1899 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1901 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 1902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ? 1903 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ 1904 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _ 1905 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ` 1906 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~ 1907 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1908 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1909 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1910 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1911 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1912 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1913 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1914 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 1915 }; 1916 1917 if (character < 256) 1918 return latin1SeparatorTable[character]; 1919 1920 return isNonLatin1Separator(character); 1921 } 1922 1923 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1924 : m_target(target) 1925 , m_options(options) 1926 , m_prefixLength(0) 1927 , m_atBreak(true) 1928 , m_needsMoreContext(options & AtWordStarts) 1929 , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target)) 1930 { 1931 ASSERT(!m_target.isEmpty()); 1932 1933 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1934 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1935 // to add tailoring on top of the locale-specific tailoring as of this writing. 1936 foldQuoteMarksAndSoftHyphens(m_target); 1937 1938 size_t targetLength = m_target.length(); 1939 m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize)); 1940 m_overlap = m_buffer.capacity() / 4; 1941 1942 if ((m_options & AtWordStarts) && targetLength) { 1943 UChar32 targetFirstCharacter; 1944 U16_GET(m_target.characters(), 0, 0, targetLength, targetFirstCharacter); 1945 // Characters in the separator category never really occur at the beginning of a word, 1946 // so if the target begins with such a character, we just ignore the AtWordStart option. 1947 if (isSeparator(targetFirstCharacter)) { 1948 m_options &= ~AtWordStarts; 1949 m_needsMoreContext = false; 1950 } 1951 } 1952 1953 // Grab the single global searcher. 1954 // If we ever have a reason to do more than once search buffer at once, we'll have 1955 // to move to multiple searchers. 1956 lockSearcher(); 1957 1958 UStringSearch* searcher = WebCore::searcher(); 1959 UCollator* collator = usearch_getCollator(searcher); 1960 1961 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 1962 if (ucol_getStrength(collator) != strength) { 1963 ucol_setStrength(collator, strength); 1964 usearch_reset(searcher); 1965 } 1966 1967 UErrorCode status = U_ZERO_ERROR; 1968 usearch_setPattern(searcher, m_target.characters(), targetLength, &status); 1969 ASSERT(status == U_ZERO_ERROR); 1970 1971 // The kana workaround requires a normalized copy of the target string. 1972 if (m_targetRequiresKanaWorkaround) 1973 normalizeCharacters(m_target.characters(), m_target.length(), m_normalizedTarget); 1974 } 1975 1976 inline SearchBuffer::~SearchBuffer() 1977 { 1978 // Leave the static object pointing to a valid string. 1979 UErrorCode status = U_ZERO_ERROR; 1980 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1981 ASSERT(status == U_ZERO_ERROR); 1982 1983 unlockSearcher(); 1984 } 1985 1986 inline size_t SearchBuffer::append(const UChar* characters, size_t length) 1987 { 1988 ASSERT(length); 1989 1990 if (m_atBreak) { 1991 m_buffer.shrink(0); 1992 m_prefixLength = 0; 1993 m_atBreak = false; 1994 } else if (m_buffer.size() == m_buffer.capacity()) { 1995 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 1996 m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap); 1997 m_buffer.shrink(m_overlap); 1998 } 1999 2000 size_t oldLength = m_buffer.size(); 2001 size_t usableLength = min(m_buffer.capacity() - oldLength, length); 2002 ASSERT(usableLength); 2003 m_buffer.append(characters, usableLength); 2004 foldQuoteMarksAndSoftHyphens(m_buffer.data() + oldLength, usableLength); 2005 return usableLength; 2006 } 2007 2008 inline bool SearchBuffer::needsMoreContext() const 2009 { 2010 return m_needsMoreContext; 2011 } 2012 2013 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 2014 { 2015 ASSERT(m_needsMoreContext); 2016 ASSERT(m_prefixLength == m_buffer.size()); 2017 2018 if (!length) 2019 return; 2020 2021 m_atBreak = false; 2022 2023 size_t wordBoundaryContextStart = length; 2024 if (wordBoundaryContextStart) { 2025 U16_BACK_1(characters, 0, wordBoundaryContextStart); 2026 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 2027 } 2028 2029 size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 2030 m_buffer.prepend(characters + length - usableLength, usableLength); 2031 m_prefixLength += usableLength; 2032 2033 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 2034 m_needsMoreContext = false; 2035 } 2036 2037 inline bool SearchBuffer::atBreak() const 2038 { 2039 return m_atBreak; 2040 } 2041 2042 inline void SearchBuffer::reachedBreak() 2043 { 2044 m_atBreak = true; 2045 } 2046 2047 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 2048 { 2049 // This function implements the kana workaround. If usearch treats 2050 // it as a match, but we do not want to, then it's a "bad match". 2051 if (!m_targetRequiresKanaWorkaround) 2052 return false; 2053 2054 // Normalize into a match buffer. We reuse a single buffer rather than 2055 // creating a new one each time. 2056 normalizeCharacters(match, matchLength, m_normalizedMatch); 2057 2058 const UChar* a = m_normalizedTarget.begin(); 2059 const UChar* aEnd = m_normalizedTarget.end(); 2060 2061 const UChar* b = m_normalizedMatch.begin(); 2062 const UChar* bEnd = m_normalizedMatch.end(); 2063 2064 while (true) { 2065 // Skip runs of non-kana-letter characters. This is necessary so we can 2066 // correctly handle strings where the target and match have different-length 2067 // runs of characters that match, while still double checking the correctness 2068 // of matches of kana letters with other kana letters. 2069 while (a != aEnd && !isKanaLetter(*a)) 2070 ++a; 2071 while (b != bEnd && !isKanaLetter(*b)) 2072 ++b; 2073 2074 // If we reached the end of either the target or the match, we should have 2075 // reached the end of both; both should have the same number of kana letters. 2076 if (a == aEnd || b == bEnd) { 2077 ASSERT(a == aEnd); 2078 ASSERT(b == bEnd); 2079 return false; 2080 } 2081 2082 // Check for differences in the kana letter character itself. 2083 if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b)) 2084 return true; 2085 if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b)) 2086 return true; 2087 ++a; 2088 ++b; 2089 2090 // Check for differences in combining voiced sound marks found after the letter. 2091 while (1) { 2092 if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) { 2093 if (b != bEnd && isCombiningVoicedSoundMark(*b)) 2094 return true; 2095 break; 2096 } 2097 if (!(b != bEnd && isCombiningVoicedSoundMark(*b))) 2098 return true; 2099 if (*a != *b) 2100 return true; 2101 ++a; 2102 ++b; 2103 } 2104 } 2105 } 2106 2107 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 2108 { 2109 ASSERT(m_options & AtWordStarts); 2110 2111 if (!start) 2112 return true; 2113 2114 if (m_options & TreatMedialCapitalAsWordStart) { 2115 int size = m_buffer.size(); 2116 int offset = start; 2117 UChar32 firstCharacter; 2118 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 2119 UChar32 previousCharacter; 2120 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 2121 2122 if (isSeparator(firstCharacter)) { 2123 // The start of a separator run is a word start (".org" in "webkit.org"). 2124 if (!isSeparator(previousCharacter)) 2125 return true; 2126 } else if (isASCIIUpper(firstCharacter)) { 2127 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 2128 if (!isASCIIUpper(previousCharacter)) 2129 return true; 2130 // The last character of an uppercase run followed by a non-separator, non-digit 2131 // is a word start ("Request" in "XMLHTTPRequest"). 2132 offset = start; 2133 U16_FWD_1(m_buffer.data(), offset, size); 2134 UChar32 nextCharacter = 0; 2135 if (offset < size) 2136 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 2137 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 2138 return true; 2139 } else if (isASCIIDigit(firstCharacter)) { 2140 // The start of a digit run is a word start ("2" in "WebKit2"). 2141 if (!isASCIIDigit(previousCharacter)) 2142 return true; 2143 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 2144 // The start of a non-separator, non-uppercase, non-digit run is a word start, 2145 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 2146 return true; 2147 } 2148 } 2149 2150 size_t wordBreakSearchStart = start + length; 2151 while (wordBreakSearchStart > start) 2152 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 2153 return wordBreakSearchStart == start; 2154 } 2155 2156 inline size_t SearchBuffer::search(size_t& start) 2157 { 2158 size_t size = m_buffer.size(); 2159 if (m_atBreak) { 2160 if (!size) 2161 return 0; 2162 } else { 2163 if (size != m_buffer.capacity()) 2164 return 0; 2165 } 2166 2167 UStringSearch* searcher = WebCore::searcher(); 2168 2169 UErrorCode status = U_ZERO_ERROR; 2170 usearch_setText(searcher, m_buffer.data(), size, &status); 2171 ASSERT(status == U_ZERO_ERROR); 2172 2173 usearch_setOffset(searcher, m_prefixLength, &status); 2174 ASSERT(status == U_ZERO_ERROR); 2175 2176 int matchStart = usearch_next(searcher, &status); 2177 ASSERT(status == U_ZERO_ERROR); 2178 2179 nextMatch: 2180 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 2181 ASSERT(matchStart == USEARCH_DONE); 2182 return 0; 2183 } 2184 2185 // Matches that start in the overlap area are only tentative. 2186 // The same match may appear later, matching more characters, 2187 // possibly including a combining character that's not yet in the buffer. 2188 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2189 size_t overlap = m_overlap; 2190 if (m_options & AtWordStarts) { 2191 // Ensure that there is sufficient context before matchStart the next time around for 2192 // determining if it is at a word boundary. 2193 int wordBoundaryContextStart = matchStart; 2194 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2195 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 2196 overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart)); 2197 } 2198 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2199 m_prefixLength -= min(m_prefixLength, size - overlap); 2200 m_buffer.shrink(overlap); 2201 return 0; 2202 } 2203 2204 size_t matchedLength = usearch_getMatchedLength(searcher); 2205 ASSERT(matchStart + matchedLength <= size); 2206 2207 // If this match is "bad", move on to the next match. 2208 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 2209 matchStart = usearch_next(searcher, &status); 2210 ASSERT(status == U_ZERO_ERROR); 2211 goto nextMatch; 2212 } 2213 2214 size_t newSize = size - (matchStart + 1); 2215 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2216 m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1); 2217 m_buffer.shrink(newSize); 2218 2219 start = size - matchStart; 2220 return matchedLength; 2221 } 2222 2223 #else // !ICU_UNICODE 2224 2225 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 2226 : m_target(options & CaseInsensitive ? target.foldCase() : target) 2227 , m_options(options) 2228 , m_buffer(m_target.length()) 2229 , m_isCharacterStartBuffer(m_target.length()) 2230 , m_isBufferFull(false) 2231 , m_cursor(0) 2232 { 2233 ASSERT(!m_target.isEmpty()); 2234 m_target.replace(noBreakSpace, ' '); 2235 foldQuoteMarksAndSoftHyphens(m_target); 2236 } 2237 2238 inline SearchBuffer::~SearchBuffer() 2239 { 2240 } 2241 2242 inline void SearchBuffer::reachedBreak() 2243 { 2244 m_cursor = 0; 2245 m_isBufferFull = false; 2246 } 2247 2248 inline bool SearchBuffer::atBreak() const 2249 { 2250 return !m_cursor && !m_isBufferFull; 2251 } 2252 2253 inline void SearchBuffer::append(UChar c, bool isStart) 2254 { 2255 m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMarkOrSoftHyphen(c); 2256 m_isCharacterStartBuffer[m_cursor] = isStart; 2257 if (++m_cursor == m_target.length()) { 2258 m_cursor = 0; 2259 m_isBufferFull = true; 2260 } 2261 } 2262 2263 inline size_t SearchBuffer::append(const UChar* characters, size_t length) 2264 { 2265 ASSERT(length); 2266 if (!(m_options & CaseInsensitive)) { 2267 append(characters[0], true); 2268 return 1; 2269 } 2270 const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough 2271 UChar foldedCharacters[maxFoldedCharacters]; 2272 bool error; 2273 int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error); 2274 ASSERT(!error); 2275 ASSERT(numFoldedCharacters); 2276 ASSERT(numFoldedCharacters <= maxFoldedCharacters); 2277 if (!error && numFoldedCharacters) { 2278 numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters); 2279 append(foldedCharacters[0], true); 2280 for (int i = 1; i < numFoldedCharacters; ++i) 2281 append(foldedCharacters[i], false); 2282 } 2283 return 1; 2284 } 2285 2286 inline bool SearchBuffer::needsMoreContext() const 2287 { 2288 return false; 2289 } 2290 2291 void SearchBuffer::prependContext(const UChar*, size_t) 2292 { 2293 ASSERT_NOT_REACHED(); 2294 } 2295 2296 inline size_t SearchBuffer::search(size_t& start) 2297 { 2298 if (!m_isBufferFull) 2299 return 0; 2300 if (!m_isCharacterStartBuffer[m_cursor]) 2301 return 0; 2302 2303 size_t tailSpace = m_target.length() - m_cursor; 2304 if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0) 2305 return 0; 2306 if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0) 2307 return 0; 2308 2309 start = length(); 2310 2311 // Now that we've found a match once, we don't want to find it again, because those 2312 // are the SearchBuffer semantics, allowing for a buffer where you append more than one 2313 // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if 2314 // we want to get rid of that in the future we could track this with a separate boolean 2315 // or even move the characters to the start of the buffer and set m_isBufferFull to false. 2316 m_isCharacterStartBuffer[m_cursor] = false; 2317 2318 return start; 2319 } 2320 2321 // Returns the number of characters that were appended to the buffer (what we are searching in). 2322 // That's not necessarily the same length as the passed-in target string, because case folding 2323 // can make two strings match even though they're not the same length. 2324 size_t SearchBuffer::length() const 2325 { 2326 size_t bufferSize = m_target.length(); 2327 size_t length = 0; 2328 for (size_t i = 0; i < bufferSize; ++i) 2329 length += m_isCharacterStartBuffer[i]; 2330 return length; 2331 } 2332 2333 #endif // !ICU_UNICODE 2334 2335 // -------- 2336 2337 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 2338 { 2339 int length = 0; 2340 for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) 2341 length += it.length(); 2342 2343 return length; 2344 } 2345 2346 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2347 { 2348 CharacterIterator entireRangeIterator(entireRange); 2349 return characterSubrange(entireRangeIterator, characterOffset, characterCount); 2350 } 2351 2352 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) 2353 { 2354 RefPtr<Range> resultRange = scope->document()->createRange(); 2355 2356 int docTextPosition = 0; 2357 int rangeEnd = rangeLocation + rangeLength; 2358 bool startRangeFound = false; 2359 2360 RefPtr<Range> textRunRange; 2361 2362 TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); 2363 2364 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. 2365 if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { 2366 textRunRange = it.range(); 2367 2368 ExceptionCode ec = 0; 2369 resultRange->setStart(textRunRange->startContainer(), 0, ec); 2370 ASSERT(!ec); 2371 resultRange->setEnd(textRunRange->startContainer(), 0, ec); 2372 ASSERT(!ec); 2373 2374 return resultRange.release(); 2375 } 2376 2377 for (; !it.atEnd(); it.advance()) { 2378 int len = it.length(); 2379 textRunRange = it.range(); 2380 2381 bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len; 2382 bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len; 2383 2384 // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only 2385 // in those cases that textRunRange is used. 2386 if (foundEnd) { 2387 // FIXME: This is a workaround for the fact that the end of a run is often at the wrong 2388 // position for emitted '\n's. 2389 if (len == 1 && it.characters()[0] == '\n') { 2390 scope->document()->updateLayoutIgnorePendingStylesheets(); 2391 it.advance(); 2392 if (!it.atEnd()) { 2393 RefPtr<Range> range = it.range(); 2394 ExceptionCode ec = 0; 2395 textRunRange->setEnd(range->startContainer(), range->startOffset(), ec); 2396 ASSERT(!ec); 2397 } else { 2398 Position runStart = textRunRange->startPosition(); 2399 Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); 2400 if (runEnd.isNotNull()) { 2401 ExceptionCode ec = 0; 2402 textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ec); 2403 ASSERT(!ec); 2404 } 2405 } 2406 } 2407 } 2408 2409 if (foundStart) { 2410 startRangeFound = true; 2411 int exception = 0; 2412 if (textRunRange->startContainer()->isTextNode()) { 2413 int offset = rangeLocation - docTextPosition; 2414 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); 2415 } else { 2416 if (rangeLocation == docTextPosition) 2417 resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception); 2418 else 2419 resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2420 } 2421 } 2422 2423 if (foundEnd) { 2424 int exception = 0; 2425 if (textRunRange->startContainer()->isTextNode()) { 2426 int offset = rangeEnd - docTextPosition; 2427 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); 2428 } else { 2429 if (rangeEnd == docTextPosition) 2430 resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception); 2431 else 2432 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2433 } 2434 docTextPosition += len; 2435 break; 2436 } 2437 docTextPosition += len; 2438 } 2439 2440 if (!startRangeFound) 2441 return 0; 2442 2443 if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds 2444 int exception = 0; 2445 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2446 } 2447 2448 return resultRange.release(); 2449 } 2450 2451 bool TextIterator::locationAndLengthFromRange(const Range* range, size_t& location, size_t& length) 2452 { 2453 location = notFound; 2454 length = 0; 2455 2456 if (!range->startContainer()) 2457 return false; 2458 2459 Element* selectionRoot = range->ownerDocument()->frame()->selection()->rootEditableElement(); 2460 Element* scope = selectionRoot ? selectionRoot : range->ownerDocument()->documentElement(); 2461 2462 // The critical assumption is that this only gets called with ranges that 2463 // concentrate on a given area containing the selection root. This is done 2464 // because of text fields and textareas. The DOM for those is not 2465 // directly in the document DOM, so ensure that the range does not cross a 2466 // boundary of one of those. 2467 if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope)) 2468 return false; 2469 if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope)) 2470 return false; 2471 2472 RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset()); 2473 ASSERT(testRange->startContainer() == scope); 2474 location = TextIterator::rangeLength(testRange.get()); 2475 2476 ExceptionCode ec; 2477 testRange->setEnd(range->endContainer(), range->endOffset(), ec); 2478 ASSERT(testRange->startContainer() == scope); 2479 length = TextIterator::rangeLength(testRange.get()) - location; 2480 return true; 2481 } 2482 2483 // -------- 2484 2485 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior defaultBehavior) 2486 { 2487 UChar* result = 0; 2488 2489 // Do this in pieces to avoid massive reallocations if there is a large amount of text. 2490 // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS. 2491 static const unsigned cMaxSegmentSize = 1 << 16; 2492 bufferLength = 0; 2493 typedef pair<UChar*, unsigned> TextSegment; 2494 OwnPtr<Vector<TextSegment> > textSegments; 2495 Vector<UChar> textBuffer; 2496 textBuffer.reserveInitialCapacity(cMaxSegmentSize); 2497 TextIteratorBehavior behavior = defaultBehavior; 2498 if (!isDisplayString) 2499 behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding); 2500 2501 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2502 if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) { 2503 UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar))); 2504 if (!newSegmentBuffer) 2505 goto exit; 2506 memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar)); 2507 if (!textSegments) 2508 textSegments = adoptPtr(new Vector<TextSegment>); 2509 textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size())); 2510 textBuffer.clear(); 2511 } 2512 textBuffer.append(it.characters(), it.length()); 2513 bufferLength += it.length(); 2514 } 2515 2516 if (!bufferLength) 2517 return 0; 2518 2519 // Since we know the size now, we can make a single buffer out of the pieces with one big alloc 2520 result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar))); 2521 if (!result) 2522 goto exit; 2523 2524 { 2525 UChar* resultPos = result; 2526 if (textSegments) { 2527 unsigned size = textSegments->size(); 2528 for (unsigned i = 0; i < size; ++i) { 2529 const TextSegment& segment = textSegments->at(i); 2530 memcpy(resultPos, segment.first, segment.second * sizeof(UChar)); 2531 resultPos += segment.second; 2532 } 2533 } 2534 memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar)); 2535 } 2536 2537 exit: 2538 if (textSegments) { 2539 unsigned size = textSegments->size(); 2540 for (unsigned i = 0; i < size; ++i) 2541 free(textSegments->at(i).first); 2542 } 2543 2544 if (isDisplayString && r->ownerDocument()) 2545 r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength); 2546 2547 return result; 2548 } 2549 2550 String plainText(const Range* r, TextIteratorBehavior defaultBehavior) 2551 { 2552 unsigned length; 2553 UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false, defaultBehavior); 2554 if (!buf) 2555 return ""; 2556 String result(buf, length); 2557 free(buf); 2558 return result; 2559 } 2560 2561 static inline bool isAllCollapsibleWhitespace(const String& string) 2562 { 2563 const UChar* characters = string.characters(); 2564 unsigned length = string.length(); 2565 for (unsigned i = 0; i < length; ++i) { 2566 if (!isCollapsibleWhitespace(characters[i])) 2567 return false; 2568 } 2569 return true; 2570 } 2571 2572 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2573 { 2574 ExceptionCode ec = 0; 2575 RefPtr<Range> result = range->cloneRange(ec); 2576 ASSERT(!ec); 2577 result->collapse(!forward, ec); 2578 ASSERT(!ec); 2579 return result.release(); 2580 } 2581 2582 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2583 { 2584 matchStart = 0; 2585 size_t matchLength = 0; 2586 2587 SearchBuffer buffer(target, options); 2588 2589 if (buffer.needsMoreContext()) { 2590 RefPtr<Range> startRange = it.range(); 2591 RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange(); 2592 ExceptionCode ec = 0; 2593 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), ec); 2594 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2595 buffer.prependContext(backwardsIterator.characters(), backwardsIterator.length()); 2596 if (!buffer.needsMoreContext()) 2597 break; 2598 } 2599 } 2600 2601 while (!it.atEnd()) { 2602 it.advance(buffer.append(it.characters(), it.length())); 2603 tryAgain: 2604 size_t matchStartOffset; 2605 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2606 // Note that we found a match, and where we found it. 2607 size_t lastCharacterInBufferOffset = it.characterOffset(); 2608 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2609 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2610 matchLength = newMatchLength; 2611 // If searching forward, stop on the first match. 2612 // If searching backward, don't stop, so we end up with the last match. 2613 if (!(options & Backwards)) 2614 break; 2615 goto tryAgain; 2616 } 2617 if (it.atBreak() && !buffer.atBreak()) { 2618 buffer.reachedBreak(); 2619 goto tryAgain; 2620 } 2621 } 2622 2623 return matchLength; 2624 } 2625 2626 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2627 { 2628 // First, find the text. 2629 size_t matchStart; 2630 size_t matchLength; 2631 { 2632 CharacterIterator findIterator(range, TextIteratorEntersTextControls); 2633 matchLength = findPlainText(findIterator, target, options, matchStart); 2634 if (!matchLength) 2635 return collapsedToBoundary(range, !(options & Backwards)); 2636 } 2637 2638 // Then, find the document position of the start and the end of the text. 2639 CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls); 2640 return characterSubrange(computeRangeIterator, matchStart, matchLength); 2641 } 2642 2643 } 2644