1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "TextIterator.h" 29 30 #include "Document.h" 31 #include "Frame.h" 32 #include "HTMLElement.h" 33 #include "HTMLNames.h" 34 #include "htmlediting.h" 35 #include "InlineTextBox.h" 36 #include "Range.h" 37 #include "RenderTableCell.h" 38 #include "RenderTableRow.h" 39 #include "RenderTextControl.h" 40 #include "RenderTextFragment.h" 41 #include "TextBoundaries.h" 42 #include "TextBreakIterator.h" 43 #include "VisiblePosition.h" 44 #include "visible_units.h" 45 #include <wtf/unicode/CharacterNames.h> 46 47 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 48 #include "TextBreakIteratorInternalICU.h" 49 #include <unicode/usearch.h> 50 #endif 51 52 using namespace WTF::Unicode; 53 using namespace std; 54 55 namespace WebCore { 56 57 using namespace HTMLNames; 58 59 // Buffer that knows how to compare with a search target. 60 // Keeps enough of the previous text to be able to search in the future, but no more. 61 // Non-breaking spaces are always equal to normal spaces. 62 // Case folding is also done if the CaseInsensitive option is specified. 63 // Matches are further filtered if the AtWordStarts option is specified, although some 64 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 65 class SearchBuffer { 66 WTF_MAKE_NONCOPYABLE(SearchBuffer); 67 public: 68 SearchBuffer(const String& target, FindOptions); 69 ~SearchBuffer(); 70 71 // Returns number of characters appended; guaranteed to be in the range [1, length]. 72 size_t append(const UChar*, size_t length); 73 bool needsMoreContext() const; 74 void prependContext(const UChar*, size_t length); 75 void reachedBreak(); 76 77 // Result is the size in characters of what was found. 78 // And <startOffset> is the number of characters back to the start of what was found. 79 size_t search(size_t& startOffset); 80 bool atBreak() const; 81 82 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 83 84 private: 85 bool isBadMatch(const UChar*, size_t length) const; 86 bool isWordStartMatch(size_t start, size_t length) const; 87 88 String m_target; 89 FindOptions m_options; 90 91 Vector<UChar> m_buffer; 92 size_t m_overlap; 93 size_t m_prefixLength; 94 bool m_atBreak; 95 bool m_needsMoreContext; 96 97 bool m_targetRequiresKanaWorkaround; 98 Vector<UChar> m_normalizedTarget; 99 mutable Vector<UChar> m_normalizedMatch; 100 101 #else 102 103 private: 104 void append(UChar, bool isCharacterStart); 105 size_t length() const; 106 107 String m_target; 108 FindOptions m_options; 109 110 Vector<UChar> m_buffer; 111 Vector<bool> m_isCharacterStartBuffer; 112 bool m_isBufferFull; 113 size_t m_cursor; 114 115 #endif 116 }; 117 118 // -------- 119 120 static const unsigned bitsInWord = sizeof(unsigned) * 8; 121 static const unsigned bitInWordMask = bitsInWord - 1; 122 123 BitStack::BitStack() 124 : m_size(0) 125 { 126 } 127 128 BitStack::~BitStack() 129 { 130 } 131 132 void BitStack::push(bool bit) 133 { 134 unsigned index = m_size / bitsInWord; 135 unsigned shift = m_size & bitInWordMask; 136 if (!shift && index == m_words.size()) { 137 m_words.grow(index + 1); 138 m_words[index] = 0; 139 } 140 unsigned& word = m_words[index]; 141 unsigned mask = 1U << shift; 142 if (bit) 143 word |= mask; 144 else 145 word &= ~mask; 146 ++m_size; 147 } 148 149 void BitStack::pop() 150 { 151 if (m_size) 152 --m_size; 153 } 154 155 bool BitStack::top() const 156 { 157 if (!m_size) 158 return false; 159 unsigned shift = (m_size - 1) & bitInWordMask; 160 return m_words.last() & (1U << shift); 161 } 162 163 unsigned BitStack::size() const 164 { 165 return m_size; 166 } 167 168 // -------- 169 170 #if !ASSERT_DISABLED 171 172 static unsigned depthCrossingShadowBoundaries(Node* node) 173 { 174 unsigned depth = 0; 175 for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode()) 176 ++depth; 177 return depth; 178 } 179 180 #endif 181 182 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 183 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) 184 { 185 if (!rangeEndContainer) 186 return 0; 187 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { 188 if (Node* next = rangeEndContainer->childNode(rangeEndOffset)) 189 return next; 190 } 191 for (Node* node = rangeEndContainer; node; node = node->parentOrHostNode()) { 192 if (Node* next = node->nextSibling()) 193 return next; 194 } 195 return 0; 196 } 197 198 // -------- 199 200 static inline bool fullyClipsContents(Node* node) 201 { 202 RenderObject* renderer = node->renderer(); 203 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 204 return false; 205 return toRenderBox(renderer)->size().isEmpty(); 206 } 207 208 static inline bool ignoresContainerClip(Node* node) 209 { 210 RenderObject* renderer = node->renderer(); 211 if (!renderer || renderer->isText()) 212 return false; 213 EPosition position = renderer->style()->position(); 214 return position == AbsolutePosition || position == FixedPosition; 215 } 216 217 static void pushFullyClippedState(BitStack& stack, Node* node) 218 { 219 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 220 221 // Push true if this node full clips its contents, or if a parent already has fully 222 // clipped and this is not a node that ignores its container's clip. 223 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 224 } 225 226 static void setUpFullyClippedStack(BitStack& stack, Node* node) 227 { 228 // Put the nodes in a vector so we can iterate in reverse order. 229 Vector<Node*, 100> ancestry; 230 for (Node* parent = node->parentOrHostNode(); parent; parent = parent->parentOrHostNode()) 231 ancestry.append(parent); 232 233 // Call pushFullyClippedState on each node starting with the earliest ancestor. 234 size_t size = ancestry.size(); 235 for (size_t i = 0; i < size; ++i) 236 pushFullyClippedState(stack, ancestry[size - i - 1]); 237 pushFullyClippedState(stack, node); 238 239 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 240 } 241 242 #if OS(ANDROID) 243 static bool checkFormControlElement(Node* startNode) 244 { 245 Node* node = startNode; 246 while (node) { 247 if (node->isElementNode() && static_cast<Element*>(node)->isFormControlElement()) 248 return true; 249 node = node->parentNode(); 250 } 251 return false; 252 } 253 #endif 254 255 256 // -------- 257 258 TextIterator::TextIterator() 259 : m_startContainer(0) 260 , m_startOffset(0) 261 , m_endContainer(0) 262 , m_endOffset(0) 263 , m_positionNode(0) 264 , m_textCharacters(0) 265 , m_textLength(0) 266 , m_remainingTextBox(0) 267 , m_firstLetterText(0) 268 , m_lastCharacter(0) 269 , m_emitsCharactersBetweenAllVisiblePositions(false) 270 , m_entersTextControls(false) 271 , m_emitsTextWithoutTranscoding(false) 272 , m_handledFirstLetter(false) 273 , m_ignoresStyleVisibility(false) 274 , m_emitsObjectReplacementCharacters(false) 275 #if OS(ANDROID) 276 , m_stopsOnFormControls(false) 277 , m_shouldStop(false) 278 #endif 279 { 280 } 281 282 TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior) 283 : m_startContainer(0) 284 , m_startOffset(0) 285 , m_endContainer(0) 286 , m_endOffset(0) 287 , m_positionNode(0) 288 , m_textCharacters(0) 289 , m_textLength(0) 290 #if OS(ANDROID) 291 , m_needsAnotherNewline(false) 292 #endif 293 , m_remainingTextBox(0) 294 , m_firstLetterText(0) 295 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) 296 , m_entersTextControls(behavior & TextIteratorEntersTextControls) 297 , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding) 298 , m_handledFirstLetter(false) 299 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) 300 , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters) 301 #if OS(ANDROID) 302 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 303 , m_shouldStop(false) 304 #endif 305 { 306 if (!r) 307 return; 308 309 // get and validate the range endpoints 310 Node* startContainer = r->startContainer(); 311 if (!startContainer) 312 return; 313 int startOffset = r->startOffset(); 314 Node* endContainer = r->endContainer(); 315 int endOffset = r->endOffset(); 316 317 // Callers should be handing us well-formed ranges. If we discover that this isn't 318 // the case, we could consider changing this assertion to an early return. 319 ASSERT(r->boundaryPointsValid()); 320 321 // remember range - this does not change 322 m_startContainer = startContainer; 323 m_startOffset = startOffset; 324 m_endContainer = endContainer; 325 m_endOffset = endOffset; 326 327 // set up the current node for processing 328 m_node = r->firstNode(); 329 if (!m_node) 330 return; 331 setUpFullyClippedStack(m_fullyClippedStack, m_node); 332 m_offset = m_node == m_startContainer ? m_startOffset : 0; 333 m_handledNode = false; 334 m_handledChildren = false; 335 336 // calculate first out of bounds node 337 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); 338 339 // initialize node processing state 340 m_needsAnotherNewline = false; 341 m_textBox = 0; 342 343 // initialize record of previous node processing 344 m_hasEmitted = false; 345 m_lastTextNode = 0; 346 m_lastTextNodeEndedWithCollapsedSpace = false; 347 m_lastCharacter = 0; 348 349 #ifndef NDEBUG 350 // need this just because of the assert in advance() 351 m_positionNode = m_node; 352 #endif 353 354 // identify the first run 355 advance(); 356 } 357 358 TextIterator::~TextIterator() 359 { 360 } 361 362 bool TextIterator::atEnd() const 363 { 364 #if OS(ANDROID) 365 return !m_positionNode || m_shouldStop; 366 #else 367 return !m_positionNode; 368 #endif 369 } 370 371 void TextIterator::advance() 372 { 373 #if OS(ANDROID) 374 if (m_shouldStop) 375 return; 376 #endif 377 // reset the run information 378 m_positionNode = 0; 379 m_textLength = 0; 380 381 // handle remembered node that needed a newline after the text node's newline 382 if (m_needsAnotherNewline) { 383 // Emit the extra newline, and position it *inside* m_node, after m_node's 384 // contents, in case it's a block, in the same way that we position the first 385 // newline. The range for the emitted newline should start where the line 386 // break begins. 387 // FIXME: It would be cleaner if we emitted two newlines during the last 388 // iteration, instead of using m_needsAnotherNewline. 389 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 390 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 391 m_needsAnotherNewline = false; 392 return; 393 } 394 395 if (!m_textBox && m_remainingTextBox) { 396 m_textBox = m_remainingTextBox; 397 m_remainingTextBox = 0; 398 m_firstLetterText = 0; 399 m_offset = 0; 400 } 401 // handle remembered text box 402 if (m_textBox) { 403 handleTextBox(); 404 if (m_positionNode) 405 return; 406 } 407 408 while (m_node && m_node != m_pastEndNode) { 409 #if OS(ANDROID) 410 if (!m_shouldStop && m_stopsOnFormControls && checkFormControlElement(m_node)) 411 m_shouldStop = true; 412 #endif 413 // if the range ends at offset 0 of an element, represent the 414 // position, but not the content, of that element e.g. if the 415 // node is a blockflow element, emit a newline that 416 // precedes the element 417 if (m_node == m_endContainer && m_endOffset == 0) { 418 representNodeOffsetZero(); 419 m_node = 0; 420 return; 421 } 422 423 RenderObject* renderer = m_node->renderer(); 424 if (!renderer) { 425 m_handledNode = true; 426 m_handledChildren = true; 427 } else { 428 // handle current node according to its type 429 if (!m_handledNode) { 430 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE? 431 m_handledNode = handleTextNode(); 432 else if (renderer && (renderer->isImage() || renderer->isWidget() || 433 (renderer->node() && renderer->node()->isElementNode() && 434 static_cast<Element*>(renderer->node())->isFormControlElement()))) 435 m_handledNode = handleReplacedElement(); 436 else 437 m_handledNode = handleNonTextNode(); 438 if (m_positionNode) 439 return; 440 } 441 } 442 443 // find a new current node to handle in depth-first manner, 444 // calling exitNode() as we come back thru a parent node 445 Node* next = m_handledChildren ? 0 : m_node->firstChild(); 446 m_offset = 0; 447 if (!next) { 448 next = m_node->nextSibling(); 449 if (!next) { 450 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode; 451 Node* parentNode = m_node->parentOrHostNode(); 452 while (!next && parentNode) { 453 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 454 return; 455 bool haveRenderer = m_node->renderer(); 456 m_node = parentNode; 457 m_fullyClippedStack.pop(); 458 parentNode = m_node->parentOrHostNode(); 459 if (haveRenderer) 460 exitNode(); 461 if (m_positionNode) { 462 m_handledNode = true; 463 m_handledChildren = true; 464 return; 465 } 466 next = m_node->nextSibling(); 467 } 468 } 469 m_fullyClippedStack.pop(); 470 } 471 472 // set the new current node 473 m_node = next; 474 if (m_node) 475 pushFullyClippedState(m_fullyClippedStack, m_node); 476 m_handledNode = false; 477 m_handledChildren = false; 478 m_handledFirstLetter = false; 479 m_firstLetterText = 0; 480 481 // how would this ever be? 482 if (m_positionNode) 483 return; 484 } 485 } 486 487 bool TextIterator::handleTextNode() 488 { 489 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) 490 return false; 491 492 RenderText* renderer = toRenderText(m_node->renderer()); 493 494 m_lastTextNode = m_node; 495 String str = renderer->text(); 496 497 // handle pre-formatted text 498 if (!renderer->style()->collapseWhiteSpace()) { 499 int runStart = m_offset; 500 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 501 emitCharacter(' ', m_node, 0, runStart, runStart); 502 return false; 503 } 504 if (!m_handledFirstLetter && renderer->isTextFragment()) { 505 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 506 if (m_firstLetterText) { 507 String firstLetter = m_firstLetterText->text(); 508 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); 509 m_firstLetterText = 0; 510 m_textBox = 0; 511 return false; 512 } 513 } 514 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 515 return false; 516 int strLength = str.length(); 517 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; 518 int runEnd = min(strLength, end); 519 520 if (runStart >= runEnd) 521 return true; 522 523 emitText(m_node, runStart, runEnd); 524 return true; 525 } 526 527 if (!renderer->firstTextBox() && str.length() > 0) { 528 if (!m_handledFirstLetter && renderer->isTextFragment()) { 529 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 530 if (m_firstLetterText) { 531 handleTextBox(); 532 return false; 533 } 534 } 535 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 536 return false; 537 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 538 return true; 539 } 540 541 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 542 if (renderer->containsReversedText()) { 543 m_sortedTextBoxes.clear(); 544 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { 545 m_sortedTextBoxes.append(textBox); 546 } 547 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 548 m_sortedTextBoxesPosition = 0; 549 } 550 551 m_textBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 552 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) 553 handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); 554 handleTextBox(); 555 return true; 556 } 557 558 void TextIterator::handleTextBox() 559 { 560 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); 561 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) { 562 m_textBox = 0; 563 return; 564 } 565 String str = renderer->text(); 566 unsigned start = m_offset; 567 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX; 568 while (m_textBox) { 569 unsigned textBoxStart = m_textBox->start(); 570 unsigned runStart = max(textBoxStart, start); 571 572 // Check for collapsed space at the start of this run. 573 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); 574 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace 575 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); 576 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { 577 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { 578 unsigned spaceRunStart = runStart - 1; 579 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') 580 --spaceRunStart; 581 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); 582 } else 583 emitCharacter(' ', m_node, 0, runStart, runStart); 584 return; 585 } 586 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 587 unsigned runEnd = min(textBoxEnd, end); 588 589 // Determine what the next text box will be, but don't advance yet 590 InlineTextBox* nextTextBox = 0; 591 if (renderer->containsReversedText()) { 592 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 593 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 594 } else 595 nextTextBox = m_textBox->nextTextBox(); 596 597 if (runStart < runEnd) { 598 // Handle either a single newline character (which becomes a space), 599 // or a run of characters that does not include a newline. 600 // This effectively translates newlines to spaces without copying the text. 601 if (str[runStart] == '\n') { 602 emitCharacter(' ', m_node, 0, runStart, runStart + 1); 603 m_offset = runStart + 1; 604 } else { 605 size_t subrunEnd = str.find('\n', runStart); 606 if (subrunEnd == notFound || subrunEnd > runEnd) 607 subrunEnd = runEnd; 608 609 m_offset = subrunEnd; 610 emitText(m_node, renderer, runStart, subrunEnd); 611 } 612 613 // If we are doing a subrun that doesn't go to the end of the text box, 614 // come back again to finish handling this text box; don't advance to the next one. 615 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 616 return; 617 618 // Advance and return 619 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); 620 if (nextRunStart > runEnd) 621 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 622 m_textBox = nextTextBox; 623 if (renderer->containsReversedText()) 624 ++m_sortedTextBoxesPosition; 625 return; 626 } 627 // Advance and continue 628 m_textBox = nextTextBox; 629 if (renderer->containsReversedText()) 630 ++m_sortedTextBoxesPosition; 631 } 632 if (!m_textBox && m_remainingTextBox) { 633 m_textBox = m_remainingTextBox; 634 m_remainingTextBox = 0; 635 m_firstLetterText = 0; 636 m_offset = 0; 637 handleTextBox(); 638 } 639 } 640 641 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) 642 { 643 if (renderer->firstLetter()) { 644 RenderObject* r = renderer->firstLetter(); 645 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 646 return; 647 for (RenderObject *currChild = r->firstChild(); currChild; currChild->nextSibling()) { 648 if (currChild->isText()) { 649 RenderText* firstLetter = toRenderText(currChild); 650 m_handledFirstLetter = true; 651 m_remainingTextBox = m_textBox; 652 m_textBox = firstLetter->firstTextBox(); 653 m_firstLetterText = firstLetter; 654 return; 655 } 656 } 657 } 658 m_handledFirstLetter = true; 659 } 660 661 bool TextIterator::handleReplacedElement() 662 { 663 if (m_fullyClippedStack.top()) 664 return false; 665 666 RenderObject* renderer = m_node->renderer(); 667 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) 668 return false; 669 670 if (m_lastTextNodeEndedWithCollapsedSpace) { 671 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 672 return false; 673 } 674 675 if (m_entersTextControls && renderer->isTextControl()) { 676 if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) { 677 m_node = innerTextElement->shadowTreeRootNode(); 678 pushFullyClippedState(m_fullyClippedStack, m_node); 679 m_offset = 0; 680 return false; 681 } 682 } 683 684 m_hasEmitted = true; 685 686 if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) { 687 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); 688 return true; 689 } 690 691 if (m_emitsCharactersBetweenAllVisiblePositions) { 692 // We want replaced elements to behave like punctuation for boundary 693 // finding, and to simply take up space for the selection preservation 694 // code in moveParagraphs, so we use a comma. 695 emitCharacter(',', m_node->parentNode(), m_node, 0, 1); 696 return true; 697 } 698 699 m_positionNode = m_node->parentNode(); 700 m_positionOffsetBaseNode = m_node; 701 m_positionStartOffset = 0; 702 m_positionEndOffset = 1; 703 704 m_textCharacters = 0; 705 m_textLength = 0; 706 707 m_lastCharacter = 0; 708 709 return true; 710 } 711 712 bool TextIterator::hasVisibleTextNode(RenderText* renderer) 713 { 714 if (renderer->style()->visibility() == VISIBLE) 715 return true; 716 if (renderer->isTextFragment()) { 717 RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer); 718 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE) 719 return true; 720 } 721 return false; 722 } 723 724 static bool shouldEmitTabBeforeNode(Node* node) 725 { 726 RenderObject* r = node->renderer(); 727 728 // Table cells are delimited by tabs. 729 if (!r || !isTableCell(node)) 730 return false; 731 732 // Want a tab before every cell other than the first one 733 RenderTableCell* rc = toRenderTableCell(r); 734 RenderTable* t = rc->table(); 735 return t && (t->cellBefore(rc) || t->cellAbove(rc)); 736 } 737 738 static bool shouldEmitNewlineForNode(Node* node) 739 { 740 // br elements are represented by a single newline. 741 RenderObject* r = node->renderer(); 742 if (!r) 743 return node->hasTagName(brTag); 744 745 return r->isBR(); 746 } 747 748 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node) 749 { 750 // Block flow (versus inline flow) is represented by having 751 // a newline both before and after the element. 752 RenderObject* r = node->renderer(); 753 if (!r) { 754 return (node->hasTagName(blockquoteTag) 755 || node->hasTagName(ddTag) 756 || node->hasTagName(divTag) 757 || node->hasTagName(dlTag) 758 || node->hasTagName(dtTag) 759 || node->hasTagName(h1Tag) 760 || node->hasTagName(h2Tag) 761 || node->hasTagName(h3Tag) 762 || node->hasTagName(h4Tag) 763 || node->hasTagName(h5Tag) 764 || node->hasTagName(h6Tag) 765 || node->hasTagName(hrTag) 766 || node->hasTagName(liTag) 767 || node->hasTagName(listingTag) 768 || node->hasTagName(olTag) 769 || node->hasTagName(pTag) 770 || node->hasTagName(preTag) 771 || node->hasTagName(trTag) 772 || node->hasTagName(ulTag)); 773 } 774 775 // Need to make an exception for table cells, because they are blocks, but we 776 // want them tab-delimited rather than having newlines before and after. 777 if (isTableCell(node)) 778 return false; 779 780 // Need to make an exception for table row elements, because they are neither 781 // "inline" or "RenderBlock", but we want newlines for them. 782 if (r->isTableRow()) { 783 RenderTable* t = toRenderTableRow(r)->table(); 784 if (t && !t->isInline()) 785 return true; 786 } 787 788 return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody(); 789 } 790 791 static bool shouldEmitNewlineAfterNode(Node* node) 792 { 793 // FIXME: It should be better but slower to create a VisiblePosition here. 794 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 795 return false; 796 // Check if this is the very last renderer in the document. 797 // If so, then we should not emit a newline. 798 while ((node = node->traverseNextSibling())) 799 if (node->renderer()) 800 return true; 801 return false; 802 } 803 804 static bool shouldEmitNewlineBeforeNode(Node* node) 805 { 806 return shouldEmitNewlinesBeforeAndAfterNode(node); 807 } 808 809 static bool shouldEmitExtraNewlineForNode(Node* node) 810 { 811 // When there is a significant collapsed bottom margin, emit an extra 812 // newline for a more realistic result. We end up getting the right 813 // result even without margin collapsing. For example: <div><p>text</p></div> 814 // will work right even if both the <div> and the <p> have bottom margins. 815 RenderObject* r = node->renderer(); 816 if (!r || !r->isBox()) 817 return false; 818 819 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears 820 // not to do this at all 821 if (node->hasTagName(h1Tag) 822 || node->hasTagName(h2Tag) 823 || node->hasTagName(h3Tag) 824 || node->hasTagName(h4Tag) 825 || node->hasTagName(h5Tag) 826 || node->hasTagName(h6Tag) 827 || node->hasTagName(pTag)) { 828 RenderStyle* style = r->style(); 829 if (style) { 830 int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); 831 int fontSize = style->fontDescription().computedPixelSize(); 832 if (bottomMargin * 2 >= fontSize) 833 return true; 834 } 835 } 836 837 return false; 838 } 839 840 static int collapsedSpaceLength(RenderText* renderer, int textEnd) 841 { 842 const UChar* characters = renderer->text()->characters(); 843 int length = renderer->text()->length(); 844 for (int i = textEnd; i < length; ++i) { 845 if (!renderer->style()->isCollapsibleWhiteSpace(characters[i])) 846 return i - textEnd; 847 } 848 849 return length - textEnd; 850 } 851 852 static int maxOffsetIncludingCollapsedSpaces(Node* node) 853 { 854 int offset = caretMaxOffset(node); 855 856 if (node->renderer() && node->renderer()->isText()) 857 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); 858 859 return offset; 860 } 861 862 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 863 bool TextIterator::shouldRepresentNodeOffsetZero() 864 { 865 if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable()) 866 return true; 867 868 // Leave element positioned flush with start of a paragraph 869 // (e.g. do not insert tab before a table cell at the start of a paragraph) 870 if (m_lastCharacter == '\n') 871 return false; 872 873 // Otherwise, show the position if we have emitted any characters 874 if (m_hasEmitted) 875 return true; 876 877 // We've not emitted anything yet. Generally, there is no need for any positioning then. 878 // The only exception is when the element is visually not in the same line as 879 // the start of the range (e.g. the range starts at the end of the previous paragraph). 880 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 881 // make quicker checks to possibly avoid that. Another check that we could make is 882 // is whether the inline vs block flow changed since the previous visible element. 883 // I think we're already in a special enough case that that won't be needed, tho. 884 885 // No character needed if this is the first node in the range. 886 if (m_node == m_startContainer) 887 return false; 888 889 // If we are outside the start container's subtree, assume we need to emit. 890 // FIXME: m_startContainer could be an inline block 891 if (!m_node->isDescendantOf(m_startContainer)) 892 return true; 893 894 // If we started as m_startContainer offset 0 and the current node is a descendant of 895 // the start container, we already had enough context to correctly decide whether to 896 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 897 // so don't second guess that now. 898 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 899 // immaterial since we likely would have already emitted something by now. 900 if (m_startOffset == 0) 901 return false; 902 903 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 904 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 905 // we would create VisiblePositions on every call to this function without this check. 906 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE) 907 return false; 908 909 // The startPos.isNotNull() check is needed because the start could be before the body, 910 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 911 // The currPos.isNotNull() check is needed because positions in non-HTML content 912 // (like SVG) do not have visible positions, and we don't want to emit for them either. 913 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 914 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 915 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 916 } 917 918 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) 919 { 920 return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); 921 } 922 923 void TextIterator::representNodeOffsetZero() 924 { 925 // Emit a character to show the positioning of m_node. 926 927 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 928 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 929 // on m_node to see if it necessitates emitting a character first and will early return 930 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 931 if (shouldEmitTabBeforeNode(m_node)) { 932 if (shouldRepresentNodeOffsetZero()) 933 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); 934 } else if (shouldEmitNewlineBeforeNode(m_node)) { 935 if (shouldRepresentNodeOffsetZero()) 936 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); 937 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { 938 if (shouldRepresentNodeOffsetZero()) 939 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); 940 } 941 } 942 943 bool TextIterator::handleNonTextNode() 944 { 945 if (shouldEmitNewlineForNode(m_node)) 946 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); 947 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) 948 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); 949 else 950 representNodeOffsetZero(); 951 952 return true; 953 } 954 955 void TextIterator::exitNode() 956 { 957 // prevent emitting a newline when exiting a collapsed block at beginning of the range 958 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 959 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 960 // therefore look like a blank line. 961 if (!m_hasEmitted) 962 return; 963 964 // Emit with a position *inside* m_node, after m_node's contents, in 965 // case it is a block, because the run should start where the 966 // emitted character is positioned visually. 967 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 968 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 969 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 970 // TextIterator in _web_attributedStringFromRange. 971 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 972 if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) { 973 // use extra newline to represent margin bottom, as needed 974 bool addNewline = shouldEmitExtraNewlineForNode(m_node); 975 976 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 977 // contain a VisiblePosition when doing selection preservation. 978 if (m_lastCharacter != '\n') { 979 // insert a newline with a position following this block's contents. 980 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 981 // remember whether to later add a newline for the current node 982 ASSERT(!m_needsAnotherNewline); 983 m_needsAnotherNewline = addNewline; 984 } else if (addNewline) 985 // insert a newline with a position following this block's contents. 986 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); 987 } 988 989 // If nothing was emitted, see if we need to emit a space. 990 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) 991 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); 992 } 993 994 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 995 { 996 m_hasEmitted = true; 997 998 // remember information with which to construct the TextIterator::range() 999 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode 1000 m_positionNode = textNode; 1001 m_positionOffsetBaseNode = offsetBaseNode; 1002 m_positionStartOffset = textStartOffset; 1003 m_positionEndOffset = textEndOffset; 1004 1005 // remember information with which to construct the TextIterator::characters() and length() 1006 m_singleCharacterBuffer = c; 1007 m_textCharacters = &m_singleCharacterBuffer; 1008 m_textLength = 1; 1009 1010 // remember some iteration state 1011 m_lastTextNodeEndedWithCollapsedSpace = false; 1012 m_lastCharacter = c; 1013 } 1014 1015 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) 1016 { 1017 RenderText* renderer = toRenderText(renderObject); 1018 m_text = m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text(); 1019 ASSERT(m_text.characters()); 1020 1021 m_positionNode = textNode; 1022 m_positionOffsetBaseNode = 0; 1023 m_positionStartOffset = textStartOffset; 1024 m_positionEndOffset = textEndOffset; 1025 m_textCharacters = m_text.characters() + textStartOffset; 1026 m_textLength = textEndOffset - textStartOffset; 1027 m_lastCharacter = m_text[textEndOffset - 1]; 1028 1029 m_lastTextNodeEndedWithCollapsedSpace = false; 1030 m_hasEmitted = true; 1031 } 1032 1033 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) 1034 { 1035 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); 1036 } 1037 1038 PassRefPtr<Range> TextIterator::range() const 1039 { 1040 // use the current run information, if we have it 1041 if (m_positionNode) { 1042 if (m_positionOffsetBaseNode) { 1043 int index = m_positionOffsetBaseNode->nodeIndex(); 1044 m_positionStartOffset += index; 1045 m_positionEndOffset += index; 1046 m_positionOffsetBaseNode = 0; 1047 } 1048 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1049 } 1050 1051 // otherwise, return the end of the overall range we were given 1052 if (m_endContainer) 1053 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); 1054 1055 return 0; 1056 } 1057 1058 Node* TextIterator::node() const 1059 { 1060 RefPtr<Range> textRange = range(); 1061 if (!textRange) 1062 return 0; 1063 1064 Node* node = textRange->startContainer(); 1065 if (!node) 1066 return 0; 1067 if (node->offsetInCharacters()) 1068 return node; 1069 1070 return node->childNode(textRange->startOffset()); 1071 } 1072 1073 // -------- 1074 1075 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator() 1076 : m_behavior(TextIteratorDefaultBehavior) 1077 , m_node(0) 1078 , m_positionNode(0) 1079 #if OS(ANDROID) 1080 , m_stopsOnFormControls(false) 1081 , m_shouldStop(false) 1082 #endif 1083 { 1084 } 1085 1086 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior) 1087 : m_behavior(behavior) 1088 , m_node(0) 1089 , m_positionNode(0) 1090 #if OS(ANDROID) 1091 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) 1092 , m_shouldStop(false) 1093 #endif 1094 { 1095 #if OS(ANDROID) 1096 ASSERT(m_behavior == TextIteratorDefaultBehavior || m_behavior == TextIteratorStopsOnFormControls); 1097 #else 1098 ASSERT(m_behavior == TextIteratorDefaultBehavior); 1099 #endif 1100 1101 if (!r) 1102 return; 1103 1104 Node* startNode = r->startContainer(); 1105 if (!startNode) 1106 return; 1107 Node* endNode = r->endContainer(); 1108 int startOffset = r->startOffset(); 1109 int endOffset = r->endOffset(); 1110 1111 if (!startNode->offsetInCharacters()) { 1112 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { 1113 startNode = startNode->childNode(startOffset); 1114 startOffset = 0; 1115 } 1116 } 1117 if (!endNode->offsetInCharacters()) { 1118 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { 1119 endNode = endNode->childNode(endOffset - 1); 1120 endOffset = lastOffsetInNode(endNode); 1121 } 1122 } 1123 1124 m_node = endNode; 1125 setUpFullyClippedStack(m_fullyClippedStack, m_node); 1126 m_offset = endOffset; 1127 m_handledNode = false; 1128 m_handledChildren = endOffset == 0; 1129 1130 m_startNode = startNode; 1131 m_startOffset = startOffset; 1132 m_endNode = endNode; 1133 m_endOffset = endOffset; 1134 1135 #ifndef NDEBUG 1136 // Need this just because of the assert. 1137 m_positionNode = endNode; 1138 #endif 1139 1140 m_lastTextNode = 0; 1141 m_lastCharacter = '\n'; 1142 1143 m_havePassedStartNode = false; 1144 1145 advance(); 1146 } 1147 1148 bool SimplifiedBackwardsTextIterator::atEnd() const 1149 { 1150 #if OS(ANDROID) 1151 return !m_positionNode || m_shouldStop; 1152 #else 1153 return !m_positionNode; 1154 #endif 1155 } 1156 1157 void SimplifiedBackwardsTextIterator::advance() 1158 { 1159 ASSERT(m_positionNode); 1160 1161 #if OS(ANDROID) 1162 if (m_shouldStop) 1163 return; 1164 1165 // Prevent changing the iterator position if a form control element was found and advance should stop on it. 1166 if (m_stopsOnFormControls && checkFormControlElement(m_node)) { 1167 m_shouldStop = true; 1168 return; 1169 } 1170 #endif 1171 1172 m_positionNode = 0; 1173 m_textLength = 0; 1174 1175 while (m_node && !m_havePassedStartNode) { 1176 // Don't handle node if we start iterating at [node, 0]. 1177 if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) { 1178 RenderObject* renderer = m_node->renderer(); 1179 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { 1180 // FIXME: What about CDATA_SECTION_NODE? 1181 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1182 m_handledNode = handleTextNode(); 1183 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1184 if (renderer->style()->visibility() == VISIBLE && m_offset > 0) 1185 m_handledNode = handleReplacedElement(); 1186 } else 1187 m_handledNode = handleNonTextNode(); 1188 if (m_positionNode) 1189 return; 1190 } 1191 1192 if (!m_handledChildren && m_node->hasChildNodes()) { 1193 m_node = m_node->lastChild(); 1194 pushFullyClippedState(m_fullyClippedStack, m_node); 1195 } else { 1196 // Exit empty containers as we pass over them or containers 1197 // where [container, 0] is where we started iterating. 1198 if (!m_handledNode 1199 && canHaveChildrenForEditing(m_node) 1200 && m_node->parentNode() 1201 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { 1202 exitNode(); 1203 if (m_positionNode) { 1204 m_handledNode = true; 1205 m_handledChildren = true; 1206 return; 1207 } 1208 } 1209 1210 // Exit all other containers. 1211 while (!m_node->previousSibling()) { 1212 if (!advanceRespectingRange(m_node->parentOrHostNode())) 1213 break; 1214 m_fullyClippedStack.pop(); 1215 exitNode(); 1216 if (m_positionNode) { 1217 m_handledNode = true; 1218 m_handledChildren = true; 1219 return; 1220 } 1221 } 1222 1223 m_fullyClippedStack.pop(); 1224 if (advanceRespectingRange(m_node->previousSibling())) 1225 pushFullyClippedState(m_fullyClippedStack, m_node); 1226 else 1227 m_node = 0; 1228 } 1229 1230 // For the purpose of word boundary detection, 1231 // we should iterate all visible text and trailing (collapsed) whitespaces. 1232 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; 1233 m_handledNode = false; 1234 m_handledChildren = false; 1235 1236 if (m_positionNode) 1237 return; 1238 } 1239 } 1240 1241 bool SimplifiedBackwardsTextIterator::handleTextNode() 1242 { 1243 m_lastTextNode = m_node; 1244 1245 RenderText* renderer = toRenderText(m_node->renderer()); 1246 String str = renderer->text(); 1247 1248 if (!renderer->firstTextBox() && str.length() > 0) 1249 return true; 1250 1251 m_positionEndOffset = m_offset; 1252 1253 m_offset = (m_node == m_startNode) ? m_startOffset : 0; 1254 m_positionNode = m_node; 1255 m_positionStartOffset = m_offset; 1256 m_textLength = m_positionEndOffset - m_positionStartOffset; 1257 m_textCharacters = str.characters() + m_positionStartOffset; 1258 1259 m_lastCharacter = str[m_positionEndOffset - 1]; 1260 1261 return true; 1262 } 1263 1264 bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1265 { 1266 unsigned index = m_node->nodeIndex(); 1267 // We want replaced elements to behave like punctuation for boundary 1268 // finding, and to simply take up space for the selection preservation 1269 // code in moveParagraphs, so we use a comma. Unconditionally emit 1270 // here because this iterator is only used for boundary finding. 1271 emitCharacter(',', m_node->parentNode(), index, index + 1); 1272 return true; 1273 } 1274 1275 bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1276 { 1277 // We can use a linefeed in place of a tab because this simple iterator is only used to 1278 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1279 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1280 unsigned index = m_node->nodeIndex(); 1281 // The start of this emitted range is wrong. Ensuring correctness would require 1282 // VisiblePositions and so would be slow. previousBoundary expects this. 1283 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); 1284 } 1285 return true; 1286 } 1287 1288 void SimplifiedBackwardsTextIterator::exitNode() 1289 { 1290 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) { 1291 // The start of this emitted range is wrong. Ensuring correctness would require 1292 // VisiblePositions and so would be slow. previousBoundary expects this. 1293 emitCharacter('\n', m_node, 0, 0); 1294 } 1295 } 1296 1297 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) 1298 { 1299 m_singleCharacterBuffer = c; 1300 m_positionNode = node; 1301 m_positionStartOffset = startOffset; 1302 m_positionEndOffset = endOffset; 1303 m_textCharacters = &m_singleCharacterBuffer; 1304 m_textLength = 1; 1305 m_lastCharacter = c; 1306 } 1307 1308 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1309 { 1310 if (!next) 1311 return false; 1312 m_havePassedStartNode |= m_node == m_startNode; 1313 if (m_havePassedStartNode) 1314 return false; 1315 m_node = next; 1316 return true; 1317 } 1318 1319 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const 1320 { 1321 if (m_positionNode) 1322 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1323 1324 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); 1325 } 1326 1327 // -------- 1328 1329 CharacterIterator::CharacterIterator() 1330 : m_offset(0) 1331 , m_runOffset(0) 1332 , m_atBreak(true) 1333 { 1334 } 1335 1336 CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior) 1337 : m_offset(0) 1338 , m_runOffset(0) 1339 , m_atBreak(true) 1340 , m_textIterator(r, behavior) 1341 { 1342 while (!atEnd() && m_textIterator.length() == 0) 1343 m_textIterator.advance(); 1344 } 1345 1346 PassRefPtr<Range> CharacterIterator::range() const 1347 { 1348 RefPtr<Range> r = m_textIterator.range(); 1349 if (!m_textIterator.atEnd()) { 1350 if (m_textIterator.length() <= 1) { 1351 ASSERT(m_runOffset == 0); 1352 } else { 1353 Node* n = r->startContainer(); 1354 ASSERT(n == r->endContainer()); 1355 int offset = r->startOffset() + m_runOffset; 1356 ExceptionCode ec = 0; 1357 r->setStart(n, offset, ec); 1358 r->setEnd(n, offset + 1, ec); 1359 ASSERT(!ec); 1360 } 1361 } 1362 return r.release(); 1363 } 1364 1365 void CharacterIterator::advance(int count) 1366 { 1367 if (count <= 0) { 1368 ASSERT(count == 0); 1369 return; 1370 } 1371 1372 m_atBreak = false; 1373 1374 // easy if there is enough left in the current m_textIterator run 1375 int remaining = m_textIterator.length() - m_runOffset; 1376 if (count < remaining) { 1377 m_runOffset += count; 1378 m_offset += count; 1379 return; 1380 } 1381 1382 // exhaust the current m_textIterator run 1383 count -= remaining; 1384 m_offset += remaining; 1385 1386 // move to a subsequent m_textIterator run 1387 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1388 int runLength = m_textIterator.length(); 1389 if (runLength == 0) 1390 m_atBreak = true; 1391 else { 1392 // see whether this is m_textIterator to use 1393 if (count < runLength) { 1394 m_runOffset = count; 1395 m_offset += count; 1396 return; 1397 } 1398 1399 // exhaust this m_textIterator run 1400 count -= runLength; 1401 m_offset += runLength; 1402 } 1403 } 1404 1405 // ran to the end of the m_textIterator... no more runs left 1406 m_atBreak = true; 1407 m_runOffset = 0; 1408 } 1409 1410 String CharacterIterator::string(int numChars) 1411 { 1412 Vector<UChar> result; 1413 result.reserveInitialCapacity(numChars); 1414 while (numChars > 0 && !atEnd()) { 1415 int runSize = min(numChars, length()); 1416 result.append(characters(), runSize); 1417 numChars -= runSize; 1418 advance(runSize); 1419 } 1420 return String::adopt(result); 1421 } 1422 1423 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) 1424 { 1425 it.advance(offset); 1426 RefPtr<Range> start = it.range(); 1427 1428 if (length > 1) 1429 it.advance(length - 1); 1430 RefPtr<Range> end = it.range(); 1431 1432 return Range::create(start->startContainer()->document(), 1433 start->startContainer(), start->startOffset(), 1434 end->endContainer(), end->endOffset()); 1435 } 1436 1437 BackwardsCharacterIterator::BackwardsCharacterIterator() 1438 : m_offset(0) 1439 , m_runOffset(0) 1440 , m_atBreak(true) 1441 { 1442 } 1443 1444 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior) 1445 : m_offset(0) 1446 , m_runOffset(0) 1447 , m_atBreak(true) 1448 , m_textIterator(range, behavior) 1449 { 1450 while (!atEnd() && !m_textIterator.length()) 1451 m_textIterator.advance(); 1452 } 1453 1454 PassRefPtr<Range> BackwardsCharacterIterator::range() const 1455 { 1456 RefPtr<Range> r = m_textIterator.range(); 1457 if (!m_textIterator.atEnd()) { 1458 if (m_textIterator.length() <= 1) 1459 ASSERT(m_runOffset == 0); 1460 else { 1461 Node* n = r->startContainer(); 1462 ASSERT(n == r->endContainer()); 1463 int offset = r->endOffset() - m_runOffset; 1464 ExceptionCode ec = 0; 1465 r->setStart(n, offset - 1, ec); 1466 r->setEnd(n, offset, ec); 1467 ASSERT(!ec); 1468 } 1469 } 1470 return r.release(); 1471 } 1472 1473 void BackwardsCharacterIterator::advance(int count) 1474 { 1475 if (count <= 0) { 1476 ASSERT(!count); 1477 return; 1478 } 1479 1480 m_atBreak = false; 1481 1482 int remaining = m_textIterator.length() - m_runOffset; 1483 if (count < remaining) { 1484 m_runOffset += count; 1485 m_offset += count; 1486 return; 1487 } 1488 1489 count -= remaining; 1490 m_offset += remaining; 1491 1492 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { 1493 int runLength = m_textIterator.length(); 1494 if (runLength == 0) 1495 m_atBreak = true; 1496 else { 1497 if (count < runLength) { 1498 m_runOffset = count; 1499 m_offset += count; 1500 return; 1501 } 1502 1503 count -= runLength; 1504 m_offset += runLength; 1505 } 1506 } 1507 1508 m_atBreak = true; 1509 m_runOffset = 0; 1510 } 1511 1512 // -------- 1513 1514 WordAwareIterator::WordAwareIterator() 1515 : m_previousText(0) 1516 , m_didLookAhead(false) 1517 { 1518 } 1519 1520 WordAwareIterator::WordAwareIterator(const Range* r) 1521 : m_previousText(0) 1522 , m_didLookAhead(true) // so we consider the first chunk from the text iterator 1523 , m_textIterator(r) 1524 { 1525 advance(); // get in position over the first chunk of text 1526 } 1527 1528 WordAwareIterator::~WordAwareIterator() 1529 { 1530 } 1531 1532 // We're always in one of these modes: 1533 // - The current chunk in the text iterator is our current chunk 1534 // (typically its a piece of whitespace, or text that ended with whitespace) 1535 // - The previous chunk in the text iterator is our current chunk 1536 // (we looked ahead to the next chunk and found a word boundary) 1537 // - We built up our own chunk of text from many chunks from the text iterator 1538 1539 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1540 1541 void WordAwareIterator::advance() 1542 { 1543 m_previousText = 0; 1544 m_buffer.clear(); // toss any old buffer we built up 1545 1546 // If last time we did a look-ahead, start with that looked-ahead chunk now 1547 if (!m_didLookAhead) { 1548 ASSERT(!m_textIterator.atEnd()); 1549 m_textIterator.advance(); 1550 } 1551 m_didLookAhead = false; 1552 1553 // Go to next non-empty chunk 1554 while (!m_textIterator.atEnd() && m_textIterator.length() == 0) 1555 m_textIterator.advance(); 1556 m_range = m_textIterator.range(); 1557 1558 if (m_textIterator.atEnd()) 1559 return; 1560 1561 while (1) { 1562 // If this chunk ends in whitespace we can just use it as our chunk. 1563 if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1])) 1564 return; 1565 1566 // If this is the first chunk that failed, save it in previousText before look ahead 1567 if (m_buffer.isEmpty()) { 1568 m_previousText = m_textIterator.characters(); 1569 m_previousLength = m_textIterator.length(); 1570 } 1571 1572 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1573 m_textIterator.advance(); 1574 if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) { 1575 m_didLookAhead = true; 1576 return; 1577 } 1578 1579 if (m_buffer.isEmpty()) { 1580 // Start gobbling chunks until we get to a suitable stopping point 1581 m_buffer.append(m_previousText, m_previousLength); 1582 m_previousText = 0; 1583 } 1584 m_buffer.append(m_textIterator.characters(), m_textIterator.length()); 1585 int exception = 0; 1586 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception); 1587 } 1588 } 1589 1590 int WordAwareIterator::length() const 1591 { 1592 if (!m_buffer.isEmpty()) 1593 return m_buffer.size(); 1594 if (m_previousText) 1595 return m_previousLength; 1596 return m_textIterator.length(); 1597 } 1598 1599 const UChar* WordAwareIterator::characters() const 1600 { 1601 if (!m_buffer.isEmpty()) 1602 return m_buffer.data(); 1603 if (m_previousText) 1604 return m_previousText; 1605 return m_textIterator.characters(); 1606 } 1607 1608 // -------- 1609 1610 static inline UChar foldQuoteMarkOrSoftHyphen(UChar c) 1611 { 1612 switch (c) { 1613 case hebrewPunctuationGershayim: 1614 case leftDoubleQuotationMark: 1615 case rightDoubleQuotationMark: 1616 return '"'; 1617 case hebrewPunctuationGeresh: 1618 case leftSingleQuotationMark: 1619 case rightSingleQuotationMark: 1620 return '\''; 1621 case softHyphen: 1622 // Replace soft hyphen with an ignorable character so that their presence or absence will 1623 // not affect string comparison. 1624 return 0; 1625 default: 1626 return c; 1627 } 1628 } 1629 1630 static inline void foldQuoteMarksAndSoftHyphens(String& s) 1631 { 1632 s.replace(hebrewPunctuationGeresh, '\''); 1633 s.replace(hebrewPunctuationGershayim, '"'); 1634 s.replace(leftDoubleQuotationMark, '"'); 1635 s.replace(leftSingleQuotationMark, '\''); 1636 s.replace(rightDoubleQuotationMark, '"'); 1637 s.replace(rightSingleQuotationMark, '\''); 1638 // Replace soft hyphen with an ignorable character so that their presence or absence will 1639 // not affect string comparison. 1640 s.replace(softHyphen, 0); 1641 } 1642 1643 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION 1644 1645 static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) 1646 { 1647 for (size_t i = 0; i < length; ++i) 1648 data[i] = foldQuoteMarkOrSoftHyphen(data[i]); 1649 } 1650 1651 static const size_t minimumSearchBufferSize = 8192; 1652 1653 #ifndef NDEBUG 1654 static bool searcherInUse; 1655 #endif 1656 1657 static UStringSearch* createSearcher() 1658 { 1659 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1660 // but it doesn't matter exactly what it is, since we don't perform any searches 1661 // without setting both the pattern and the text. 1662 UErrorCode status = U_ZERO_ERROR; 1663 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status); 1664 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1665 return searcher; 1666 } 1667 1668 static UStringSearch* searcher() 1669 { 1670 static UStringSearch* searcher = createSearcher(); 1671 return searcher; 1672 } 1673 1674 static inline void lockSearcher() 1675 { 1676 #ifndef NDEBUG 1677 ASSERT(!searcherInUse); 1678 searcherInUse = true; 1679 #endif 1680 } 1681 1682 static inline void unlockSearcher() 1683 { 1684 #ifndef NDEBUG 1685 ASSERT(searcherInUse); 1686 searcherInUse = false; 1687 #endif 1688 } 1689 1690 // ICU's search ignores the distinction between small kana letters and ones 1691 // that are not small, and also characters that differ only in the voicing 1692 // marks when considering only primary collation strength diffrences. 1693 // This is not helpful for end users, since these differences make words 1694 // distinct, so for our purposes we need these to be considered. 1695 // The Unicode folks do not think the collation algorithm should be 1696 // changed. To work around this, we would like to tailor the ICU searcher, 1697 // but we can't get that to work yet. So instead, we check for cases where 1698 // these differences occur, and skip those matches. 1699 1700 // We refer to the above technique as the "kana workaround". The next few 1701 // functions are helper functinos for the kana workaround. 1702 1703 static inline bool isKanaLetter(UChar character) 1704 { 1705 // Hiragana letters. 1706 if (character >= 0x3041 && character <= 0x3096) 1707 return true; 1708 1709 // Katakana letters. 1710 if (character >= 0x30A1 && character <= 0x30FA) 1711 return true; 1712 if (character >= 0x31F0 && character <= 0x31FF) 1713 return true; 1714 1715 // Halfwidth katakana letters. 1716 if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70) 1717 return true; 1718 1719 return false; 1720 } 1721 1722 static inline bool isSmallKanaLetter(UChar character) 1723 { 1724 ASSERT(isKanaLetter(character)); 1725 1726 switch (character) { 1727 case 0x3041: // HIRAGANA LETTER SMALL A 1728 case 0x3043: // HIRAGANA LETTER SMALL I 1729 case 0x3045: // HIRAGANA LETTER SMALL U 1730 case 0x3047: // HIRAGANA LETTER SMALL E 1731 case 0x3049: // HIRAGANA LETTER SMALL O 1732 case 0x3063: // HIRAGANA LETTER SMALL TU 1733 case 0x3083: // HIRAGANA LETTER SMALL YA 1734 case 0x3085: // HIRAGANA LETTER SMALL YU 1735 case 0x3087: // HIRAGANA LETTER SMALL YO 1736 case 0x308E: // HIRAGANA LETTER SMALL WA 1737 case 0x3095: // HIRAGANA LETTER SMALL KA 1738 case 0x3096: // HIRAGANA LETTER SMALL KE 1739 case 0x30A1: // KATAKANA LETTER SMALL A 1740 case 0x30A3: // KATAKANA LETTER SMALL I 1741 case 0x30A5: // KATAKANA LETTER SMALL U 1742 case 0x30A7: // KATAKANA LETTER SMALL E 1743 case 0x30A9: // KATAKANA LETTER SMALL O 1744 case 0x30C3: // KATAKANA LETTER SMALL TU 1745 case 0x30E3: // KATAKANA LETTER SMALL YA 1746 case 0x30E5: // KATAKANA LETTER SMALL YU 1747 case 0x30E7: // KATAKANA LETTER SMALL YO 1748 case 0x30EE: // KATAKANA LETTER SMALL WA 1749 case 0x30F5: // KATAKANA LETTER SMALL KA 1750 case 0x30F6: // KATAKANA LETTER SMALL KE 1751 case 0x31F0: // KATAKANA LETTER SMALL KU 1752 case 0x31F1: // KATAKANA LETTER SMALL SI 1753 case 0x31F2: // KATAKANA LETTER SMALL SU 1754 case 0x31F3: // KATAKANA LETTER SMALL TO 1755 case 0x31F4: // KATAKANA LETTER SMALL NU 1756 case 0x31F5: // KATAKANA LETTER SMALL HA 1757 case 0x31F6: // KATAKANA LETTER SMALL HI 1758 case 0x31F7: // KATAKANA LETTER SMALL HU 1759 case 0x31F8: // KATAKANA LETTER SMALL HE 1760 case 0x31F9: // KATAKANA LETTER SMALL HO 1761 case 0x31FA: // KATAKANA LETTER SMALL MU 1762 case 0x31FB: // KATAKANA LETTER SMALL RA 1763 case 0x31FC: // KATAKANA LETTER SMALL RI 1764 case 0x31FD: // KATAKANA LETTER SMALL RU 1765 case 0x31FE: // KATAKANA LETTER SMALL RE 1766 case 0x31FF: // KATAKANA LETTER SMALL RO 1767 case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A 1768 case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I 1769 case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U 1770 case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E 1771 case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O 1772 case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA 1773 case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU 1774 case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO 1775 case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU 1776 return true; 1777 } 1778 return false; 1779 } 1780 1781 enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark }; 1782 1783 static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character) 1784 { 1785 ASSERT(isKanaLetter(character)); 1786 1787 switch (character) { 1788 case 0x304C: // HIRAGANA LETTER GA 1789 case 0x304E: // HIRAGANA LETTER GI 1790 case 0x3050: // HIRAGANA LETTER GU 1791 case 0x3052: // HIRAGANA LETTER GE 1792 case 0x3054: // HIRAGANA LETTER GO 1793 case 0x3056: // HIRAGANA LETTER ZA 1794 case 0x3058: // HIRAGANA LETTER ZI 1795 case 0x305A: // HIRAGANA LETTER ZU 1796 case 0x305C: // HIRAGANA LETTER ZE 1797 case 0x305E: // HIRAGANA LETTER ZO 1798 case 0x3060: // HIRAGANA LETTER DA 1799 case 0x3062: // HIRAGANA LETTER DI 1800 case 0x3065: // HIRAGANA LETTER DU 1801 case 0x3067: // HIRAGANA LETTER DE 1802 case 0x3069: // HIRAGANA LETTER DO 1803 case 0x3070: // HIRAGANA LETTER BA 1804 case 0x3073: // HIRAGANA LETTER BI 1805 case 0x3076: // HIRAGANA LETTER BU 1806 case 0x3079: // HIRAGANA LETTER BE 1807 case 0x307C: // HIRAGANA LETTER BO 1808 case 0x3094: // HIRAGANA LETTER VU 1809 case 0x30AC: // KATAKANA LETTER GA 1810 case 0x30AE: // KATAKANA LETTER GI 1811 case 0x30B0: // KATAKANA LETTER GU 1812 case 0x30B2: // KATAKANA LETTER GE 1813 case 0x30B4: // KATAKANA LETTER GO 1814 case 0x30B6: // KATAKANA LETTER ZA 1815 case 0x30B8: // KATAKANA LETTER ZI 1816 case 0x30BA: // KATAKANA LETTER ZU 1817 case 0x30BC: // KATAKANA LETTER ZE 1818 case 0x30BE: // KATAKANA LETTER ZO 1819 case 0x30C0: // KATAKANA LETTER DA 1820 case 0x30C2: // KATAKANA LETTER DI 1821 case 0x30C5: // KATAKANA LETTER DU 1822 case 0x30C7: // KATAKANA LETTER DE 1823 case 0x30C9: // KATAKANA LETTER DO 1824 case 0x30D0: // KATAKANA LETTER BA 1825 case 0x30D3: // KATAKANA LETTER BI 1826 case 0x30D6: // KATAKANA LETTER BU 1827 case 0x30D9: // KATAKANA LETTER BE 1828 case 0x30DC: // KATAKANA LETTER BO 1829 case 0x30F4: // KATAKANA LETTER VU 1830 case 0x30F7: // KATAKANA LETTER VA 1831 case 0x30F8: // KATAKANA LETTER VI 1832 case 0x30F9: // KATAKANA LETTER VE 1833 case 0x30FA: // KATAKANA LETTER VO 1834 return VoicedSoundMark; 1835 case 0x3071: // HIRAGANA LETTER PA 1836 case 0x3074: // HIRAGANA LETTER PI 1837 case 0x3077: // HIRAGANA LETTER PU 1838 case 0x307A: // HIRAGANA LETTER PE 1839 case 0x307D: // HIRAGANA LETTER PO 1840 case 0x30D1: // KATAKANA LETTER PA 1841 case 0x30D4: // KATAKANA LETTER PI 1842 case 0x30D7: // KATAKANA LETTER PU 1843 case 0x30DA: // KATAKANA LETTER PE 1844 case 0x30DD: // KATAKANA LETTER PO 1845 return SemiVoicedSoundMark; 1846 } 1847 return NoVoicedSoundMark; 1848 } 1849 1850 static inline bool isCombiningVoicedSoundMark(UChar character) 1851 { 1852 switch (character) { 1853 case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 1854 case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 1855 return true; 1856 } 1857 return false; 1858 } 1859 1860 static inline bool containsKanaLetters(const String& pattern) 1861 { 1862 const UChar* characters = pattern.characters(); 1863 unsigned length = pattern.length(); 1864 for (unsigned i = 0; i < length; ++i) { 1865 if (isKanaLetter(characters[i])) 1866 return true; 1867 } 1868 return false; 1869 } 1870 1871 static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer) 1872 { 1873 ASSERT(length); 1874 1875 buffer.resize(length); 1876 1877 UErrorCode status = U_ZERO_ERROR; 1878 size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status); 1879 ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR); 1880 ASSERT(bufferSize); 1881 1882 buffer.resize(bufferSize); 1883 1884 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) 1885 return; 1886 1887 status = U_ZERO_ERROR; 1888 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status); 1889 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 1890 } 1891 1892 static bool isNonLatin1Separator(UChar32 character) 1893 { 1894 ASSERT_ARG(character, character >= 256); 1895 1896 return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK); 1897 } 1898 1899 static inline bool isSeparator(UChar32 character) 1900 { 1901 static const bool latin1SeparatorTable[256] = { 1902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1903 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1904 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 1905 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ? 1906 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ 1907 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _ 1908 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ` 1909 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~ 1910 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1911 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1912 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1913 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1914 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1915 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1916 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1917 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 1918 }; 1919 1920 if (character < 256) 1921 return latin1SeparatorTable[character]; 1922 1923 return isNonLatin1Separator(character); 1924 } 1925 1926 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1927 : m_target(target) 1928 , m_options(options) 1929 , m_prefixLength(0) 1930 , m_atBreak(true) 1931 , m_needsMoreContext(options & AtWordStarts) 1932 , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target)) 1933 { 1934 ASSERT(!m_target.isEmpty()); 1935 1936 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1937 // of doing it in a separate replacement pass here, but ICU doesn't offer a way 1938 // to add tailoring on top of the locale-specific tailoring as of this writing. 1939 foldQuoteMarksAndSoftHyphens(m_target); 1940 1941 size_t targetLength = m_target.length(); 1942 m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize)); 1943 m_overlap = m_buffer.capacity() / 4; 1944 1945 if ((m_options & AtWordStarts) && targetLength) { 1946 UChar32 targetFirstCharacter; 1947 U16_GET(m_target.characters(), 0, 0, targetLength, targetFirstCharacter); 1948 // Characters in the separator category never really occur at the beginning of a word, 1949 // so if the target begins with such a character, we just ignore the AtWordStart option. 1950 if (isSeparator(targetFirstCharacter)) { 1951 m_options &= ~AtWordStarts; 1952 m_needsMoreContext = false; 1953 } 1954 } 1955 1956 // Grab the single global searcher. 1957 // If we ever have a reason to do more than once search buffer at once, we'll have 1958 // to move to multiple searchers. 1959 lockSearcher(); 1960 1961 UStringSearch* searcher = WebCore::searcher(); 1962 UCollator* collator = usearch_getCollator(searcher); 1963 1964 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY; 1965 if (ucol_getStrength(collator) != strength) { 1966 ucol_setStrength(collator, strength); 1967 usearch_reset(searcher); 1968 } 1969 1970 UErrorCode status = U_ZERO_ERROR; 1971 usearch_setPattern(searcher, m_target.characters(), targetLength, &status); 1972 ASSERT(status == U_ZERO_ERROR); 1973 1974 // The kana workaround requires a normalized copy of the target string. 1975 if (m_targetRequiresKanaWorkaround) 1976 normalizeCharacters(m_target.characters(), m_target.length(), m_normalizedTarget); 1977 } 1978 1979 inline SearchBuffer::~SearchBuffer() 1980 { 1981 // Leave the static object pointing to a valid string. 1982 UErrorCode status = U_ZERO_ERROR; 1983 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1984 ASSERT(status == U_ZERO_ERROR); 1985 1986 unlockSearcher(); 1987 } 1988 1989 inline size_t SearchBuffer::append(const UChar* characters, size_t length) 1990 { 1991 ASSERT(length); 1992 1993 if (m_atBreak) { 1994 m_buffer.shrink(0); 1995 m_prefixLength = 0; 1996 m_atBreak = false; 1997 } else if (m_buffer.size() == m_buffer.capacity()) { 1998 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 1999 m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap); 2000 m_buffer.shrink(m_overlap); 2001 } 2002 2003 size_t oldLength = m_buffer.size(); 2004 size_t usableLength = min(m_buffer.capacity() - oldLength, length); 2005 ASSERT(usableLength); 2006 m_buffer.append(characters, usableLength); 2007 foldQuoteMarksAndSoftHyphens(m_buffer.data() + oldLength, usableLength); 2008 return usableLength; 2009 } 2010 2011 inline bool SearchBuffer::needsMoreContext() const 2012 { 2013 return m_needsMoreContext; 2014 } 2015 2016 inline void SearchBuffer::prependContext(const UChar* characters, size_t length) 2017 { 2018 ASSERT(m_needsMoreContext); 2019 ASSERT(m_prefixLength == m_buffer.size()); 2020 2021 if (!length) 2022 return; 2023 2024 m_atBreak = false; 2025 2026 size_t wordBoundaryContextStart = length; 2027 if (wordBoundaryContextStart) { 2028 U16_BACK_1(characters, 0, wordBoundaryContextStart); 2029 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); 2030 } 2031 2032 size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); 2033 m_buffer.prepend(characters + length - usableLength, usableLength); 2034 m_prefixLength += usableLength; 2035 2036 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 2037 m_needsMoreContext = false; 2038 } 2039 2040 inline bool SearchBuffer::atBreak() const 2041 { 2042 return m_atBreak; 2043 } 2044 2045 inline void SearchBuffer::reachedBreak() 2046 { 2047 m_atBreak = true; 2048 } 2049 2050 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 2051 { 2052 // This function implements the kana workaround. If usearch treats 2053 // it as a match, but we do not want to, then it's a "bad match". 2054 if (!m_targetRequiresKanaWorkaround) 2055 return false; 2056 2057 // Normalize into a match buffer. We reuse a single buffer rather than 2058 // creating a new one each time. 2059 normalizeCharacters(match, matchLength, m_normalizedMatch); 2060 2061 const UChar* a = m_normalizedTarget.begin(); 2062 const UChar* aEnd = m_normalizedTarget.end(); 2063 2064 const UChar* b = m_normalizedMatch.begin(); 2065 const UChar* bEnd = m_normalizedMatch.end(); 2066 2067 while (true) { 2068 // Skip runs of non-kana-letter characters. This is necessary so we can 2069 // correctly handle strings where the target and match have different-length 2070 // runs of characters that match, while still double checking the correctness 2071 // of matches of kana letters with other kana letters. 2072 while (a != aEnd && !isKanaLetter(*a)) 2073 ++a; 2074 while (b != bEnd && !isKanaLetter(*b)) 2075 ++b; 2076 2077 // If we reached the end of either the target or the match, we should have 2078 // reached the end of both; both should have the same number of kana letters. 2079 if (a == aEnd || b == bEnd) { 2080 ASSERT(a == aEnd); 2081 ASSERT(b == bEnd); 2082 return false; 2083 } 2084 2085 // Check for differences in the kana letter character itself. 2086 if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b)) 2087 return true; 2088 if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b)) 2089 return true; 2090 ++a; 2091 ++b; 2092 2093 // Check for differences in combining voiced sound marks found after the letter. 2094 while (1) { 2095 if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) { 2096 if (b != bEnd && isCombiningVoicedSoundMark(*b)) 2097 return true; 2098 break; 2099 } 2100 if (!(b != bEnd && isCombiningVoicedSoundMark(*b))) 2101 return true; 2102 if (*a != *b) 2103 return true; 2104 ++a; 2105 ++b; 2106 } 2107 } 2108 } 2109 2110 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 2111 { 2112 ASSERT(m_options & AtWordStarts); 2113 2114 if (!start) 2115 return true; 2116 2117 if (m_options & TreatMedialCapitalAsWordStart) { 2118 int size = m_buffer.size(); 2119 int offset = start; 2120 UChar32 firstCharacter; 2121 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 2122 UChar32 previousCharacter; 2123 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 2124 2125 if (isSeparator(firstCharacter)) { 2126 // The start of a separator run is a word start (".org" in "webkit.org"). 2127 if (!isSeparator(previousCharacter)) 2128 return true; 2129 } else if (isASCIIUpper(firstCharacter)) { 2130 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 2131 if (!isASCIIUpper(previousCharacter)) 2132 return true; 2133 // The last character of an uppercase run followed by a non-separator, non-digit 2134 // is a word start ("Request" in "XMLHTTPRequest"). 2135 offset = start; 2136 U16_FWD_1(m_buffer.data(), offset, size); 2137 UChar32 nextCharacter = 0; 2138 if (offset < size) 2139 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 2140 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 2141 return true; 2142 } else if (isASCIIDigit(firstCharacter)) { 2143 // The start of a digit run is a word start ("2" in "WebKit2"). 2144 if (!isASCIIDigit(previousCharacter)) 2145 return true; 2146 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 2147 // The start of a non-separator, non-uppercase, non-digit run is a word start, 2148 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 2149 return true; 2150 } 2151 } 2152 2153 size_t wordBreakSearchStart = start + length; 2154 while (wordBreakSearchStart > start) 2155 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); 2156 return wordBreakSearchStart == start; 2157 } 2158 2159 inline size_t SearchBuffer::search(size_t& start) 2160 { 2161 size_t size = m_buffer.size(); 2162 if (m_atBreak) { 2163 if (!size) 2164 return 0; 2165 } else { 2166 if (size != m_buffer.capacity()) 2167 return 0; 2168 } 2169 2170 UStringSearch* searcher = WebCore::searcher(); 2171 2172 UErrorCode status = U_ZERO_ERROR; 2173 usearch_setText(searcher, m_buffer.data(), size, &status); 2174 ASSERT(status == U_ZERO_ERROR); 2175 2176 usearch_setOffset(searcher, m_prefixLength, &status); 2177 ASSERT(status == U_ZERO_ERROR); 2178 2179 int matchStart = usearch_next(searcher, &status); 2180 ASSERT(status == U_ZERO_ERROR); 2181 2182 nextMatch: 2183 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 2184 ASSERT(matchStart == USEARCH_DONE); 2185 return 0; 2186 } 2187 2188 // Matches that start in the overlap area are only tentative. 2189 // The same match may appear later, matching more characters, 2190 // possibly including a combining character that's not yet in the buffer. 2191 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2192 size_t overlap = m_overlap; 2193 if (m_options & AtWordStarts) { 2194 // Ensure that there is sufficient context before matchStart the next time around for 2195 // determining if it is at a word boundary. 2196 int wordBoundaryContextStart = matchStart; 2197 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2198 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); 2199 overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart)); 2200 } 2201 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2202 m_prefixLength -= min(m_prefixLength, size - overlap); 2203 m_buffer.shrink(overlap); 2204 return 0; 2205 } 2206 2207 size_t matchedLength = usearch_getMatchedLength(searcher); 2208 ASSERT(matchStart + matchedLength <= size); 2209 2210 // If this match is "bad", move on to the next match. 2211 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { 2212 matchStart = usearch_next(searcher, &status); 2213 ASSERT(status == U_ZERO_ERROR); 2214 goto nextMatch; 2215 } 2216 2217 size_t newSize = size - (matchStart + 1); 2218 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2219 m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1); 2220 m_buffer.shrink(newSize); 2221 2222 start = size - matchStart; 2223 return matchedLength; 2224 } 2225 2226 #else // !ICU_UNICODE 2227 2228 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 2229 : m_target(options & CaseInsensitive ? target.foldCase() : target) 2230 , m_options(options) 2231 , m_buffer(m_target.length()) 2232 , m_isCharacterStartBuffer(m_target.length()) 2233 , m_isBufferFull(false) 2234 , m_cursor(0) 2235 { 2236 ASSERT(!m_target.isEmpty()); 2237 m_target.replace(noBreakSpace, ' '); 2238 foldQuoteMarksAndSoftHyphens(m_target); 2239 } 2240 2241 inline SearchBuffer::~SearchBuffer() 2242 { 2243 } 2244 2245 inline void SearchBuffer::reachedBreak() 2246 { 2247 m_cursor = 0; 2248 m_isBufferFull = false; 2249 } 2250 2251 inline bool SearchBuffer::atBreak() const 2252 { 2253 return !m_cursor && !m_isBufferFull; 2254 } 2255 2256 inline void SearchBuffer::append(UChar c, bool isStart) 2257 { 2258 m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMarkOrSoftHyphen(c); 2259 m_isCharacterStartBuffer[m_cursor] = isStart; 2260 if (++m_cursor == m_target.length()) { 2261 m_cursor = 0; 2262 m_isBufferFull = true; 2263 } 2264 } 2265 2266 inline size_t SearchBuffer::append(const UChar* characters, size_t length) 2267 { 2268 ASSERT(length); 2269 if (!(m_options & CaseInsensitive)) { 2270 append(characters[0], true); 2271 return 1; 2272 } 2273 const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough 2274 UChar foldedCharacters[maxFoldedCharacters]; 2275 bool error; 2276 int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error); 2277 ASSERT(!error); 2278 ASSERT(numFoldedCharacters); 2279 ASSERT(numFoldedCharacters <= maxFoldedCharacters); 2280 if (!error && numFoldedCharacters) { 2281 numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters); 2282 append(foldedCharacters[0], true); 2283 for (int i = 1; i < numFoldedCharacters; ++i) 2284 append(foldedCharacters[i], false); 2285 } 2286 return 1; 2287 } 2288 2289 inline bool SearchBuffer::needsMoreContext() const 2290 { 2291 return false; 2292 } 2293 2294 void SearchBuffer::prependContext(const UChar*, size_t) 2295 { 2296 ASSERT_NOT_REACHED(); 2297 } 2298 2299 inline size_t SearchBuffer::search(size_t& start) 2300 { 2301 if (!m_isBufferFull) 2302 return 0; 2303 if (!m_isCharacterStartBuffer[m_cursor]) 2304 return 0; 2305 2306 size_t tailSpace = m_target.length() - m_cursor; 2307 if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0) 2308 return 0; 2309 if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0) 2310 return 0; 2311 2312 start = length(); 2313 2314 // Now that we've found a match once, we don't want to find it again, because those 2315 // are the SearchBuffer semantics, allowing for a buffer where you append more than one 2316 // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if 2317 // we want to get rid of that in the future we could track this with a separate boolean 2318 // or even move the characters to the start of the buffer and set m_isBufferFull to false. 2319 m_isCharacterStartBuffer[m_cursor] = false; 2320 2321 return start; 2322 } 2323 2324 // Returns the number of characters that were appended to the buffer (what we are searching in). 2325 // That's not necessarily the same length as the passed-in target string, because case folding 2326 // can make two strings match even though they're not the same length. 2327 size_t SearchBuffer::length() const 2328 { 2329 size_t bufferSize = m_target.length(); 2330 size_t length = 0; 2331 for (size_t i = 0; i < bufferSize; ++i) 2332 length += m_isCharacterStartBuffer[i]; 2333 return length; 2334 } 2335 2336 #endif // !ICU_UNICODE 2337 2338 // -------- 2339 2340 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) 2341 { 2342 int length = 0; 2343 for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) 2344 length += it.length(); 2345 2346 return length; 2347 } 2348 2349 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2350 { 2351 CharacterIterator entireRangeIterator(entireRange); 2352 return characterSubrange(entireRangeIterator, characterOffset, characterCount); 2353 } 2354 2355 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) 2356 { 2357 RefPtr<Range> resultRange = scope->document()->createRange(); 2358 2359 int docTextPosition = 0; 2360 int rangeEnd = rangeLocation + rangeLength; 2361 bool startRangeFound = false; 2362 2363 RefPtr<Range> textRunRange; 2364 2365 TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); 2366 2367 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. 2368 if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { 2369 textRunRange = it.range(); 2370 2371 ExceptionCode ec = 0; 2372 resultRange->setStart(textRunRange->startContainer(), 0, ec); 2373 ASSERT(!ec); 2374 resultRange->setEnd(textRunRange->startContainer(), 0, ec); 2375 ASSERT(!ec); 2376 2377 return resultRange.release(); 2378 } 2379 2380 for (; !it.atEnd(); it.advance()) { 2381 int len = it.length(); 2382 textRunRange = it.range(); 2383 2384 bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len; 2385 bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len; 2386 2387 // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only 2388 // in those cases that textRunRange is used. 2389 if (foundEnd) { 2390 // FIXME: This is a workaround for the fact that the end of a run is often at the wrong 2391 // position for emitted '\n's. 2392 if (len == 1 && it.characters()[0] == '\n') { 2393 scope->document()->updateLayoutIgnorePendingStylesheets(); 2394 it.advance(); 2395 if (!it.atEnd()) { 2396 RefPtr<Range> range = it.range(); 2397 ExceptionCode ec = 0; 2398 textRunRange->setEnd(range->startContainer(), range->startOffset(), ec); 2399 ASSERT(!ec); 2400 } else { 2401 Position runStart = textRunRange->startPosition(); 2402 Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); 2403 if (runEnd.isNotNull()) { 2404 ExceptionCode ec = 0; 2405 textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ec); 2406 ASSERT(!ec); 2407 } 2408 } 2409 } 2410 } 2411 2412 if (foundStart) { 2413 startRangeFound = true; 2414 int exception = 0; 2415 if (textRunRange->startContainer()->isTextNode()) { 2416 int offset = rangeLocation - docTextPosition; 2417 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); 2418 } else { 2419 if (rangeLocation == docTextPosition) 2420 resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception); 2421 else 2422 resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2423 } 2424 } 2425 2426 if (foundEnd) { 2427 int exception = 0; 2428 if (textRunRange->startContainer()->isTextNode()) { 2429 int offset = rangeEnd - docTextPosition; 2430 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); 2431 } else { 2432 if (rangeEnd == docTextPosition) 2433 resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception); 2434 else 2435 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2436 } 2437 docTextPosition += len; 2438 break; 2439 } 2440 docTextPosition += len; 2441 } 2442 2443 if (!startRangeFound) 2444 return 0; 2445 2446 if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds 2447 int exception = 0; 2448 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); 2449 } 2450 2451 return resultRange.release(); 2452 } 2453 2454 bool TextIterator::locationAndLengthFromRange(const Range* range, size_t& location, size_t& length) 2455 { 2456 location = notFound; 2457 length = 0; 2458 2459 if (!range->startContainer()) 2460 return false; 2461 2462 Element* selectionRoot = range->ownerDocument()->frame()->selection()->rootEditableElement(); 2463 Element* scope = selectionRoot ? selectionRoot : range->ownerDocument()->documentElement(); 2464 2465 // The critical assumption is that this only gets called with ranges that 2466 // concentrate on a given area containing the selection root. This is done 2467 // because of text fields and textareas. The DOM for those is not 2468 // directly in the document DOM, so ensure that the range does not cross a 2469 // boundary of one of those. 2470 if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope)) 2471 return false; 2472 if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope)) 2473 return false; 2474 2475 RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset()); 2476 ASSERT(testRange->startContainer() == scope); 2477 location = TextIterator::rangeLength(testRange.get()); 2478 2479 ExceptionCode ec; 2480 testRange->setEnd(range->endContainer(), range->endOffset(), ec); 2481 ASSERT(testRange->startContainer() == scope); 2482 length = TextIterator::rangeLength(testRange.get()) - location; 2483 return true; 2484 } 2485 2486 // -------- 2487 2488 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior defaultBehavior) 2489 { 2490 UChar* result = 0; 2491 2492 // Do this in pieces to avoid massive reallocations if there is a large amount of text. 2493 // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS. 2494 static const unsigned cMaxSegmentSize = 1 << 16; 2495 bufferLength = 0; 2496 typedef pair<UChar*, unsigned> TextSegment; 2497 OwnPtr<Vector<TextSegment> > textSegments; 2498 Vector<UChar> textBuffer; 2499 textBuffer.reserveInitialCapacity(cMaxSegmentSize); 2500 TextIteratorBehavior behavior = defaultBehavior; 2501 if (!isDisplayString) 2502 behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding); 2503 2504 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2505 if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) { 2506 UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar))); 2507 if (!newSegmentBuffer) 2508 goto exit; 2509 memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar)); 2510 if (!textSegments) 2511 textSegments = adoptPtr(new Vector<TextSegment>); 2512 textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size())); 2513 textBuffer.clear(); 2514 } 2515 textBuffer.append(it.characters(), it.length()); 2516 bufferLength += it.length(); 2517 } 2518 2519 if (!bufferLength) 2520 return 0; 2521 2522 // Since we know the size now, we can make a single buffer out of the pieces with one big alloc 2523 result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar))); 2524 if (!result) 2525 goto exit; 2526 2527 { 2528 UChar* resultPos = result; 2529 if (textSegments) { 2530 unsigned size = textSegments->size(); 2531 for (unsigned i = 0; i < size; ++i) { 2532 const TextSegment& segment = textSegments->at(i); 2533 memcpy(resultPos, segment.first, segment.second * sizeof(UChar)); 2534 resultPos += segment.second; 2535 } 2536 } 2537 memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar)); 2538 } 2539 2540 exit: 2541 if (textSegments) { 2542 unsigned size = textSegments->size(); 2543 for (unsigned i = 0; i < size; ++i) 2544 free(textSegments->at(i).first); 2545 } 2546 2547 if (isDisplayString && r->ownerDocument()) 2548 r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength); 2549 2550 return result; 2551 } 2552 2553 String plainText(const Range* r, TextIteratorBehavior defaultBehavior) 2554 { 2555 unsigned length; 2556 UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false, defaultBehavior); 2557 if (!buf) 2558 return ""; 2559 String result(buf, length); 2560 free(buf); 2561 return result; 2562 } 2563 2564 static inline bool isAllCollapsibleWhitespace(const String& string) 2565 { 2566 const UChar* characters = string.characters(); 2567 unsigned length = string.length(); 2568 for (unsigned i = 0; i < length; ++i) { 2569 if (!isCollapsibleWhitespace(characters[i])) 2570 return false; 2571 } 2572 return true; 2573 } 2574 2575 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) 2576 { 2577 ExceptionCode ec = 0; 2578 RefPtr<Range> result = range->cloneRange(ec); 2579 ASSERT(!ec); 2580 result->collapse(!forward, ec); 2581 ASSERT(!ec); 2582 return result.release(); 2583 } 2584 2585 static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) 2586 { 2587 matchStart = 0; 2588 size_t matchLength = 0; 2589 2590 SearchBuffer buffer(target, options); 2591 2592 if (buffer.needsMoreContext()) { 2593 RefPtr<Range> startRange = it.range(); 2594 RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange(); 2595 ExceptionCode ec = 0; 2596 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), ec); 2597 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2598 buffer.prependContext(backwardsIterator.characters(), backwardsIterator.length()); 2599 if (!buffer.needsMoreContext()) 2600 break; 2601 } 2602 } 2603 2604 while (!it.atEnd()) { 2605 it.advance(buffer.append(it.characters(), it.length())); 2606 tryAgain: 2607 size_t matchStartOffset; 2608 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2609 // Note that we found a match, and where we found it. 2610 size_t lastCharacterInBufferOffset = it.characterOffset(); 2611 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2612 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2613 matchLength = newMatchLength; 2614 // If searching forward, stop on the first match. 2615 // If searching backward, don't stop, so we end up with the last match. 2616 if (!(options & Backwards)) 2617 break; 2618 goto tryAgain; 2619 } 2620 if (it.atBreak() && !buffer.atBreak()) { 2621 buffer.reachedBreak(); 2622 goto tryAgain; 2623 } 2624 } 2625 2626 return matchLength; 2627 } 2628 2629 PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) 2630 { 2631 // First, find the text. 2632 size_t matchStart; 2633 size_t matchLength; 2634 { 2635 CharacterIterator findIterator(range, TextIteratorEntersTextControls); 2636 matchLength = findPlainText(findIterator, target, options, matchStart); 2637 if (!matchLength) 2638 return collapsedToBoundary(range, !(options & Backwards)); 2639 } 2640 2641 // Then, find the document position of the start and the end of the text. 2642 CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls); 2643 return characterSubrange(computeRangeIterator, matchStart, matchLength); 2644 } 2645 2646 } 2647