1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2001 Dirk Mueller ( mueller (at) kde.org ) 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 6 * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #include "config.h" 26 #include "StringImpl.h" 27 28 #include "AtomicString.h" 29 #include "CString.h" 30 #include "CharacterNames.h" 31 #include "FloatConversion.h" 32 #include "StringBuffer.h" 33 #include "StringHash.h" 34 #include "TextBreakIterator.h" 35 #include "TextEncoding.h" 36 #include "ThreadGlobalData.h" 37 #include <runtime/UString.h> 38 #include <wtf/dtoa.h> 39 #include <wtf/Assertions.h> 40 #include <wtf/Threading.h> 41 #include <wtf/unicode/Unicode.h> 42 43 using namespace WTF; 44 using namespace Unicode; 45 46 namespace WebCore { 47 48 static const unsigned minLengthToShare = 20; 49 50 static inline UChar* newUCharVector(unsigned n) 51 { 52 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * n)); 53 } 54 55 static inline void deleteUCharVector(const UChar* p) 56 { 57 fastFree(const_cast<UChar*>(p)); 58 } 59 60 // Some of the factory methods create buffers using fastMalloc. 61 // We must ensure that all allocations of StringImpl are allocated using 62 // fastMalloc so that we don't have mis-matched frees. We accomplish 63 // this by overriding the new and delete operators. 64 void* StringImpl::operator new(size_t size, void* address) 65 { 66 if (address) 67 return address; // Allocating using an internal buffer 68 return fastMalloc(size); 69 } 70 71 void* StringImpl::operator new(size_t size) 72 { 73 return fastMalloc(size); 74 } 75 76 void StringImpl::operator delete(void* address) 77 { 78 fastFree(address); 79 } 80 81 // This constructor is used only to create the empty string. 82 StringImpl::StringImpl() 83 : m_data(0) 84 , m_length(0) 85 , m_hash(0) 86 { 87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash() 88 // with impunity. The empty string is special because it is never entered into 89 // AtomicString's HashKey, but still needs to compare correctly. 90 hash(); 91 } 92 93 inline StringImpl::StringImpl(const UChar* characters, unsigned length) 94 : m_data(characters) 95 , m_length(length) 96 , m_hash(0) 97 { 98 ASSERT(characters); 99 ASSERT(length); 100 ASSERT(!bufferIsInternal()); 101 } 102 103 inline StringImpl::StringImpl(unsigned length) 104 : m_data(reinterpret_cast<const UChar*>(this + 1)) 105 , m_length(length) 106 , m_hash(0) 107 { 108 ASSERT(length); 109 ASSERT(bufferIsInternal()); 110 } 111 112 StringImpl::~StringImpl() 113 { 114 if (inTable()) 115 AtomicString::remove(this); 116 if (!bufferIsInternal()) { 117 SharedUChar* sharedBuffer = m_sharedBufferAndFlags.get(); 118 if (sharedBuffer) 119 sharedBuffer->deref(); 120 else 121 deleteUCharVector(m_data); 122 } 123 } 124 125 StringImpl* StringImpl::empty() 126 { 127 return threadGlobalData().emptyString(); 128 } 129 130 bool StringImpl::containsOnlyWhitespace() 131 { 132 // FIXME: The definition of whitespace here includes a number of characters 133 // that are not whitespace from the point of view of RenderText; I wonder if 134 // that's a problem in practice. 135 for (unsigned i = 0; i < m_length; i++) 136 if (!isASCIISpace(m_data[i])) 137 return false; 138 return true; 139 } 140 141 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 142 { 143 if (start >= m_length) 144 return empty(); 145 unsigned maxLength = m_length - start; 146 if (length >= maxLength) { 147 if (!start) 148 return this; 149 length = maxLength; 150 } 151 return create(m_data + start, length); 152 } 153 154 UChar32 StringImpl::characterStartingAt(unsigned i) 155 { 156 if (U16_IS_SINGLE(m_data[i])) 157 return m_data[i]; 158 if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) 159 return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); 160 return 0; 161 } 162 163 PassRefPtr<StringImpl> StringImpl::lower() 164 { 165 // Note: This is a hot function in the Dromaeo benchmark, specifically the 166 // no-op code path up through the first 'return' statement. 167 168 // First scan the string for uppercase and non-ASCII characters: 169 UChar ored = 0; 170 bool noUpper = true; 171 const UChar *end = m_data + m_length; 172 for (const UChar* chp = m_data; chp != end; chp++) { 173 if (UNLIKELY(isASCIIUpper(*chp))) 174 noUpper = false; 175 ored |= *chp; 176 } 177 178 // Nothing to do if the string is all ASCII with no uppercase. 179 if (noUpper && !(ored & ~0x7F)) 180 return this; 181 182 int32_t length = m_length; 183 UChar* data; 184 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 185 186 if (!(ored & ~0x7F)) { 187 // Do a faster loop for the case where all the characters are ASCII. 188 for (int i = 0; i < length; i++) { 189 UChar c = m_data[i]; 190 data[i] = toASCIILower(c); 191 } 192 return newImpl; 193 } 194 195 // Do a slower implementation for cases that include non-ASCII characters. 196 bool error; 197 int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error); 198 if (!error && realLength == length) 199 return newImpl; 200 newImpl = createUninitialized(realLength, data); 201 Unicode::toLower(data, realLength, m_data, m_length, &error); 202 if (error) 203 return this; 204 return newImpl; 205 } 206 207 PassRefPtr<StringImpl> StringImpl::upper() 208 { 209 // This function could be optimized for no-op cases the way lower() is, 210 // but in empirical testing, few actual calls to upper() are no-ops, so 211 // it wouldn't be worth the extra time for pre-scanning. 212 UChar* data; 213 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 214 int32_t length = m_length; 215 216 // Do a faster loop for the case where all the characters are ASCII. 217 UChar ored = 0; 218 for (int i = 0; i < length; i++) { 219 UChar c = m_data[i]; 220 ored |= c; 221 data[i] = toASCIIUpper(c); 222 } 223 if (!(ored & ~0x7F)) 224 return newImpl; 225 226 // Do a slower implementation for cases that include non-ASCII characters. 227 bool error; 228 int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error); 229 if (!error && realLength == length) 230 return newImpl; 231 newImpl = createUninitialized(realLength, data); 232 Unicode::toUpper(data, realLength, m_data, m_length, &error); 233 if (error) 234 return this; 235 return newImpl; 236 } 237 238 PassRefPtr<StringImpl> StringImpl::secure(UChar aChar) 239 { 240 UChar* data; 241 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 242 int32_t length = m_length; 243 for (int i = 0; i < length; ++i) 244 data[i] = aChar; 245 return newImpl; 246 } 247 248 PassRefPtr<StringImpl> StringImpl::foldCase() 249 { 250 UChar* data; 251 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 252 int32_t length = m_length; 253 254 // Do a faster loop for the case where all the characters are ASCII. 255 UChar ored = 0; 256 for (int i = 0; i < length; i++) { 257 UChar c = m_data[i]; 258 ored |= c; 259 data[i] = toASCIILower(c); 260 } 261 if (!(ored & ~0x7F)) 262 return newImpl; 263 264 // Do a slower implementation for cases that include non-ASCII characters. 265 bool error; 266 int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error); 267 if (!error && realLength == length) 268 return newImpl; 269 newImpl = createUninitialized(realLength, data); 270 Unicode::foldCase(data, realLength, m_data, m_length, &error); 271 if (error) 272 return this; 273 return newImpl; 274 } 275 276 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() 277 { 278 if (!m_length) 279 return empty(); 280 281 unsigned start = 0; 282 unsigned end = m_length - 1; 283 284 // skip white space from start 285 while (start <= end && isSpaceOrNewline(m_data[start])) 286 start++; 287 288 // only white space 289 if (start > end) 290 return empty(); 291 292 // skip white space from end 293 while (end && isSpaceOrNewline(m_data[end])) 294 end--; 295 296 if (!start && end == m_length - 1) 297 return this; 298 return create(m_data + start, end + 1 - start); 299 } 300 301 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) 302 { 303 const UChar* from = m_data; 304 const UChar* fromend = from + m_length; 305 306 // Assume the common case will not remove any characters 307 while (from != fromend && !findMatch(*from)) 308 from++; 309 if (from == fromend) 310 return this; 311 312 StringBuffer data(m_length); 313 UChar* to = data.characters(); 314 unsigned outc = from - m_data; 315 316 if (outc) 317 memcpy(to, m_data, outc * sizeof(UChar)); 318 319 while (true) { 320 while (from != fromend && findMatch(*from)) 321 from++; 322 while (from != fromend && !findMatch(*from)) 323 to[outc++] = *from++; 324 if (from == fromend) 325 break; 326 } 327 328 data.shrink(outc); 329 330 return adopt(data); 331 } 332 333 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() 334 { 335 StringBuffer data(m_length); 336 337 const UChar* from = m_data; 338 const UChar* fromend = from + m_length; 339 int outc = 0; 340 bool changedToSpace = false; 341 342 UChar* to = data.characters(); 343 344 while (true) { 345 while (from != fromend && isSpaceOrNewline(*from)) { 346 if (*from != ' ') 347 changedToSpace = true; 348 from++; 349 } 350 while (from != fromend && !isSpaceOrNewline(*from)) 351 to[outc++] = *from++; 352 if (from != fromend) 353 to[outc++] = ' '; 354 else 355 break; 356 } 357 358 if (outc > 0 && to[outc - 1] == ' ') 359 outc--; 360 361 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) 362 return this; 363 364 data.shrink(outc); 365 366 return adopt(data); 367 } 368 369 PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous) 370 { 371 StringBuffer stringWithPrevious(m_length + 1); 372 stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous; 373 for (unsigned i = 1; i < m_length + 1; i++) { 374 // Replace   with a real space since ICU no longer treats   as a word separator. 375 if (m_data[i - 1] == noBreakSpace) 376 stringWithPrevious[i] = ' '; 377 else 378 stringWithPrevious[i] = m_data[i - 1]; 379 } 380 381 TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1); 382 if (!boundary) 383 return this; 384 385 StringBuffer data(m_length); 386 387 int32_t endOfWord; 388 int32_t startOfWord = textBreakFirst(boundary); 389 for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) { 390 if (startOfWord != 0) // Ignore first char of previous string 391 data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]); 392 for (int i = startOfWord + 1; i < endOfWord; i++) 393 data[i - 1] = m_data[i - 1]; 394 } 395 396 return adopt(data); 397 } 398 399 int StringImpl::toIntStrict(bool* ok, int base) 400 { 401 return charactersToIntStrict(m_data, m_length, ok, base); 402 } 403 404 unsigned StringImpl::toUIntStrict(bool* ok, int base) 405 { 406 return charactersToUIntStrict(m_data, m_length, ok, base); 407 } 408 409 int64_t StringImpl::toInt64Strict(bool* ok, int base) 410 { 411 return charactersToInt64Strict(m_data, m_length, ok, base); 412 } 413 414 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) 415 { 416 return charactersToUInt64Strict(m_data, m_length, ok, base); 417 } 418 419 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) 420 { 421 return charactersToIntPtrStrict(m_data, m_length, ok, base); 422 } 423 424 int StringImpl::toInt(bool* ok) 425 { 426 return charactersToInt(m_data, m_length, ok); 427 } 428 429 unsigned StringImpl::toUInt(bool* ok) 430 { 431 return charactersToUInt(m_data, m_length, ok); 432 } 433 434 int64_t StringImpl::toInt64(bool* ok) 435 { 436 return charactersToInt64(m_data, m_length, ok); 437 } 438 439 uint64_t StringImpl::toUInt64(bool* ok) 440 { 441 return charactersToUInt64(m_data, m_length, ok); 442 } 443 444 intptr_t StringImpl::toIntPtr(bool* ok) 445 { 446 return charactersToIntPtr(m_data, m_length, ok); 447 } 448 449 double StringImpl::toDouble(bool* ok) 450 { 451 return charactersToDouble(m_data, m_length, ok); 452 } 453 454 float StringImpl::toFloat(bool* ok) 455 { 456 return charactersToFloat(m_data, m_length, ok); 457 } 458 459 static bool equal(const UChar* a, const char* b, int length) 460 { 461 ASSERT(length >= 0); 462 while (length--) { 463 unsigned char bc = *b++; 464 if (*a++ != bc) 465 return false; 466 } 467 return true; 468 } 469 470 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) 471 { 472 while (length--) { 473 unsigned char bc = *b++; 474 if (foldCase(*a++) != foldCase(bc)) 475 return false; 476 } 477 return true; 478 } 479 480 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 481 { 482 ASSERT(length >= 0); 483 return umemcasecmp(a, b, length) == 0; 484 } 485 486 int StringImpl::find(const char* chs, int index, bool caseSensitive) 487 { 488 if (!chs || index < 0) 489 return -1; 490 491 int chsLength = strlen(chs); 492 int n = m_length - index; 493 if (n < 0) 494 return -1; 495 n -= chsLength - 1; 496 if (n <= 0) 497 return -1; 498 499 const char* chsPlusOne = chs + 1; 500 int chsLengthMinusOne = chsLength - 1; 501 502 const UChar* ptr = m_data + index - 1; 503 if (caseSensitive) { 504 UChar c = *chs; 505 do { 506 if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) 507 return m_length - chsLength - n + 1; 508 } while (--n); 509 } else { 510 UChar lc = Unicode::foldCase(*chs); 511 do { 512 if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) 513 return m_length - chsLength - n + 1; 514 } while (--n); 515 } 516 517 return -1; 518 } 519 520 int StringImpl::find(UChar c, int start) 521 { 522 return WebCore::find(m_data, m_length, c, start); 523 } 524 525 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start) 526 { 527 return WebCore::find(m_data, m_length, matchFunction, start); 528 } 529 530 int StringImpl::find(StringImpl* str, int index, bool caseSensitive) 531 { 532 /* 533 We use a simple trick for efficiency's sake. Instead of 534 comparing strings, we compare the sum of str with that of 535 a part of this string. Only if that matches, we call memcmp 536 or ucstrnicmp. 537 */ 538 ASSERT(str); 539 if (index < 0) 540 index += m_length; 541 int lstr = str->m_length; 542 int lthis = m_length - index; 543 if ((unsigned)lthis > m_length) 544 return -1; 545 int delta = lthis - lstr; 546 if (delta < 0) 547 return -1; 548 549 const UChar* uthis = m_data + index; 550 const UChar* ustr = str->m_data; 551 unsigned hthis = 0; 552 unsigned hstr = 0; 553 if (caseSensitive) { 554 for (int i = 0; i < lstr; i++) { 555 hthis += uthis[i]; 556 hstr += ustr[i]; 557 } 558 int i = 0; 559 while (1) { 560 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) 561 return index + i; 562 if (i == delta) 563 return -1; 564 hthis += uthis[i + lstr]; 565 hthis -= uthis[i]; 566 i++; 567 } 568 } else { 569 for (int i = 0; i < lstr; i++ ) { 570 hthis += toASCIILower(uthis[i]); 571 hstr += toASCIILower(ustr[i]); 572 } 573 int i = 0; 574 while (1) { 575 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) 576 return index + i; 577 if (i == delta) 578 return -1; 579 hthis += toASCIILower(uthis[i + lstr]); 580 hthis -= toASCIILower(uthis[i]); 581 i++; 582 } 583 } 584 } 585 586 int StringImpl::reverseFind(UChar c, int index) 587 { 588 return WebCore::reverseFind(m_data, m_length, c, index); 589 } 590 591 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) 592 { 593 /* 594 See StringImpl::find() for explanations. 595 */ 596 ASSERT(str); 597 int lthis = m_length; 598 if (index < 0) 599 index += lthis; 600 601 int lstr = str->m_length; 602 int delta = lthis - lstr; 603 if ( index < 0 || index > lthis || delta < 0 ) 604 return -1; 605 if ( index > delta ) 606 index = delta; 607 608 const UChar *uthis = m_data; 609 const UChar *ustr = str->m_data; 610 unsigned hthis = 0; 611 unsigned hstr = 0; 612 int i; 613 if (caseSensitive) { 614 for ( i = 0; i < lstr; i++ ) { 615 hthis += uthis[index + i]; 616 hstr += ustr[i]; 617 } 618 i = index; 619 while (1) { 620 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) 621 return i; 622 if (i == 0) 623 return -1; 624 i--; 625 hthis -= uthis[i + lstr]; 626 hthis += uthis[i]; 627 } 628 } else { 629 for (i = 0; i < lstr; i++) { 630 hthis += toASCIILower(uthis[index + i]); 631 hstr += toASCIILower(ustr[i]); 632 } 633 i = index; 634 while (1) { 635 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) 636 return i; 637 if (i == 0) 638 return -1; 639 i--; 640 hthis -= toASCIILower(uthis[i + lstr]); 641 hthis += toASCIILower(uthis[i]); 642 } 643 } 644 645 // Should never get here. 646 return -1; 647 } 648 649 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) 650 { 651 ASSERT(m_data); 652 int start = m_length - m_data->m_length; 653 if (start >= 0) 654 return (find(m_data, start, caseSensitive) == start); 655 return false; 656 } 657 658 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) 659 { 660 if (oldC == newC) 661 return this; 662 unsigned i; 663 for (i = 0; i != m_length; ++i) 664 if (m_data[i] == oldC) 665 break; 666 if (i == m_length) 667 return this; 668 669 UChar* data; 670 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 671 672 for (i = 0; i != m_length; ++i) { 673 UChar ch = m_data[i]; 674 if (ch == oldC) 675 ch = newC; 676 data[i] = ch; 677 } 678 return newImpl; 679 } 680 681 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) 682 { 683 position = min(position, length()); 684 lengthToReplace = min(lengthToReplace, length() - position); 685 unsigned lengthToInsert = str ? str->length() : 0; 686 if (!lengthToReplace && !lengthToInsert) 687 return this; 688 UChar* data; 689 PassRefPtr<StringImpl> newImpl = 690 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 691 memcpy(data, characters(), position * sizeof(UChar)); 692 if (str) 693 memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar)); 694 memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, 695 (length() - position - lengthToReplace) * sizeof(UChar)); 696 return newImpl; 697 } 698 699 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) 700 { 701 if (!replacement) 702 return this; 703 704 int repStrLength = replacement->length(); 705 int srcSegmentStart = 0; 706 int matchCount = 0; 707 708 // Count the matches 709 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { 710 ++matchCount; 711 ++srcSegmentStart; 712 } 713 714 // If we have 0 matches, we don't have to do any more work 715 if (!matchCount) 716 return this; 717 718 UChar* data; 719 PassRefPtr<StringImpl> newImpl = 720 createUninitialized(m_length - matchCount + (matchCount * repStrLength), data); 721 722 // Construct the new data 723 int srcSegmentEnd; 724 int srcSegmentLength; 725 srcSegmentStart = 0; 726 int dstOffset = 0; 727 728 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { 729 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 730 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 731 dstOffset += srcSegmentLength; 732 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); 733 dstOffset += repStrLength; 734 srcSegmentStart = srcSegmentEnd + 1; 735 } 736 737 srcSegmentLength = m_length - srcSegmentStart; 738 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 739 740 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); 741 742 return newImpl; 743 } 744 745 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) 746 { 747 if (!pattern || !replacement) 748 return this; 749 750 int patternLength = pattern->length(); 751 if (!patternLength) 752 return this; 753 754 int repStrLength = replacement->length(); 755 int srcSegmentStart = 0; 756 int matchCount = 0; 757 758 // Count the matches 759 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { 760 ++matchCount; 761 srcSegmentStart += patternLength; 762 } 763 764 // If we have 0 matches, we don't have to do any more work 765 if (!matchCount) 766 return this; 767 768 UChar* data; 769 PassRefPtr<StringImpl> newImpl = 770 createUninitialized(m_length + matchCount * (repStrLength - patternLength), data); 771 772 // Construct the new data 773 int srcSegmentEnd; 774 int srcSegmentLength; 775 srcSegmentStart = 0; 776 int dstOffset = 0; 777 778 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { 779 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 780 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 781 dstOffset += srcSegmentLength; 782 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); 783 dstOffset += repStrLength; 784 srcSegmentStart = srcSegmentEnd + patternLength; 785 } 786 787 srcSegmentLength = m_length - srcSegmentStart; 788 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 789 790 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); 791 792 return newImpl; 793 } 794 795 bool equal(StringImpl* a, StringImpl* b) 796 { 797 return StringHash::equal(a, b); 798 } 799 800 bool equal(StringImpl* a, const char* b) 801 { 802 if (!a) 803 return !b; 804 if (!b) 805 return !a; 806 807 unsigned length = a->length(); 808 const UChar* as = a->characters(); 809 for (unsigned i = 0; i != length; ++i) { 810 unsigned char bc = b[i]; 811 if (!bc) 812 return false; 813 if (as[i] != bc) 814 return false; 815 } 816 817 return !b[length]; 818 } 819 820 bool equalIgnoringCase(StringImpl* a, StringImpl* b) 821 { 822 return CaseFoldingHash::equal(a, b); 823 } 824 825 bool equalIgnoringCase(StringImpl* a, const char* b) 826 { 827 if (!a) 828 return !b; 829 if (!b) 830 return !a; 831 832 unsigned length = a->length(); 833 const UChar* as = a->characters(); 834 835 // Do a faster loop for the case where all the characters are ASCII. 836 UChar ored = 0; 837 bool equal = true; 838 for (unsigned i = 0; i != length; ++i) { 839 char bc = b[i]; 840 if (!bc) 841 return false; 842 UChar ac = as[i]; 843 ored |= ac; 844 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 845 } 846 847 // Do a slower implementation for cases that include non-ASCII characters. 848 if (ored & ~0x7F) { 849 equal = true; 850 for (unsigned i = 0; i != length; ++i) { 851 unsigned char bc = b[i]; 852 equal = equal && (foldCase(as[i]) == foldCase(bc)); 853 } 854 } 855 856 return equal && !b[length]; 857 } 858 859 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 860 { 861 if (StringHash::equal(a, b)) 862 return true; 863 if (!a && b && !b->length()) 864 return true; 865 if (!b && a && !a->length()) 866 return true; 867 868 return false; 869 } 870 871 Vector<char> StringImpl::ascii() 872 { 873 Vector<char> buffer(m_length + 1); 874 for (unsigned i = 0; i != m_length; ++i) { 875 UChar c = m_data[i]; 876 if ((c >= 0x20 && c < 0x7F) || c == 0x00) 877 buffer[i] = c; 878 else 879 buffer[i] = '?'; 880 } 881 buffer[m_length] = '\0'; 882 return buffer; 883 } 884 885 WTF::Unicode::Direction StringImpl::defaultWritingDirection() 886 { 887 for (unsigned i = 0; i < m_length; ++i) { 888 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); 889 if (charDirection == WTF::Unicode::LeftToRight) 890 return WTF::Unicode::LeftToRight; 891 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) 892 return WTF::Unicode::RightToLeft; 893 } 894 return WTF::Unicode::LeftToRight; 895 } 896 897 // This is a hot function because it's used when parsing HTML. 898 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length) 899 { 900 StringBuffer strippedCopy(length); 901 unsigned strippedLength = 0; 902 for (unsigned i = 0; i < length; i++) { 903 if (int c = characters[i]) 904 strippedCopy[strippedLength++] = c; 905 } 906 ASSERT(strippedLength < length); // Only take the slow case when stripping. 907 strippedCopy.shrink(strippedLength); 908 return adopt(strippedCopy); 909 } 910 911 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer) 912 { 913 unsigned length = buffer.length(); 914 if (length == 0) 915 return empty(); 916 return adoptRef(new StringImpl(buffer.release(), length)); 917 } 918 919 PassRefPtr<StringImpl> StringImpl::adopt(Vector<UChar>& vector) 920 { 921 size_t size = vector.size(); 922 if (size == 0) 923 return empty(); 924 return adoptRef(new StringImpl(vector.releaseBuffer(), size)); 925 } 926 927 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 928 { 929 if (!length) { 930 data = 0; 931 return empty(); 932 } 933 934 // Allocate a single buffer large enough to contain the StringImpl 935 // struct as well as the data which it contains. This removes one 936 // heap allocation from this call. 937 size_t size = sizeof(StringImpl) + length * sizeof(UChar); 938 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); 939 data = reinterpret_cast<UChar*>(string + 1); 940 string = new (string) StringImpl(length); 941 return adoptRef(string); 942 } 943 944 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) 945 { 946 if (!characters || !length) 947 return empty(); 948 949 UChar* data; 950 PassRefPtr<StringImpl> string = createUninitialized(length, data); 951 memcpy(data, characters, length * sizeof(UChar)); 952 return string; 953 } 954 955 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length) 956 { 957 if (!characters || !length) 958 return empty(); 959 960 UChar* data; 961 PassRefPtr<StringImpl> string = createUninitialized(length, data); 962 for (unsigned i = 0; i != length; ++i) { 963 unsigned char c = characters[i]; 964 data[i] = c; 965 } 966 return string; 967 } 968 969 PassRefPtr<StringImpl> StringImpl::create(const char* string) 970 { 971 if (!string) 972 return empty(); 973 return create(string, strlen(string)); 974 } 975 976 #if USE(JSC) 977 PassRefPtr<StringImpl> StringImpl::create(const JSC::UString& str) 978 { 979 SharedUChar* sharedBuffer = const_cast<JSC::UString*>(&str)->rep()->sharedBuffer(); 980 if (sharedBuffer) { 981 PassRefPtr<StringImpl> impl = adoptRef(new StringImpl(str.data(), str.size())); 982 sharedBuffer->ref(); 983 impl->m_sharedBufferAndFlags.set(sharedBuffer); 984 return impl; 985 } 986 return StringImpl::create(str.data(), str.size()); 987 } 988 989 JSC::UString StringImpl::ustring() 990 { 991 SharedUChar* sharedBuffer = this->sharedBuffer(); 992 if (sharedBuffer) 993 return JSC::UString::Rep::create(sharedBuffer, const_cast<UChar*>(m_data), m_length); 994 995 return JSC::UString(m_data, m_length); 996 } 997 #endif 998 999 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) 1000 { 1001 // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer 1002 // get allocated in a single malloc block. 1003 UChar* data; 1004 int length = string.m_length; 1005 RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data); 1006 memcpy(data, string.m_data, length * sizeof(UChar)); 1007 data[length] = 0; 1008 terminatedString->m_length--; 1009 terminatedString->m_hash = string.m_hash; 1010 terminatedString->m_sharedBufferAndFlags.setFlag(HasTerminatingNullCharacter); 1011 return terminatedString.release(); 1012 } 1013 1014 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const 1015 { 1016 // Special-case empty strings to make sure that per-thread empty string instance isn't returned. 1017 if (m_length == 0) 1018 return adoptRef(new StringImpl); 1019 return create(m_data, m_length); 1020 } 1021 1022 PassRefPtr<StringImpl> StringImpl::crossThreadString() 1023 { 1024 SharedUChar* shared = sharedBuffer(); 1025 if (shared) { 1026 RefPtr<StringImpl> impl = adoptRef(new StringImpl(m_data, m_length)); 1027 impl->m_sharedBufferAndFlags.set(shared->crossThreadCopy().releaseRef()); 1028 return impl.release(); 1029 } 1030 1031 // If no shared buffer is available, create a copy. 1032 return threadsafeCopy(); 1033 } 1034 1035 StringImpl::SharedUChar* StringImpl::sharedBuffer() 1036 { 1037 if (m_length < minLengthToShare || bufferIsInternal()) 1038 return 0; 1039 1040 if (!m_sharedBufferAndFlags.get()) 1041 m_sharedBufferAndFlags.set(SharedUChar::create(new OwnFastMallocPtr<UChar>(const_cast<UChar*>(m_data))).releaseRef()); 1042 return m_sharedBufferAndFlags.get(); 1043 } 1044 1045 1046 } // namespace WebCore 1047