1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2001 Dirk Mueller ( mueller (at) kde.org ) 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 6 * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #include "config.h" 26 #include "StringImpl.h" 27 28 #include "AtomicString.h" 29 #include "StringBuffer.h" 30 #include "StringHash.h" 31 #include <wtf/StdLibExtras.h> 32 #include <wtf/WTFThreadData.h> 33 34 using namespace std; 35 36 namespace WTF { 37 38 using namespace Unicode; 39 40 static const unsigned minLengthToShare = 20; 41 42 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small); 43 44 StringImpl::~StringImpl() 45 { 46 ASSERT(!isStatic()); 47 48 if (isAtomic()) 49 AtomicString::remove(this); 50 #if USE(JSC) 51 if (isIdentifier()) { 52 if (!wtfThreadData().currentIdentifierTable()->remove(this)) 53 CRASH(); 54 } 55 #endif 56 57 BufferOwnership ownership = bufferOwnership(); 58 if (ownership != BufferInternal) { 59 if (ownership == BufferOwned) { 60 ASSERT(!m_sharedBuffer); 61 ASSERT(m_data); 62 fastFree(const_cast<UChar*>(m_data)); 63 } else if (ownership == BufferSubstring) { 64 ASSERT(m_substringBuffer); 65 m_substringBuffer->deref(); 66 } else { 67 ASSERT(ownership == BufferShared); 68 ASSERT(m_sharedBuffer); 69 m_sharedBuffer->deref(); 70 } 71 } 72 } 73 74 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 75 { 76 if (!length) { 77 data = 0; 78 return empty(); 79 } 80 81 // Allocate a single buffer large enough to contain the StringImpl 82 // struct as well as the data which it contains. This removes one 83 // heap allocation from this call. 84 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) 85 CRASH(); 86 size_t size = sizeof(StringImpl) + length * sizeof(UChar); 87 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); 88 89 data = reinterpret_cast<UChar*>(string + 1); 90 return adoptRef(new (string) StringImpl(length)); 91 } 92 93 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) 94 { 95 if (!characters || !length) 96 return empty(); 97 98 UChar* data; 99 RefPtr<StringImpl> string = createUninitialized(length, data); 100 memcpy(data, characters, length * sizeof(UChar)); 101 return string.release(); 102 } 103 104 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length) 105 { 106 if (!characters || !length) 107 return empty(); 108 109 UChar* data; 110 RefPtr<StringImpl> string = createUninitialized(length, data); 111 for (unsigned i = 0; i != length; ++i) { 112 unsigned char c = characters[i]; 113 data[i] = c; 114 } 115 return string.release(); 116 } 117 118 PassRefPtr<StringImpl> StringImpl::create(const char* string) 119 { 120 if (!string) 121 return empty(); 122 size_t length = strlen(string); 123 if (length > numeric_limits<unsigned>::max()) 124 CRASH(); 125 return create(string, length); 126 } 127 128 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) 129 { 130 ASSERT(characters); 131 ASSERT(minLengthToShare && length >= minLengthToShare); 132 return adoptRef(new StringImpl(characters, length, sharedBuffer)); 133 } 134 135 SharedUChar* StringImpl::sharedBuffer() 136 { 137 if (m_length < minLengthToShare) 138 return 0; 139 // All static strings are smaller that the minimim length to share. 140 ASSERT(!isStatic()); 141 142 BufferOwnership ownership = bufferOwnership(); 143 144 if (ownership == BufferInternal) 145 return 0; 146 if (ownership == BufferSubstring) 147 return m_substringBuffer->sharedBuffer(); 148 if (ownership == BufferOwned) { 149 ASSERT(!m_sharedBuffer); 150 m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef(); 151 m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared; 152 } 153 154 ASSERT(bufferOwnership() == BufferShared); 155 ASSERT(m_sharedBuffer); 156 return m_sharedBuffer; 157 } 158 159 bool StringImpl::containsOnlyWhitespace() 160 { 161 // FIXME: The definition of whitespace here includes a number of characters 162 // that are not whitespace from the point of view of RenderText; I wonder if 163 // that's a problem in practice. 164 for (unsigned i = 0; i < m_length; i++) 165 if (!isASCIISpace(m_data[i])) 166 return false; 167 return true; 168 } 169 170 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 171 { 172 if (start >= m_length) 173 return empty(); 174 unsigned maxLength = m_length - start; 175 if (length >= maxLength) { 176 if (!start) 177 return this; 178 length = maxLength; 179 } 180 return create(m_data + start, length); 181 } 182 183 UChar32 StringImpl::characterStartingAt(unsigned i) 184 { 185 if (U16_IS_SINGLE(m_data[i])) 186 return m_data[i]; 187 if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) 188 return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); 189 return 0; 190 } 191 192 PassRefPtr<StringImpl> StringImpl::lower() 193 { 194 // Note: This is a hot function in the Dromaeo benchmark, specifically the 195 // no-op code path up through the first 'return' statement. 196 197 // First scan the string for uppercase and non-ASCII characters: 198 UChar ored = 0; 199 bool noUpper = true; 200 const UChar *end = m_data + m_length; 201 for (const UChar* chp = m_data; chp != end; chp++) { 202 if (UNLIKELY(isASCIIUpper(*chp))) 203 noUpper = false; 204 ored |= *chp; 205 } 206 207 // Nothing to do if the string is all ASCII with no uppercase. 208 if (noUpper && !(ored & ~0x7F)) 209 return this; 210 211 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 212 CRASH(); 213 int32_t length = m_length; 214 215 UChar* data; 216 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 217 218 if (!(ored & ~0x7F)) { 219 // Do a faster loop for the case where all the characters are ASCII. 220 for (int i = 0; i < length; i++) { 221 UChar c = m_data[i]; 222 data[i] = toASCIILower(c); 223 } 224 return newImpl; 225 } 226 227 // Do a slower implementation for cases that include non-ASCII characters. 228 bool error; 229 int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error); 230 if (!error && realLength == length) 231 return newImpl; 232 newImpl = createUninitialized(realLength, data); 233 Unicode::toLower(data, realLength, m_data, m_length, &error); 234 if (error) 235 return this; 236 return newImpl; 237 } 238 239 PassRefPtr<StringImpl> StringImpl::upper() 240 { 241 // This function could be optimized for no-op cases the way lower() is, 242 // but in empirical testing, few actual calls to upper() are no-ops, so 243 // it wouldn't be worth the extra time for pre-scanning. 244 UChar* data; 245 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 246 247 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 248 CRASH(); 249 int32_t length = m_length; 250 251 // Do a faster loop for the case where all the characters are ASCII. 252 UChar ored = 0; 253 for (int i = 0; i < length; i++) { 254 UChar c = m_data[i]; 255 ored |= c; 256 data[i] = toASCIIUpper(c); 257 } 258 if (!(ored & ~0x7F)) 259 return newImpl.release(); 260 261 // Do a slower implementation for cases that include non-ASCII characters. 262 bool error; 263 int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error); 264 if (!error && realLength == length) 265 return newImpl; 266 newImpl = createUninitialized(realLength, data); 267 Unicode::toUpper(data, realLength, m_data, m_length, &error); 268 if (error) 269 return this; 270 return newImpl.release(); 271 } 272 273 PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior) 274 { 275 if (!m_length) 276 return this; 277 278 UChar* data; 279 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 280 unsigned lastCharacterIndex = m_length - 1; 281 for (unsigned i = 0; i < lastCharacterIndex; ++i) 282 data[i] = character; 283 data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex]; 284 return newImpl.release(); 285 } 286 287 PassRefPtr<StringImpl> StringImpl::foldCase() 288 { 289 UChar* data; 290 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 291 292 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 293 CRASH(); 294 int32_t length = m_length; 295 296 // Do a faster loop for the case where all the characters are ASCII. 297 UChar ored = 0; 298 for (int32_t i = 0; i < length; i++) { 299 UChar c = m_data[i]; 300 ored |= c; 301 data[i] = toASCIILower(c); 302 } 303 if (!(ored & ~0x7F)) 304 return newImpl.release(); 305 306 // Do a slower implementation for cases that include non-ASCII characters. 307 bool error; 308 int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error); 309 if (!error && realLength == length) 310 return newImpl.release(); 311 newImpl = createUninitialized(realLength, data); 312 Unicode::foldCase(data, realLength, m_data, m_length, &error); 313 if (error) 314 return this; 315 return newImpl.release(); 316 } 317 318 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() 319 { 320 if (!m_length) 321 return empty(); 322 323 unsigned start = 0; 324 unsigned end = m_length - 1; 325 326 // skip white space from start 327 while (start <= end && isSpaceOrNewline(m_data[start])) 328 start++; 329 330 // only white space 331 if (start > end) 332 return empty(); 333 334 // skip white space from end 335 while (end && isSpaceOrNewline(m_data[end])) 336 end--; 337 338 if (!start && end == m_length - 1) 339 return this; 340 return create(m_data + start, end + 1 - start); 341 } 342 343 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) 344 { 345 const UChar* from = m_data; 346 const UChar* fromend = from + m_length; 347 348 // Assume the common case will not remove any characters 349 while (from != fromend && !findMatch(*from)) 350 from++; 351 if (from == fromend) 352 return this; 353 354 StringBuffer data(m_length); 355 UChar* to = data.characters(); 356 unsigned outc = from - m_data; 357 358 if (outc) 359 memcpy(to, m_data, outc * sizeof(UChar)); 360 361 while (true) { 362 while (from != fromend && findMatch(*from)) 363 from++; 364 while (from != fromend && !findMatch(*from)) 365 to[outc++] = *from++; 366 if (from == fromend) 367 break; 368 } 369 370 data.shrink(outc); 371 372 return adopt(data); 373 } 374 375 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() 376 { 377 StringBuffer data(m_length); 378 379 const UChar* from = m_data; 380 const UChar* fromend = from + m_length; 381 int outc = 0; 382 bool changedToSpace = false; 383 384 UChar* to = data.characters(); 385 386 while (true) { 387 while (from != fromend && isSpaceOrNewline(*from)) { 388 if (*from != ' ') 389 changedToSpace = true; 390 from++; 391 } 392 while (from != fromend && !isSpaceOrNewline(*from)) 393 to[outc++] = *from++; 394 if (from != fromend) 395 to[outc++] = ' '; 396 else 397 break; 398 } 399 400 if (outc > 0 && to[outc - 1] == ' ') 401 outc--; 402 403 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) 404 return this; 405 406 data.shrink(outc); 407 408 return adopt(data); 409 } 410 411 int StringImpl::toIntStrict(bool* ok, int base) 412 { 413 return charactersToIntStrict(m_data, m_length, ok, base); 414 } 415 416 unsigned StringImpl::toUIntStrict(bool* ok, int base) 417 { 418 return charactersToUIntStrict(m_data, m_length, ok, base); 419 } 420 421 int64_t StringImpl::toInt64Strict(bool* ok, int base) 422 { 423 return charactersToInt64Strict(m_data, m_length, ok, base); 424 } 425 426 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) 427 { 428 return charactersToUInt64Strict(m_data, m_length, ok, base); 429 } 430 431 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) 432 { 433 return charactersToIntPtrStrict(m_data, m_length, ok, base); 434 } 435 436 int StringImpl::toInt(bool* ok) 437 { 438 return charactersToInt(m_data, m_length, ok); 439 } 440 441 unsigned StringImpl::toUInt(bool* ok) 442 { 443 return charactersToUInt(m_data, m_length, ok); 444 } 445 446 int64_t StringImpl::toInt64(bool* ok) 447 { 448 return charactersToInt64(m_data, m_length, ok); 449 } 450 451 uint64_t StringImpl::toUInt64(bool* ok) 452 { 453 return charactersToUInt64(m_data, m_length, ok); 454 } 455 456 intptr_t StringImpl::toIntPtr(bool* ok) 457 { 458 return charactersToIntPtr(m_data, m_length, ok); 459 } 460 461 double StringImpl::toDouble(bool* ok, bool* didReadNumber) 462 { 463 return charactersToDouble(m_data, m_length, ok, didReadNumber); 464 } 465 466 float StringImpl::toFloat(bool* ok, bool* didReadNumber) 467 { 468 return charactersToFloat(m_data, m_length, ok, didReadNumber); 469 } 470 471 static bool equal(const UChar* a, const char* b, int length) 472 { 473 ASSERT(length >= 0); 474 while (length--) { 475 unsigned char bc = *b++; 476 if (*a++ != bc) 477 return false; 478 } 479 return true; 480 } 481 482 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) 483 { 484 while (length--) { 485 unsigned char bc = *b++; 486 if (foldCase(*a++) != foldCase(bc)) 487 return false; 488 } 489 return true; 490 } 491 492 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 493 { 494 ASSERT(length >= 0); 495 return umemcasecmp(a, b, length) == 0; 496 } 497 498 int codePointCompare(const StringImpl* s1, const StringImpl* s2) 499 { 500 const unsigned l1 = s1 ? s1->length() : 0; 501 const unsigned l2 = s2 ? s2->length() : 0; 502 const unsigned lmin = l1 < l2 ? l1 : l2; 503 const UChar* c1 = s1 ? s1->characters() : 0; 504 const UChar* c2 = s2 ? s2->characters() : 0; 505 unsigned pos = 0; 506 while (pos < lmin && *c1 == *c2) { 507 c1++; 508 c2++; 509 pos++; 510 } 511 512 if (pos < lmin) 513 return (c1[0] > c2[0]) ? 1 : -1; 514 515 if (l1 == l2) 516 return 0; 517 518 return (l1 > l2) ? 1 : -1; 519 } 520 521 size_t StringImpl::find(UChar c, unsigned start) 522 { 523 return WTF::find(m_data, m_length, c, start); 524 } 525 526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) 527 { 528 return WTF::find(m_data, m_length, matchFunction, start); 529 } 530 531 size_t StringImpl::find(const char* matchString, unsigned index) 532 { 533 // Check for null or empty string to match against 534 if (!matchString) 535 return notFound; 536 size_t matchStringLength = strlen(matchString); 537 if (matchStringLength > numeric_limits<unsigned>::max()) 538 CRASH(); 539 unsigned matchLength = matchStringLength; 540 if (!matchLength) 541 return min(index, length()); 542 543 // Optimization 1: fast case for strings of length 1. 544 if (matchLength == 1) 545 return WTF::find(characters(), length(), *(const unsigned char*)matchString, index); 546 547 // Check index & matchLength are in range. 548 if (index > length()) 549 return notFound; 550 unsigned searchLength = length() - index; 551 if (matchLength > searchLength) 552 return notFound; 553 // delta is the number of additional times to test; delta == 0 means test only once. 554 unsigned delta = searchLength - matchLength; 555 556 const UChar* searchCharacters = characters() + index; 557 const unsigned char* matchCharacters = (const unsigned char*)matchString; 558 559 // Optimization 2: keep a running hash of the strings, 560 // only call memcmp if the hashes match. 561 unsigned searchHash = 0; 562 unsigned matchHash = 0; 563 for (unsigned i = 0; i < matchLength; ++i) { 564 searchHash += searchCharacters[i]; 565 matchHash += matchCharacters[i]; 566 } 567 568 unsigned i = 0; 569 // keep looping until we match 570 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { 571 if (i == delta) 572 return notFound; 573 searchHash += searchCharacters[i + matchLength]; 574 searchHash -= searchCharacters[i]; 575 ++i; 576 } 577 return index + i; 578 } 579 580 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index) 581 { 582 // Check for null or empty string to match against 583 if (!matchString) 584 return notFound; 585 size_t matchStringLength = strlen(matchString); 586 if (matchStringLength > numeric_limits<unsigned>::max()) 587 CRASH(); 588 unsigned matchLength = matchStringLength; 589 if (!matchLength) 590 return min(index, length()); 591 592 // Check index & matchLength are in range. 593 if (index > length()) 594 return notFound; 595 unsigned searchLength = length() - index; 596 if (matchLength > searchLength) 597 return notFound; 598 // delta is the number of additional times to test; delta == 0 means test only once. 599 unsigned delta = searchLength - matchLength; 600 601 const UChar* searchCharacters = characters() + index; 602 603 unsigned i = 0; 604 // keep looping until we match 605 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { 606 if (i == delta) 607 return notFound; 608 ++i; 609 } 610 return index + i; 611 } 612 613 size_t StringImpl::find(StringImpl* matchString, unsigned index) 614 { 615 // Check for null or empty string to match against 616 if (!matchString) 617 return notFound; 618 unsigned matchLength = matchString->length(); 619 if (!matchLength) 620 return min(index, length()); 621 622 // Optimization 1: fast case for strings of length 1. 623 if (matchLength == 1) 624 return WTF::find(characters(), length(), matchString->characters()[0], index); 625 626 // Check index & matchLength are in range. 627 if (index > length()) 628 return notFound; 629 unsigned searchLength = length() - index; 630 if (matchLength > searchLength) 631 return notFound; 632 // delta is the number of additional times to test; delta == 0 means test only once. 633 unsigned delta = searchLength - matchLength; 634 635 const UChar* searchCharacters = characters() + index; 636 const UChar* matchCharacters = matchString->characters(); 637 638 // Optimization 2: keep a running hash of the strings, 639 // only call memcmp if the hashes match. 640 unsigned searchHash = 0; 641 unsigned matchHash = 0; 642 for (unsigned i = 0; i < matchLength; ++i) { 643 searchHash += searchCharacters[i]; 644 matchHash += matchCharacters[i]; 645 } 646 647 unsigned i = 0; 648 // keep looping until we match 649 while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) { 650 if (i == delta) 651 return notFound; 652 searchHash += searchCharacters[i + matchLength]; 653 searchHash -= searchCharacters[i]; 654 ++i; 655 } 656 return index + i; 657 } 658 659 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) 660 { 661 // Check for null or empty string to match against 662 if (!matchString) 663 return notFound; 664 unsigned matchLength = matchString->length(); 665 if (!matchLength) 666 return min(index, length()); 667 668 // Check index & matchLength are in range. 669 if (index > length()) 670 return notFound; 671 unsigned searchLength = length() - index; 672 if (matchLength > searchLength) 673 return notFound; 674 // delta is the number of additional times to test; delta == 0 means test only once. 675 unsigned delta = searchLength - matchLength; 676 677 const UChar* searchCharacters = characters() + index; 678 const UChar* matchCharacters = matchString->characters(); 679 680 unsigned i = 0; 681 // keep looping until we match 682 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { 683 if (i == delta) 684 return notFound; 685 ++i; 686 } 687 return index + i; 688 } 689 690 size_t StringImpl::reverseFind(UChar c, unsigned index) 691 { 692 return WTF::reverseFind(m_data, m_length, c, index); 693 } 694 695 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) 696 { 697 // Check for null or empty string to match against 698 if (!matchString) 699 return notFound; 700 unsigned matchLength = matchString->length(); 701 if (!matchLength) 702 return min(index, length()); 703 704 // Optimization 1: fast case for strings of length 1. 705 if (matchLength == 1) 706 return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); 707 708 // Check index & matchLength are in range. 709 if (matchLength > length()) 710 return notFound; 711 // delta is the number of additional times to test; delta == 0 means test only once. 712 unsigned delta = min(index, length() - matchLength); 713 714 const UChar *searchCharacters = characters(); 715 const UChar *matchCharacters = matchString->characters(); 716 717 // Optimization 2: keep a running hash of the strings, 718 // only call memcmp if the hashes match. 719 unsigned searchHash = 0; 720 unsigned matchHash = 0; 721 for (unsigned i = 0; i < matchLength; ++i) { 722 searchHash += searchCharacters[delta + i]; 723 matchHash += matchCharacters[i]; 724 } 725 726 // keep looping until we match 727 while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { 728 if (!delta) 729 return notFound; 730 delta--; 731 searchHash -= searchCharacters[delta + matchLength]; 732 searchHash += searchCharacters[delta]; 733 } 734 return delta; 735 } 736 737 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) 738 { 739 // Check for null or empty string to match against 740 if (!matchString) 741 return notFound; 742 unsigned matchLength = matchString->length(); 743 if (!matchLength) 744 return min(index, length()); 745 746 // Check index & matchLength are in range. 747 if (matchLength > length()) 748 return notFound; 749 // delta is the number of additional times to test; delta == 0 means test only once. 750 unsigned delta = min(index, length() - matchLength); 751 752 const UChar *searchCharacters = characters(); 753 const UChar *matchCharacters = matchString->characters(); 754 755 // keep looping until we match 756 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { 757 if (!delta) 758 return notFound; 759 delta--; 760 } 761 return delta; 762 } 763 764 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) 765 { 766 ASSERT(m_data); 767 if (m_length >= m_data->m_length) { 768 unsigned start = m_length - m_data->m_length; 769 return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start; 770 } 771 return false; 772 } 773 774 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) 775 { 776 if (oldC == newC) 777 return this; 778 unsigned i; 779 for (i = 0; i != m_length; ++i) 780 if (m_data[i] == oldC) 781 break; 782 if (i == m_length) 783 return this; 784 785 UChar* data; 786 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 787 788 for (i = 0; i != m_length; ++i) { 789 UChar ch = m_data[i]; 790 if (ch == oldC) 791 ch = newC; 792 data[i] = ch; 793 } 794 return newImpl.release(); 795 } 796 797 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) 798 { 799 position = min(position, length()); 800 lengthToReplace = min(lengthToReplace, length() - position); 801 unsigned lengthToInsert = str ? str->length() : 0; 802 if (!lengthToReplace && !lengthToInsert) 803 return this; 804 UChar* data; 805 806 if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert)) 807 CRASH(); 808 809 RefPtr<StringImpl> newImpl = 810 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 811 memcpy(data, characters(), position * sizeof(UChar)); 812 if (str) 813 memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar)); 814 memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, 815 (length() - position - lengthToReplace) * sizeof(UChar)); 816 return newImpl.release(); 817 } 818 819 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) 820 { 821 if (!replacement) 822 return this; 823 824 unsigned repStrLength = replacement->length(); 825 size_t srcSegmentStart = 0; 826 unsigned matchCount = 0; 827 828 // Count the matches 829 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { 830 ++matchCount; 831 ++srcSegmentStart; 832 } 833 834 // If we have 0 matches, we don't have to do any more work 835 if (!matchCount) 836 return this; 837 838 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) 839 CRASH(); 840 841 unsigned replaceSize = matchCount * repStrLength; 842 unsigned newSize = m_length - matchCount; 843 if (newSize >= (numeric_limits<unsigned>::max() - replaceSize)) 844 CRASH(); 845 846 newSize += replaceSize; 847 848 UChar* data; 849 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 850 851 // Construct the new data 852 size_t srcSegmentEnd; 853 unsigned srcSegmentLength; 854 srcSegmentStart = 0; 855 unsigned dstOffset = 0; 856 857 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { 858 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 859 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 860 dstOffset += srcSegmentLength; 861 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); 862 dstOffset += repStrLength; 863 srcSegmentStart = srcSegmentEnd + 1; 864 } 865 866 srcSegmentLength = m_length - srcSegmentStart; 867 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 868 869 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 870 871 return newImpl.release(); 872 } 873 874 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) 875 { 876 if (!pattern || !replacement) 877 return this; 878 879 unsigned patternLength = pattern->length(); 880 if (!patternLength) 881 return this; 882 883 unsigned repStrLength = replacement->length(); 884 size_t srcSegmentStart = 0; 885 unsigned matchCount = 0; 886 887 // Count the matches 888 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { 889 ++matchCount; 890 srcSegmentStart += patternLength; 891 } 892 893 // If we have 0 matches, we don't have to do any more work 894 if (!matchCount) 895 return this; 896 897 unsigned newSize = m_length - matchCount * patternLength; 898 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) 899 CRASH(); 900 901 if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength)) 902 CRASH(); 903 904 newSize += matchCount * repStrLength; 905 906 UChar* data; 907 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 908 909 // Construct the new data 910 size_t srcSegmentEnd; 911 unsigned srcSegmentLength; 912 srcSegmentStart = 0; 913 unsigned dstOffset = 0; 914 915 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { 916 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 917 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 918 dstOffset += srcSegmentLength; 919 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); 920 dstOffset += repStrLength; 921 srcSegmentStart = srcSegmentEnd + patternLength; 922 } 923 924 srcSegmentLength = m_length - srcSegmentStart; 925 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 926 927 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 928 929 return newImpl.release(); 930 } 931 932 bool equal(const StringImpl* a, const StringImpl* b) 933 { 934 return StringHash::equal(a, b); 935 } 936 937 bool equal(const StringImpl* a, const char* b) 938 { 939 if (!a) 940 return !b; 941 if (!b) 942 return !a; 943 944 unsigned length = a->length(); 945 const UChar* as = a->characters(); 946 for (unsigned i = 0; i != length; ++i) { 947 unsigned char bc = b[i]; 948 if (!bc) 949 return false; 950 if (as[i] != bc) 951 return false; 952 } 953 954 return !b[length]; 955 } 956 957 bool equalIgnoringCase(StringImpl* a, StringImpl* b) 958 { 959 return CaseFoldingHash::equal(a, b); 960 } 961 962 bool equalIgnoringCase(StringImpl* a, const char* b) 963 { 964 if (!a) 965 return !b; 966 if (!b) 967 return !a; 968 969 unsigned length = a->length(); 970 const UChar* as = a->characters(); 971 972 // Do a faster loop for the case where all the characters are ASCII. 973 UChar ored = 0; 974 bool equal = true; 975 for (unsigned i = 0; i != length; ++i) { 976 char bc = b[i]; 977 if (!bc) 978 return false; 979 UChar ac = as[i]; 980 ored |= ac; 981 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 982 } 983 984 // Do a slower implementation for cases that include non-ASCII characters. 985 if (ored & ~0x7F) { 986 equal = true; 987 for (unsigned i = 0; i != length; ++i) { 988 unsigned char bc = b[i]; 989 equal = equal && (foldCase(as[i]) == foldCase(bc)); 990 } 991 } 992 993 return equal && !b[length]; 994 } 995 996 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 997 { 998 if (StringHash::equal(a, b)) 999 return true; 1000 if (!a && b && !b->length()) 1001 return true; 1002 if (!b && a && !a->length()) 1003 return true; 1004 1005 return false; 1006 } 1007 1008 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality) 1009 { 1010 for (unsigned i = 0; i < m_length; ++i) { 1011 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); 1012 if (charDirection == WTF::Unicode::LeftToRight) { 1013 if (hasStrongDirectionality) 1014 *hasStrongDirectionality = true; 1015 return WTF::Unicode::LeftToRight; 1016 } 1017 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) { 1018 if (hasStrongDirectionality) 1019 *hasStrongDirectionality = true; 1020 return WTF::Unicode::RightToLeft; 1021 } 1022 } 1023 if (hasStrongDirectionality) 1024 *hasStrongDirectionality = false; 1025 return WTF::Unicode::LeftToRight; 1026 } 1027 1028 // This is a hot function because it's used when parsing HTML. 1029 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length) 1030 { 1031 StringBuffer strippedCopy(length); 1032 unsigned strippedLength = 0; 1033 for (unsigned i = 0; i < length; i++) { 1034 if (int c = characters[i]) 1035 strippedCopy[strippedLength++] = c; 1036 } 1037 ASSERT(strippedLength < length); // Only take the slow case when stripping. 1038 strippedCopy.shrink(strippedLength); 1039 return adopt(strippedCopy); 1040 } 1041 1042 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer) 1043 { 1044 unsigned length = buffer.length(); 1045 if (length == 0) 1046 return empty(); 1047 return adoptRef(new StringImpl(buffer.release(), length)); 1048 } 1049 1050 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) 1051 { 1052 // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer 1053 // get allocated in a single memory block. 1054 UChar* data; 1055 unsigned length = string.m_length; 1056 if (length >= numeric_limits<unsigned>::max()) 1057 CRASH(); 1058 RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data); 1059 memcpy(data, string.m_data, length * sizeof(UChar)); 1060 data[length] = 0; 1061 terminatedString->m_length--; 1062 terminatedString->m_hash = string.m_hash; 1063 terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter; 1064 return terminatedString.release(); 1065 } 1066 1067 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const 1068 { 1069 return create(m_data, m_length); 1070 } 1071 1072 PassRefPtr<StringImpl> StringImpl::crossThreadString() 1073 { 1074 if (SharedUChar* sharedBuffer = this->sharedBuffer()) 1075 return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy())); 1076 1077 // If no shared buffer is available, create a copy. 1078 return threadsafeCopy(); 1079 } 1080 1081 } // namespace WTF 1082