1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2001 Dirk Mueller ( mueller (at) kde.org ) 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. 6 * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #include "config.h" 26 #include "wtf/text/StringImpl.h" 27 28 #include "wtf/DynamicAnnotations.h" 29 #include "wtf/LeakAnnotations.h" 30 #include "wtf/MainThread.h" 31 #include "wtf/PartitionAlloc.h" 32 #include "wtf/StdLibExtras.h" 33 #include "wtf/WTF.h" 34 #include "wtf/text/AtomicString.h" 35 #include "wtf/text/StringBuffer.h" 36 #include "wtf/text/StringHash.h" 37 #include "wtf/unicode/CharacterNames.h" 38 39 #ifdef STRING_STATS 40 #include "wtf/DataLog.h" 41 #include "wtf/HashMap.h" 42 #include "wtf/HashSet.h" 43 #include "wtf/ProcessID.h" 44 #include "wtf/RefCounted.h" 45 #include "wtf/ThreadingPrimitives.h" 46 #include <unistd.h> 47 #endif 48 49 using namespace std; 50 51 namespace WTF { 52 53 using namespace Unicode; 54 55 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small); 56 57 #ifdef STRING_STATS 58 59 static Mutex& statsMutex() 60 { 61 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 62 return mutex; 63 } 64 65 static HashSet<void*>& liveStrings() 66 { 67 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings. 68 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); 69 return strings; 70 } 71 72 void addStringForStats(StringImpl* string) 73 { 74 MutexLocker locker(statsMutex()); 75 liveStrings().add(string); 76 } 77 78 void removeStringForStats(StringImpl* string) 79 { 80 MutexLocker locker(statsMutex()); 81 liveStrings().remove(string); 82 } 83 84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) 85 { 86 const unsigned kMaxSnippetLength = 64; 87 snippet.clear(); 88 89 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); 90 if (expectedLength == kMaxSnippetLength) 91 expectedLength += 3; // For the "...". 92 ++expectedLength; // For the terminating '\0'. 93 snippet.reserveCapacity(expectedLength); 94 95 size_t i; 96 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { 97 UChar c = (*string)[i]; 98 if (isASCIIPrintable(c)) 99 snippet.append(c); 100 else 101 snippet.append('?'); 102 } 103 if (i < string->length()) { 104 snippet.append('.'); 105 snippet.append('.'); 106 snippet.append('.'); 107 } 108 snippet.append('\0'); 109 } 110 111 static bool isUnnecessarilyWide(const StringImpl* string) 112 { 113 if (string->is8Bit()) 114 return false; 115 UChar c = 0; 116 for (unsigned i = 0; i < string->length(); ++i) 117 c |= (*string)[i] >> 8; 118 return !c; 119 } 120 121 class PerStringStats : public RefCounted<PerStringStats> { 122 public: 123 static PassRefPtr<PerStringStats> create() 124 { 125 return adoptRef(new PerStringStats); 126 } 127 128 void add(const StringImpl* string) 129 { 130 ++m_numberOfCopies; 131 if (!m_length) { 132 m_length = string->length(); 133 fillWithSnippet(string, m_snippet); 134 } 135 if (string->isAtomic()) 136 ++m_numberOfAtomicCopies; 137 if (isUnnecessarilyWide(string)) 138 m_unnecessarilyWide = true; 139 } 140 141 size_t totalCharacters() const 142 { 143 return m_numberOfCopies * m_length; 144 } 145 146 void print() 147 { 148 const char* status = "ok"; 149 if (m_unnecessarilyWide) 150 status = "16"; 151 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data()); 152 } 153 154 bool m_unnecessarilyWide; 155 unsigned m_numberOfCopies; 156 unsigned m_length; 157 unsigned m_numberOfAtomicCopies; 158 Vector<char> m_snippet; 159 160 private: 161 PerStringStats() 162 : m_unnecessarilyWide(false) 163 , m_numberOfCopies(0) 164 , m_length(0) 165 , m_numberOfAtomicCopies(0) 166 { 167 } 168 }; 169 170 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) 171 { 172 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) 173 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; 174 if (a->totalCharacters() != b->totalCharacters()) 175 return a->totalCharacters() < b->totalCharacters(); 176 if (a->m_numberOfCopies != b->m_numberOfCopies) 177 return a->m_numberOfCopies < b->m_numberOfCopies; 178 if (a->m_length != b->m_length) 179 return a->m_length < b->m_length; 180 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; 181 } 182 183 static void printLiveStringStats(void*) 184 { 185 MutexLocker locker(statsMutex()); 186 HashSet<void*>& strings = liveStrings(); 187 188 HashMap<StringImpl*, RefPtr<PerStringStats> > stats; 189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) { 190 StringImpl* string = static_cast<StringImpl*>(*iter); 191 HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string); 192 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value; 193 value->add(string); 194 stats.set(string, value.release()); 195 } 196 197 Vector<RefPtr<PerStringStats> > all; 198 for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter) 199 all.append(iter->value); 200 201 std::sort(all.begin(), all.end()); 202 std::reverse(all.begin(), all.end()); 203 for (size_t i = 0; i < 20 && i < all.size(); ++i) 204 all[i]->print(); 205 } 206 207 StringStats StringImpl::m_stringStats; 208 209 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency; 210 211 void StringStats::removeString(StringImpl* string) 212 { 213 unsigned length = string->length(); 214 --m_totalNumberStrings; 215 216 if (string->is8Bit()) { 217 --m_number8BitStrings; 218 m_total8BitData -= length; 219 } else { 220 --m_number16BitStrings; 221 m_total16BitData -= length; 222 } 223 224 if (!--s_stringRemovesTillPrintStats) { 225 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; 226 printStats(); 227 } 228 } 229 230 void StringStats::printStats() 231 { 232 dataLogF("String stats for process id %d:\n", getCurrentProcessID()); 233 234 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; 235 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 236 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0; 237 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength); 238 239 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 240 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0; 241 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength); 242 243 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0; 244 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; 245 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength); 246 unsigned long long totalSavedBytes = m_total8BitData; 247 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0; 248 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings); 249 250 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); 251 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; 252 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent); 253 254 callOnMainThread(printLiveStringStats, 0); 255 } 256 #endif 257 258 void* StringImpl::operator new(size_t size) 259 { 260 ASSERT(size == sizeof(StringImpl)); 261 return partitionAllocGeneric(Partitions::getBufferPartition(), size); 262 } 263 264 void StringImpl::operator delete(void* ptr) 265 { 266 partitionFreeGeneric(Partitions::getBufferPartition(), ptr); 267 } 268 269 inline StringImpl::~StringImpl() 270 { 271 ASSERT(!isStatic()); 272 273 STRING_STATS_REMOVE_STRING(this); 274 275 if (isAtomic()) 276 AtomicString::remove(this); 277 } 278 279 void StringImpl::destroyIfNotStatic() 280 { 281 if (!isStatic()) 282 delete this; 283 } 284 285 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) 286 { 287 if (!length) { 288 data = 0; 289 return empty(); 290 } 291 292 // Allocate a single buffer large enough to contain the StringImpl 293 // struct as well as the data which it contains. This removes one 294 // heap allocation from this call. 295 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar))); 296 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 297 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size)); 298 299 data = reinterpret_cast<LChar*>(string + 1); 300 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 301 } 302 303 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 304 { 305 if (!length) { 306 data = 0; 307 return empty(); 308 } 309 310 // Allocate a single buffer large enough to contain the StringImpl 311 // struct as well as the data which it contains. This removes one 312 // heap allocation from this call. 313 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))); 314 size_t size = sizeof(StringImpl) + length * sizeof(UChar); 315 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size)); 316 317 data = reinterpret_cast<UChar*>(string + 1); 318 return adoptRef(new (string) StringImpl(length)); 319 } 320 321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data) 322 { 323 ASSERT(originalString->is8Bit()); 324 ASSERT(originalString->hasOneRef()); 325 326 if (!length) { 327 data = 0; 328 return empty(); 329 } 330 331 // Same as createUninitialized() except here we use realloc. 332 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar))); 333 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 334 originalString->~StringImpl(); 335 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size)); 336 337 data = reinterpret_cast<LChar*>(string + 1); 338 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 339 } 340 341 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data) 342 { 343 ASSERT(!originalString->is8Bit()); 344 ASSERT(originalString->hasOneRef()); 345 346 if (!length) { 347 data = 0; 348 return empty(); 349 } 350 351 // Same as createUninitialized() except here we use realloc. 352 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))); 353 size_t size = sizeof(StringImpl) + length * sizeof(UChar); 354 originalString->~StringImpl(); 355 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size)); 356 357 data = reinterpret_cast<UChar*>(string + 1); 358 return adoptRef(new (string) StringImpl(length)); 359 } 360 361 static StaticStringsTable& staticStrings() 362 { 363 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); 364 return staticStrings; 365 } 366 367 #ifndef NDEBUG 368 static bool s_allowCreationOfStaticStrings = true; 369 #endif 370 371 const StaticStringsTable& StringImpl::allStaticStrings() 372 { 373 return staticStrings(); 374 } 375 376 void StringImpl::freezeStaticStrings() 377 { 378 ASSERT(isMainThread()); 379 380 #ifndef NDEBUG 381 s_allowCreationOfStaticStrings = false; 382 #endif 383 } 384 385 unsigned StringImpl::m_highestStaticStringLength = 0; 386 387 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash) 388 { 389 ASSERT(s_allowCreationOfStaticStrings); 390 ASSERT(string); 391 ASSERT(length); 392 393 StaticStringsTable::const_iterator it = staticStrings().find(hash); 394 if (it != staticStrings().end()) { 395 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); 396 return it->value; 397 } 398 399 // Allocate a single buffer large enough to contain the StringImpl 400 // struct as well as the data which it contains. This removes one 401 // heap allocation from this call. 402 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar))); 403 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 404 405 WTF_ANNOTATE_SCOPED_MEMORY_LEAK; 406 StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size)); 407 408 LChar* data = reinterpret_cast<LChar*>(impl + 1); 409 impl = new (impl) StringImpl(length, hash, StaticString); 410 memcpy(data, string, length * sizeof(LChar)); 411 #ifndef NDEBUG 412 impl->assertHashIsCorrect(); 413 #endif 414 415 ASSERT(isMainThread()); 416 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); 417 staticStrings().add(hash, impl); 418 WTF_ANNOTATE_BENIGN_RACE(impl, 419 "Benign race on the reference counter of a static string created by StringImpl::createStatic"); 420 421 return impl; 422 } 423 424 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) 425 { 426 if (!characters || !length) 427 return empty(); 428 429 UChar* data; 430 RefPtr<StringImpl> string = createUninitialized(length, data); 431 memcpy(data, characters, length * sizeof(UChar)); 432 return string.release(); 433 } 434 435 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length) 436 { 437 if (!characters || !length) 438 return empty(); 439 440 LChar* data; 441 RefPtr<StringImpl> string = createUninitialized(length, data); 442 memcpy(data, characters, length * sizeof(LChar)); 443 return string.release(); 444 } 445 446 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) 447 { 448 if (!characters || !length) 449 return empty(); 450 451 LChar* data; 452 RefPtr<StringImpl> string = createUninitialized(length, data); 453 454 for (size_t i = 0; i < length; ++i) { 455 if (characters[i] & 0xff00) 456 return create(characters, length); 457 data[i] = static_cast<LChar>(characters[i]); 458 } 459 460 return string.release(); 461 } 462 463 PassRefPtr<StringImpl> StringImpl::create(const LChar* string) 464 { 465 if (!string) 466 return empty(); 467 size_t length = strlen(reinterpret_cast<const char*>(string)); 468 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); 469 return create(string, length); 470 } 471 472 bool StringImpl::containsOnlyWhitespace() 473 { 474 // FIXME: The definition of whitespace here includes a number of characters 475 // that are not whitespace from the point of view of RenderText; I wonder if 476 // that's a problem in practice. 477 if (is8Bit()) { 478 for (unsigned i = 0; i < m_length; ++i) { 479 UChar c = characters8()[i]; 480 if (!isASCIISpace(c)) 481 return false; 482 } 483 484 return true; 485 } 486 487 for (unsigned i = 0; i < m_length; ++i) { 488 UChar c = characters16()[i]; 489 if (!isASCIISpace(c)) 490 return false; 491 } 492 return true; 493 } 494 495 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 496 { 497 if (start >= m_length) 498 return empty(); 499 unsigned maxLength = m_length - start; 500 if (length >= maxLength) { 501 if (!start) 502 return this; 503 length = maxLength; 504 } 505 if (is8Bit()) 506 return create(characters8() + start, length); 507 508 return create(characters16() + start, length); 509 } 510 511 UChar32 StringImpl::characterStartingAt(unsigned i) 512 { 513 if (is8Bit()) 514 return characters8()[i]; 515 if (U16_IS_SINGLE(characters16()[i])) 516 return characters16()[i]; 517 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1])) 518 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); 519 return 0; 520 } 521 522 PassRefPtr<StringImpl> StringImpl::lower() 523 { 524 // Note: This is a hot function in the Dromaeo benchmark, specifically the 525 // no-op code path up through the first 'return' statement. 526 527 // First scan the string for uppercase and non-ASCII characters: 528 bool noUpper = true; 529 UChar ored = 0; 530 if (is8Bit()) { 531 const LChar* end = characters8() + m_length; 532 for (const LChar* chp = characters8(); chp != end; ++chp) { 533 if (UNLIKELY(isASCIIUpper(*chp))) 534 noUpper = false; 535 ored |= *chp; 536 } 537 // Nothing to do if the string is all ASCII with no uppercase. 538 if (noUpper && !(ored & ~0x7F)) 539 return this; 540 541 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 542 int32_t length = m_length; 543 544 LChar* data8; 545 RefPtr<StringImpl> newImpl = createUninitialized(length, data8); 546 547 if (!(ored & ~0x7F)) { 548 for (int32_t i = 0; i < length; ++i) 549 data8[i] = toASCIILower(characters8()[i]); 550 551 return newImpl.release(); 552 } 553 554 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 555 for (int32_t i = 0; i < length; ++i) 556 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 557 558 return newImpl.release(); 559 } 560 561 const UChar* end = characters16() + m_length; 562 for (const UChar* chp = characters16(); chp != end; ++chp) { 563 if (UNLIKELY(isASCIIUpper(*chp))) 564 noUpper = false; 565 ored |= *chp; 566 } 567 // Nothing to do if the string is all ASCII with no uppercase. 568 if (noUpper && !(ored & ~0x7F)) 569 return this; 570 571 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 572 int32_t length = m_length; 573 574 if (!(ored & ~0x7F)) { 575 UChar* data16; 576 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 577 578 for (int32_t i = 0; i < length; ++i) { 579 UChar c = characters16()[i]; 580 data16[i] = toASCIILower(c); 581 } 582 return newImpl.release(); 583 } 584 585 // Do a slower implementation for cases that include non-ASCII characters. 586 UChar* data16; 587 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 588 589 bool error; 590 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error); 591 if (!error && realLength == length) 592 return newImpl.release(); 593 594 newImpl = createUninitialized(realLength, data16); 595 Unicode::toLower(data16, realLength, characters16(), m_length, &error); 596 if (error) 597 return this; 598 return newImpl.release(); 599 } 600 601 PassRefPtr<StringImpl> StringImpl::upper() 602 { 603 // This function could be optimized for no-op cases the way lower() is, 604 // but in empirical testing, few actual calls to upper() are no-ops, so 605 // it wouldn't be worth the extra time for pre-scanning. 606 607 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 608 int32_t length = m_length; 609 610 if (is8Bit()) { 611 LChar* data8; 612 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); 613 614 // Do a faster loop for the case where all the characters are ASCII. 615 LChar ored = 0; 616 for (int i = 0; i < length; ++i) { 617 LChar c = characters8()[i]; 618 ored |= c; 619 data8[i] = toASCIIUpper(c); 620 } 621 if (!(ored & ~0x7F)) 622 return newImpl.release(); 623 624 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 625 int numberSharpSCharacters = 0; 626 627 // There are two special cases. 628 // 1. latin-1 characters when converted to upper case are 16 bit characters. 629 // 2. Lower case sharp-S converts to "SS" (two characters) 630 for (int32_t i = 0; i < length; ++i) { 631 LChar c = characters8()[i]; 632 if (UNLIKELY(c == smallLetterSharpS)) 633 ++numberSharpSCharacters; 634 UChar upper = Unicode::toUpper(c); 635 if (UNLIKELY(upper > 0xff)) { 636 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path. 637 goto upconvert; 638 } 639 data8[i] = static_cast<LChar>(upper); 640 } 641 642 if (!numberSharpSCharacters) 643 return newImpl.release(); 644 645 // We have numberSSCharacters sharp-s characters, but none of the other special characters. 646 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); 647 648 LChar* dest = data8; 649 650 for (int32_t i = 0; i < length; ++i) { 651 LChar c = characters8()[i]; 652 if (c == smallLetterSharpS) { 653 *dest++ = 'S'; 654 *dest++ = 'S'; 655 } else 656 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); 657 } 658 659 return newImpl.release(); 660 } 661 662 upconvert: 663 RefPtr<StringImpl> upconverted = upconvertedString(); 664 const UChar* source16 = upconverted->characters16(); 665 666 UChar* data16; 667 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 668 669 // Do a faster loop for the case where all the characters are ASCII. 670 UChar ored = 0; 671 for (int i = 0; i < length; ++i) { 672 UChar c = source16[i]; 673 ored |= c; 674 data16[i] = toASCIIUpper(c); 675 } 676 if (!(ored & ~0x7F)) 677 return newImpl.release(); 678 679 // Do a slower implementation for cases that include non-ASCII characters. 680 bool error; 681 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error); 682 if (!error && realLength == length) 683 return newImpl; 684 newImpl = createUninitialized(realLength, data16); 685 Unicode::toUpper(data16, realLength, source16, m_length, &error); 686 if (error) 687 return this; 688 return newImpl.release(); 689 } 690 691 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) 692 { 693 // Use the more-optimized code path most of the time. 694 // Note the assumption here that the only locale-specific lowercasing is 695 // in the "tr" and "az" locales. 696 // FIXME: Could possibly optimize further by looking for the specific sequences 697 // that have locale-specific lowercasing. There are only three of them. 698 if (!(localeIdentifier == "tr" || localeIdentifier == "az")) 699 return lower(); 700 701 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 702 CRASH(); 703 int length = m_length; 704 705 // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than 706 // allocating memory just to turn localeIdentifier into a C string, and there is no 707 // difference between the uppercasing for "tr" and "az" locales. 708 RefPtr<StringImpl> upconverted = upconvertedString(); 709 const UChar* source16 = upconverted->characters16(); 710 UChar* data16; 711 RefPtr<StringImpl> newString = createUninitialized(length, data16); 712 do { 713 UErrorCode status = U_ZERO_ERROR; 714 int realLength = u_strToLower(data16, length, source16, length, "tr", &status); 715 if (U_SUCCESS(status)) { 716 newString->truncateAssumingIsolated(realLength); 717 return newString.release(); 718 } 719 if (status != U_BUFFER_OVERFLOW_ERROR) 720 return this; 721 // Expand the buffer. 722 newString = createUninitialized(realLength, data16); 723 } while (true); 724 } 725 726 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) 727 { 728 // Use the more-optimized code path most of the time. 729 // Note the assumption here that the only locale-specific uppercasing is of the 730 // letter "i" in the "tr" and "az" locales. 731 if (!(localeIdentifier == "tr" || localeIdentifier == "az") || find('i') == kNotFound) 732 return upper(); 733 734 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 735 CRASH(); 736 int length = m_length; 737 738 // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than 739 // allocating memory just to turn localeIdentifier into a C string, and there is no 740 // difference between the uppercasing for "tr" and "az" locales. 741 RefPtr<StringImpl> upconverted = upconvertedString(); 742 const UChar* source16 = upconverted->characters16(); 743 UChar* data16; 744 RefPtr<StringImpl> newString = createUninitialized(length, data16); 745 do { 746 UErrorCode status = U_ZERO_ERROR; 747 int realLength = u_strToUpper(data16, length, source16, length, "tr", &status); 748 if (U_SUCCESS(status)) { 749 newString->truncateAssumingIsolated(realLength); 750 return newString.release(); 751 } 752 if (status != U_BUFFER_OVERFLOW_ERROR) 753 return this; 754 // Expand the buffer. 755 newString = createUninitialized(realLength, data16); 756 } while (true); 757 } 758 759 PassRefPtr<StringImpl> StringImpl::fill(UChar character) 760 { 761 if (!(character & ~0x7F)) { 762 LChar* data; 763 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 764 for (unsigned i = 0; i < m_length; ++i) 765 data[i] = character; 766 return newImpl.release(); 767 } 768 UChar* data; 769 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 770 for (unsigned i = 0; i < m_length; ++i) 771 data[i] = character; 772 return newImpl.release(); 773 } 774 775 PassRefPtr<StringImpl> StringImpl::foldCase() 776 { 777 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 778 int32_t length = m_length; 779 780 if (is8Bit()) { 781 // Do a faster loop for the case where all the characters are ASCII. 782 LChar* data; 783 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); 784 LChar ored = 0; 785 786 for (int32_t i = 0; i < length; ++i) { 787 LChar c = characters8()[i]; 788 data[i] = toASCIILower(c); 789 ored |= c; 790 } 791 792 if (!(ored & ~0x7F)) 793 return newImpl.release(); 794 795 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 796 for (int32_t i = 0; i < length; ++i) 797 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 798 799 return newImpl.release(); 800 } 801 802 // Do a faster loop for the case where all the characters are ASCII. 803 UChar* data; 804 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 805 UChar ored = 0; 806 for (int32_t i = 0; i < length; ++i) { 807 UChar c = characters16()[i]; 808 ored |= c; 809 data[i] = toASCIILower(c); 810 } 811 if (!(ored & ~0x7F)) 812 return newImpl.release(); 813 814 // Do a slower implementation for cases that include non-ASCII characters. 815 bool error; 816 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error); 817 if (!error && realLength == length) 818 return newImpl.release(); 819 newImpl = createUninitialized(realLength, data); 820 Unicode::foldCase(data, realLength, characters16(), m_length, &error); 821 if (error) 822 return this; 823 return newImpl.release(); 824 } 825 826 template <class UCharPredicate> 827 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) 828 { 829 if (!m_length) 830 return empty(); 831 832 unsigned start = 0; 833 unsigned end = m_length - 1; 834 835 // skip white space from start 836 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start])) 837 ++start; 838 839 // only white space 840 if (start > end) 841 return empty(); 842 843 // skip white space from end 844 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) 845 --end; 846 847 if (!start && end == m_length - 1) 848 return this; 849 if (is8Bit()) 850 return create(characters8() + start, end + 1 - start); 851 return create(characters16() + start, end + 1 - start); 852 } 853 854 class UCharPredicate { 855 public: 856 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { } 857 858 inline bool operator()(UChar ch) const 859 { 860 return m_function(ch); 861 } 862 863 private: 864 const CharacterMatchFunctionPtr m_function; 865 }; 866 867 class SpaceOrNewlinePredicate { 868 public: 869 inline bool operator()(UChar ch) const 870 { 871 return isSpaceOrNewline(ch); 872 } 873 }; 874 875 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() 876 { 877 return stripMatchedCharacters(SpaceOrNewlinePredicate()); 878 } 879 880 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) 881 { 882 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); 883 } 884 885 template <typename CharType> 886 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch) 887 { 888 const CharType* from = characters; 889 const CharType* fromend = from + m_length; 890 891 // Assume the common case will not remove any characters 892 while (from != fromend && !findMatch(*from)) 893 ++from; 894 if (from == fromend) 895 return this; 896 897 StringBuffer<CharType> data(m_length); 898 CharType* to = data.characters(); 899 unsigned outc = from - characters; 900 901 if (outc) 902 memcpy(to, characters, outc * sizeof(CharType)); 903 904 while (true) { 905 while (from != fromend && findMatch(*from)) 906 ++from; 907 while (from != fromend && !findMatch(*from)) 908 to[outc++] = *from++; 909 if (from == fromend) 910 break; 911 } 912 913 data.shrink(outc); 914 915 return data.release(); 916 } 917 918 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) 919 { 920 if (is8Bit()) 921 return removeCharacters(characters8(), findMatch); 922 return removeCharacters(characters16(), findMatch); 923 } 924 925 template <typename CharType, class UCharPredicate> 926 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior) 927 { 928 StringBuffer<CharType> data(m_length); 929 930 const CharType* from = getCharacters<CharType>(); 931 const CharType* fromend = from + m_length; 932 int outc = 0; 933 bool changedToSpace = false; 934 935 CharType* to = data.characters(); 936 937 if (stripBehavior == StripExtraWhiteSpace) { 938 while (true) { 939 while (from != fromend && predicate(*from)) { 940 if (*from != ' ') 941 changedToSpace = true; 942 ++from; 943 } 944 while (from != fromend && !predicate(*from)) 945 to[outc++] = *from++; 946 if (from != fromend) 947 to[outc++] = ' '; 948 else 949 break; 950 } 951 952 if (outc > 0 && to[outc - 1] == ' ') 953 --outc; 954 } else { 955 for (; from != fromend; ++from) { 956 if (predicate(*from)) { 957 if (*from != ' ') 958 changedToSpace = true; 959 to[outc++] = ' '; 960 } else { 961 to[outc++] = *from; 962 } 963 } 964 } 965 966 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) 967 return this; 968 969 data.shrink(outc); 970 971 return data.release(); 972 } 973 974 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior) 975 { 976 if (is8Bit()) 977 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior); 978 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior); 979 } 980 981 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior) 982 { 983 if (is8Bit()) 984 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior); 985 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior); 986 } 987 988 int StringImpl::toIntStrict(bool* ok, int base) 989 { 990 if (is8Bit()) 991 return charactersToIntStrict(characters8(), m_length, ok, base); 992 return charactersToIntStrict(characters16(), m_length, ok, base); 993 } 994 995 unsigned StringImpl::toUIntStrict(bool* ok, int base) 996 { 997 if (is8Bit()) 998 return charactersToUIntStrict(characters8(), m_length, ok, base); 999 return charactersToUIntStrict(characters16(), m_length, ok, base); 1000 } 1001 1002 int64_t StringImpl::toInt64Strict(bool* ok, int base) 1003 { 1004 if (is8Bit()) 1005 return charactersToInt64Strict(characters8(), m_length, ok, base); 1006 return charactersToInt64Strict(characters16(), m_length, ok, base); 1007 } 1008 1009 uint64_t StringImpl::toUInt64Strict(bool* ok, int base) 1010 { 1011 if (is8Bit()) 1012 return charactersToUInt64Strict(characters8(), m_length, ok, base); 1013 return charactersToUInt64Strict(characters16(), m_length, ok, base); 1014 } 1015 1016 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) 1017 { 1018 if (is8Bit()) 1019 return charactersToIntPtrStrict(characters8(), m_length, ok, base); 1020 return charactersToIntPtrStrict(characters16(), m_length, ok, base); 1021 } 1022 1023 int StringImpl::toInt(bool* ok) 1024 { 1025 if (is8Bit()) 1026 return charactersToInt(characters8(), m_length, ok); 1027 return charactersToInt(characters16(), m_length, ok); 1028 } 1029 1030 unsigned StringImpl::toUInt(bool* ok) 1031 { 1032 if (is8Bit()) 1033 return charactersToUInt(characters8(), m_length, ok); 1034 return charactersToUInt(characters16(), m_length, ok); 1035 } 1036 1037 int64_t StringImpl::toInt64(bool* ok) 1038 { 1039 if (is8Bit()) 1040 return charactersToInt64(characters8(), m_length, ok); 1041 return charactersToInt64(characters16(), m_length, ok); 1042 } 1043 1044 uint64_t StringImpl::toUInt64(bool* ok) 1045 { 1046 if (is8Bit()) 1047 return charactersToUInt64(characters8(), m_length, ok); 1048 return charactersToUInt64(characters16(), m_length, ok); 1049 } 1050 1051 intptr_t StringImpl::toIntPtr(bool* ok) 1052 { 1053 if (is8Bit()) 1054 return charactersToIntPtr(characters8(), m_length, ok); 1055 return charactersToIntPtr(characters16(), m_length, ok); 1056 } 1057 1058 double StringImpl::toDouble(bool* ok) 1059 { 1060 if (is8Bit()) 1061 return charactersToDouble(characters8(), m_length, ok); 1062 return charactersToDouble(characters16(), m_length, ok); 1063 } 1064 1065 float StringImpl::toFloat(bool* ok) 1066 { 1067 if (is8Bit()) 1068 return charactersToFloat(characters8(), m_length, ok); 1069 return charactersToFloat(characters16(), m_length, ok); 1070 } 1071 1072 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) 1073 { 1074 while (length--) { 1075 LChar bc = *b++; 1076 if (foldCase(*a++) != foldCase(bc)) 1077 return false; 1078 } 1079 return true; 1080 } 1081 1082 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) 1083 { 1084 while (length--) { 1085 LChar bc = *b++; 1086 if (foldCase(*a++) != foldCase(bc)) 1087 return false; 1088 } 1089 return true; 1090 } 1091 1092 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) 1093 { 1094 if (is8Bit()) 1095 return WTF::find(characters8(), m_length, matchFunction, start); 1096 return WTF::find(characters16(), m_length, matchFunction, start); 1097 } 1098 1099 size_t StringImpl::find(const LChar* matchString, unsigned index) 1100 { 1101 // Check for null or empty string to match against 1102 if (!matchString) 1103 return kNotFound; 1104 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); 1105 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1106 unsigned matchLength = matchStringLength; 1107 if (!matchLength) 1108 return min(index, length()); 1109 1110 // Optimization 1: fast case for strings of length 1. 1111 if (matchLength == 1) 1112 return WTF::find(characters16(), length(), *matchString, index); 1113 1114 // Check index & matchLength are in range. 1115 if (index > length()) 1116 return kNotFound; 1117 unsigned searchLength = length() - index; 1118 if (matchLength > searchLength) 1119 return kNotFound; 1120 // delta is the number of additional times to test; delta == 0 means test only once. 1121 unsigned delta = searchLength - matchLength; 1122 1123 const UChar* searchCharacters = characters16() + index; 1124 1125 // Optimization 2: keep a running hash of the strings, 1126 // only call equal if the hashes match. 1127 unsigned searchHash = 0; 1128 unsigned matchHash = 0; 1129 for (unsigned i = 0; i < matchLength; ++i) { 1130 searchHash += searchCharacters[i]; 1131 matchHash += matchString[i]; 1132 } 1133 1134 unsigned i = 0; 1135 // keep looping until we match 1136 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { 1137 if (i == delta) 1138 return kNotFound; 1139 searchHash += searchCharacters[i + matchLength]; 1140 searchHash -= searchCharacters[i]; 1141 ++i; 1142 } 1143 return index + i; 1144 } 1145 1146 template<typename CharType> 1147 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength) 1148 { 1149 // delta is the number of additional times to test; delta == 0 means test only once. 1150 unsigned delta = searchLength - matchLength; 1151 1152 unsigned i = 0; 1153 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { 1154 if (i == delta) 1155 return kNotFound; 1156 ++i; 1157 } 1158 return index + i; 1159 } 1160 1161 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) 1162 { 1163 // Check for null or empty string to match against 1164 if (!matchString) 1165 return kNotFound; 1166 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); 1167 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1168 unsigned matchLength = matchStringLength; 1169 if (!matchLength) 1170 return min(index, length()); 1171 1172 // Check index & matchLength are in range. 1173 if (index > length()) 1174 return kNotFound; 1175 unsigned searchLength = length() - index; 1176 if (matchLength > searchLength) 1177 return kNotFound; 1178 1179 if (is8Bit()) 1180 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength); 1181 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength); 1182 } 1183 1184 template <typename SearchCharacterType, typename MatchCharacterType> 1185 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) 1186 { 1187 // Optimization: keep a running hash of the strings, 1188 // only call equal() if the hashes match. 1189 1190 // delta is the number of additional times to test; delta == 0 means test only once. 1191 unsigned delta = searchLength - matchLength; 1192 1193 unsigned searchHash = 0; 1194 unsigned matchHash = 0; 1195 1196 for (unsigned i = 0; i < matchLength; ++i) { 1197 searchHash += searchCharacters[i]; 1198 matchHash += matchCharacters[i]; 1199 } 1200 1201 unsigned i = 0; 1202 // keep looping until we match 1203 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { 1204 if (i == delta) 1205 return kNotFound; 1206 searchHash += searchCharacters[i + matchLength]; 1207 searchHash -= searchCharacters[i]; 1208 ++i; 1209 } 1210 return index + i; 1211 } 1212 1213 size_t StringImpl::find(StringImpl* matchString) 1214 { 1215 // Check for null string to match against 1216 if (UNLIKELY(!matchString)) 1217 return kNotFound; 1218 unsigned matchLength = matchString->length(); 1219 1220 // Optimization 1: fast case for strings of length 1. 1221 if (matchLength == 1) { 1222 if (is8Bit()) { 1223 if (matchString->is8Bit()) 1224 return WTF::find(characters8(), length(), matchString->characters8()[0]); 1225 return WTF::find(characters8(), length(), matchString->characters16()[0]); 1226 } 1227 if (matchString->is8Bit()) 1228 return WTF::find(characters16(), length(), matchString->characters8()[0]); 1229 return WTF::find(characters16(), length(), matchString->characters16()[0]); 1230 } 1231 1232 // Check matchLength is in range. 1233 if (matchLength > length()) 1234 return kNotFound; 1235 1236 // Check for empty string to match against 1237 if (UNLIKELY(!matchLength)) 1238 return 0; 1239 1240 if (is8Bit()) { 1241 if (matchString->is8Bit()) 1242 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength); 1243 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength); 1244 } 1245 1246 if (matchString->is8Bit()) 1247 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength); 1248 1249 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength); 1250 } 1251 1252 size_t StringImpl::find(StringImpl* matchString, unsigned index) 1253 { 1254 // Check for null or empty string to match against 1255 if (UNLIKELY(!matchString)) 1256 return kNotFound; 1257 1258 unsigned matchLength = matchString->length(); 1259 1260 // Optimization 1: fast case for strings of length 1. 1261 if (matchLength == 1) { 1262 if (is8Bit()) 1263 return WTF::find(characters8(), length(), (*matchString)[0], index); 1264 return WTF::find(characters16(), length(), (*matchString)[0], index); 1265 } 1266 1267 if (UNLIKELY(!matchLength)) 1268 return min(index, length()); 1269 1270 // Check index & matchLength are in range. 1271 if (index > length()) 1272 return kNotFound; 1273 unsigned searchLength = length() - index; 1274 if (matchLength > searchLength) 1275 return kNotFound; 1276 1277 if (is8Bit()) { 1278 if (matchString->is8Bit()) 1279 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength); 1280 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength); 1281 } 1282 1283 if (matchString->is8Bit()) 1284 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength); 1285 1286 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength); 1287 } 1288 1289 template <typename SearchCharacterType, typename MatchCharacterType> 1290 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) 1291 { 1292 // delta is the number of additional times to test; delta == 0 means test only once. 1293 unsigned delta = searchLength - matchLength; 1294 1295 unsigned i = 0; 1296 // keep looping until we match 1297 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { 1298 if (i == delta) 1299 return kNotFound; 1300 ++i; 1301 } 1302 return index + i; 1303 } 1304 1305 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) 1306 { 1307 // Check for null or empty string to match against 1308 if (!matchString) 1309 return kNotFound; 1310 unsigned matchLength = matchString->length(); 1311 if (!matchLength) 1312 return min(index, length()); 1313 1314 // Check index & matchLength are in range. 1315 if (index > length()) 1316 return kNotFound; 1317 unsigned searchLength = length() - index; 1318 if (matchLength > searchLength) 1319 return kNotFound; 1320 1321 if (is8Bit()) { 1322 if (matchString->is8Bit()) 1323 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength); 1324 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength); 1325 } 1326 1327 if (matchString->is8Bit()) 1328 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength); 1329 1330 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength); 1331 } 1332 1333 size_t StringImpl::findNextLineStart(unsigned index) 1334 { 1335 if (is8Bit()) 1336 return WTF::findNextLineStart(characters8(), m_length, index); 1337 return WTF::findNextLineStart(characters16(), m_length, index); 1338 } 1339 1340 size_t StringImpl::count(LChar c) const 1341 { 1342 int count = 0; 1343 if (is8Bit()) { 1344 for (size_t i = 0; i < m_length; ++i) 1345 count += characters8()[i] == c; 1346 } else { 1347 for (size_t i = 0; i < m_length; ++i) 1348 count += characters16()[i] == c; 1349 } 1350 return count; 1351 } 1352 1353 size_t StringImpl::reverseFind(UChar c, unsigned index) 1354 { 1355 if (is8Bit()) 1356 return WTF::reverseFind(characters8(), m_length, c, index); 1357 return WTF::reverseFind(characters16(), m_length, c, index); 1358 } 1359 1360 template <typename SearchCharacterType, typename MatchCharacterType> 1361 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength) 1362 { 1363 // Optimization: keep a running hash of the strings, 1364 // only call equal if the hashes match. 1365 1366 // delta is the number of additional times to test; delta == 0 means test only once. 1367 unsigned delta = min(index, length - matchLength); 1368 1369 unsigned searchHash = 0; 1370 unsigned matchHash = 0; 1371 for (unsigned i = 0; i < matchLength; ++i) { 1372 searchHash += searchCharacters[delta + i]; 1373 matchHash += matchCharacters[i]; 1374 } 1375 1376 // keep looping until we match 1377 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) { 1378 if (!delta) 1379 return kNotFound; 1380 --delta; 1381 searchHash -= searchCharacters[delta + matchLength]; 1382 searchHash += searchCharacters[delta]; 1383 } 1384 return delta; 1385 } 1386 1387 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) 1388 { 1389 // Check for null or empty string to match against 1390 if (!matchString) 1391 return kNotFound; 1392 unsigned matchLength = matchString->length(); 1393 unsigned ourLength = length(); 1394 if (!matchLength) 1395 return min(index, ourLength); 1396 1397 // Optimization 1: fast case for strings of length 1. 1398 if (matchLength == 1) { 1399 if (is8Bit()) 1400 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index); 1401 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index); 1402 } 1403 1404 // Check index & matchLength are in range. 1405 if (matchLength > ourLength) 1406 return kNotFound; 1407 1408 if (is8Bit()) { 1409 if (matchString->is8Bit()) 1410 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength); 1411 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength); 1412 } 1413 1414 if (matchString->is8Bit()) 1415 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength); 1416 1417 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength); 1418 } 1419 1420 template <typename SearchCharacterType, typename MatchCharacterType> 1421 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength) 1422 { 1423 // delta is the number of additional times to test; delta == 0 means test only once. 1424 unsigned delta = min(index, length - matchLength); 1425 1426 // keep looping until we match 1427 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { 1428 if (!delta) 1429 return kNotFound; 1430 --delta; 1431 } 1432 return delta; 1433 } 1434 1435 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) 1436 { 1437 // Check for null or empty string to match against 1438 if (!matchString) 1439 return kNotFound; 1440 unsigned matchLength = matchString->length(); 1441 unsigned ourLength = length(); 1442 if (!matchLength) 1443 return min(index, ourLength); 1444 1445 // Check index & matchLength are in range. 1446 if (matchLength > ourLength) 1447 return kNotFound; 1448 1449 if (is8Bit()) { 1450 if (matchString->is8Bit()) 1451 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength); 1452 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength); 1453 } 1454 1455 if (matchString->is8Bit()) 1456 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength); 1457 1458 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength); 1459 } 1460 1461 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive) 1462 { 1463 ASSERT(stringImpl); 1464 ASSERT(matchLength <= stringImpl->length()); 1465 ASSERT(startOffset + matchLength <= stringImpl->length()); 1466 1467 if (caseSensitive) { 1468 if (stringImpl->is8Bit()) 1469 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1470 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1471 } 1472 if (stringImpl->is8Bit()) 1473 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1474 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1475 } 1476 1477 bool StringImpl::startsWith(UChar character) const 1478 { 1479 return m_length && (*this)[0] == character; 1480 } 1481 1482 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const 1483 { 1484 ASSERT(matchLength); 1485 if (matchLength > length()) 1486 return false; 1487 return equalInner(this, 0, matchString, matchLength, caseSensitive); 1488 } 1489 1490 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) 1491 { 1492 ASSERT(matchString); 1493 if (m_length >= matchString->m_length) { 1494 unsigned start = m_length - matchString->m_length; 1495 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start; 1496 } 1497 return false; 1498 } 1499 1500 bool StringImpl::endsWith(UChar character) const 1501 { 1502 return m_length && (*this)[m_length - 1] == character; 1503 } 1504 1505 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const 1506 { 1507 ASSERT(matchLength); 1508 if (matchLength > length()) 1509 return false; 1510 unsigned startOffset = length() - matchLength; 1511 return equalInner(this, startOffset, matchString, matchLength, caseSensitive); 1512 } 1513 1514 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) 1515 { 1516 if (oldC == newC) 1517 return this; 1518 unsigned i; 1519 for (i = 0; i != m_length; ++i) { 1520 UChar c = is8Bit() ? characters8()[i] : characters16()[i]; 1521 if (c == oldC) 1522 break; 1523 } 1524 if (i == m_length) 1525 return this; 1526 1527 if (is8Bit()) { 1528 if (oldC > 0xff) 1529 // Looking for a 16 bit char in an 8 bit string, we're done. 1530 return this; 1531 1532 if (newC <= 0xff) { 1533 LChar* data; 1534 LChar oldChar = static_cast<LChar>(oldC); 1535 LChar newChar = static_cast<LChar>(newC); 1536 1537 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1538 1539 for (i = 0; i != m_length; ++i) { 1540 LChar ch = characters8()[i]; 1541 if (ch == oldChar) 1542 ch = newChar; 1543 data[i] = ch; 1544 } 1545 return newImpl.release(); 1546 } 1547 1548 // There is the possibility we need to up convert from 8 to 16 bit, 1549 // create a 16 bit string for the result. 1550 UChar* data; 1551 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1552 1553 for (i = 0; i != m_length; ++i) { 1554 UChar ch = characters8()[i]; 1555 if (ch == oldC) 1556 ch = newC; 1557 data[i] = ch; 1558 } 1559 1560 return newImpl.release(); 1561 } 1562 1563 UChar* data; 1564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1565 1566 for (i = 0; i != m_length; ++i) { 1567 UChar ch = characters16()[i]; 1568 if (ch == oldC) 1569 ch = newC; 1570 data[i] = ch; 1571 } 1572 return newImpl.release(); 1573 } 1574 1575 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) 1576 { 1577 position = min(position, length()); 1578 lengthToReplace = min(lengthToReplace, length() - position); 1579 unsigned lengthToInsert = str ? str->length() : 0; 1580 if (!lengthToReplace && !lengthToInsert) 1581 return this; 1582 1583 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert)); 1584 1585 if (is8Bit() && (!str || str->is8Bit())) { 1586 LChar* data; 1587 RefPtr<StringImpl> newImpl = 1588 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1589 memcpy(data, characters8(), position * sizeof(LChar)); 1590 if (str) 1591 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar)); 1592 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace, 1593 (length() - position - lengthToReplace) * sizeof(LChar)); 1594 return newImpl.release(); 1595 } 1596 UChar* data; 1597 RefPtr<StringImpl> newImpl = 1598 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1599 if (is8Bit()) 1600 for (unsigned i = 0; i < position; ++i) 1601 data[i] = characters8()[i]; 1602 else 1603 memcpy(data, characters16(), position * sizeof(UChar)); 1604 if (str) { 1605 if (str->is8Bit()) 1606 for (unsigned i = 0; i < lengthToInsert; ++i) 1607 data[i + position] = str->characters8()[i]; 1608 else 1609 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar)); 1610 } 1611 if (is8Bit()) { 1612 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) 1613 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace]; 1614 } else { 1615 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace, 1616 (length() - position - lengthToReplace) * sizeof(UChar)); 1617 } 1618 return newImpl.release(); 1619 } 1620 1621 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) 1622 { 1623 if (!replacement) 1624 return this; 1625 1626 if (replacement->is8Bit()) 1627 return replace(pattern, replacement->characters8(), replacement->length()); 1628 1629 return replace(pattern, replacement->characters16(), replacement->length()); 1630 } 1631 1632 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength) 1633 { 1634 ASSERT(replacement); 1635 1636 size_t srcSegmentStart = 0; 1637 unsigned matchCount = 0; 1638 1639 // Count the matches. 1640 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1641 ++matchCount; 1642 ++srcSegmentStart; 1643 } 1644 1645 // If we have 0 matches then we don't have to do any more work. 1646 if (!matchCount) 1647 return this; 1648 1649 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1650 1651 unsigned replaceSize = matchCount * repStrLength; 1652 unsigned newSize = m_length - matchCount; 1653 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1654 1655 newSize += replaceSize; 1656 1657 // Construct the new data. 1658 size_t srcSegmentEnd; 1659 unsigned srcSegmentLength; 1660 srcSegmentStart = 0; 1661 unsigned dstOffset = 0; 1662 1663 if (is8Bit()) { 1664 LChar* data; 1665 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1666 1667 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1668 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1669 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1670 dstOffset += srcSegmentLength; 1671 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); 1672 dstOffset += repStrLength; 1673 srcSegmentStart = srcSegmentEnd + 1; 1674 } 1675 1676 srcSegmentLength = m_length - srcSegmentStart; 1677 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1678 1679 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1680 1681 return newImpl.release(); 1682 } 1683 1684 UChar* data; 1685 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1686 1687 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1688 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1689 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1690 1691 dstOffset += srcSegmentLength; 1692 for (unsigned i = 0; i < repStrLength; ++i) 1693 data[i + dstOffset] = replacement[i]; 1694 1695 dstOffset += repStrLength; 1696 srcSegmentStart = srcSegmentEnd + 1; 1697 } 1698 1699 srcSegmentLength = m_length - srcSegmentStart; 1700 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1701 1702 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1703 1704 return newImpl.release(); 1705 } 1706 1707 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength) 1708 { 1709 ASSERT(replacement); 1710 1711 size_t srcSegmentStart = 0; 1712 unsigned matchCount = 0; 1713 1714 // Count the matches. 1715 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1716 ++matchCount; 1717 ++srcSegmentStart; 1718 } 1719 1720 // If we have 0 matches then we don't have to do any more work. 1721 if (!matchCount) 1722 return this; 1723 1724 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1725 1726 unsigned replaceSize = matchCount * repStrLength; 1727 unsigned newSize = m_length - matchCount; 1728 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1729 1730 newSize += replaceSize; 1731 1732 // Construct the new data. 1733 size_t srcSegmentEnd; 1734 unsigned srcSegmentLength; 1735 srcSegmentStart = 0; 1736 unsigned dstOffset = 0; 1737 1738 if (is8Bit()) { 1739 UChar* data; 1740 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1741 1742 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1743 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1744 for (unsigned i = 0; i < srcSegmentLength; ++i) 1745 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1746 1747 dstOffset += srcSegmentLength; 1748 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 1749 1750 dstOffset += repStrLength; 1751 srcSegmentStart = srcSegmentEnd + 1; 1752 } 1753 1754 srcSegmentLength = m_length - srcSegmentStart; 1755 for (unsigned i = 0; i < srcSegmentLength; ++i) 1756 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1757 1758 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1759 1760 return newImpl.release(); 1761 } 1762 1763 UChar* data; 1764 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1765 1766 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1767 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1768 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1769 1770 dstOffset += srcSegmentLength; 1771 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 1772 1773 dstOffset += repStrLength; 1774 srcSegmentStart = srcSegmentEnd + 1; 1775 } 1776 1777 srcSegmentLength = m_length - srcSegmentStart; 1778 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1779 1780 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1781 1782 return newImpl.release(); 1783 } 1784 1785 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) 1786 { 1787 if (!pattern || !replacement) 1788 return this; 1789 1790 unsigned patternLength = pattern->length(); 1791 if (!patternLength) 1792 return this; 1793 1794 unsigned repStrLength = replacement->length(); 1795 size_t srcSegmentStart = 0; 1796 unsigned matchCount = 0; 1797 1798 // Count the matches. 1799 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1800 ++matchCount; 1801 srcSegmentStart += patternLength; 1802 } 1803 1804 // If we have 0 matches, we don't have to do any more work 1805 if (!matchCount) 1806 return this; 1807 1808 unsigned newSize = m_length - matchCount * patternLength; 1809 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1810 1811 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength)); 1812 1813 newSize += matchCount * repStrLength; 1814 1815 1816 // Construct the new data 1817 size_t srcSegmentEnd; 1818 unsigned srcSegmentLength; 1819 srcSegmentStart = 0; 1820 unsigned dstOffset = 0; 1821 bool srcIs8Bit = is8Bit(); 1822 bool replacementIs8Bit = replacement->is8Bit(); 1823 1824 // There are 4 cases: 1825 // 1. This and replacement are both 8 bit. 1826 // 2. This and replacement are both 16 bit. 1827 // 3. This is 8 bit and replacement is 16 bit. 1828 // 4. This is 16 bit and replacement is 8 bit. 1829 if (srcIs8Bit && replacementIs8Bit) { 1830 // Case 1 1831 LChar* data; 1832 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1833 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1834 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1835 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1836 dstOffset += srcSegmentLength; 1837 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar)); 1838 dstOffset += repStrLength; 1839 srcSegmentStart = srcSegmentEnd + patternLength; 1840 } 1841 1842 srcSegmentLength = m_length - srcSegmentStart; 1843 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1844 1845 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1846 1847 return newImpl.release(); 1848 } 1849 1850 UChar* data; 1851 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1852 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1853 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1854 if (srcIs8Bit) { 1855 // Case 3. 1856 for (unsigned i = 0; i < srcSegmentLength; ++i) 1857 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1858 } else { 1859 // Case 2 & 4. 1860 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1861 } 1862 dstOffset += srcSegmentLength; 1863 if (replacementIs8Bit) { 1864 // Cases 2 & 3. 1865 for (unsigned i = 0; i < repStrLength; ++i) 1866 data[i + dstOffset] = replacement->characters8()[i]; 1867 } else { 1868 // Case 4 1869 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar)); 1870 } 1871 dstOffset += repStrLength; 1872 srcSegmentStart = srcSegmentEnd + patternLength; 1873 } 1874 1875 srcSegmentLength = m_length - srcSegmentStart; 1876 if (srcIs8Bit) { 1877 // Case 3. 1878 for (unsigned i = 0; i < srcSegmentLength; ++i) 1879 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1880 } else { 1881 // Cases 2 & 4. 1882 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1883 } 1884 1885 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1886 1887 return newImpl.release(); 1888 } 1889 1890 PassRefPtr<StringImpl> StringImpl::upconvertedString() 1891 { 1892 if (is8Bit()) 1893 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl(); 1894 return this; 1895 } 1896 1897 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) 1898 { 1899 unsigned aLength = a->length(); 1900 unsigned bLength = b->length(); 1901 if (aLength != bLength) 1902 return false; 1903 1904 if (a->is8Bit()) { 1905 if (b->is8Bit()) 1906 return equal(a->characters8(), b->characters8(), aLength); 1907 1908 return equal(a->characters8(), b->characters16(), aLength); 1909 } 1910 1911 if (b->is8Bit()) 1912 return equal(a->characters16(), b->characters8(), aLength); 1913 1914 return equal(a->characters16(), b->characters16(), aLength); 1915 } 1916 1917 bool equal(const StringImpl* a, const StringImpl* b) 1918 { 1919 if (a == b) 1920 return true; 1921 if (!a || !b) 1922 return false; 1923 1924 return stringImplContentEqual(a, b); 1925 } 1926 1927 template <typename CharType> 1928 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length) 1929 { 1930 if (!a) 1931 return !b; 1932 if (!b) 1933 return false; 1934 1935 if (a->length() != length) 1936 return false; 1937 if (a->is8Bit()) 1938 return equal(a->characters8(), b, length); 1939 return equal(a->characters16(), b, length); 1940 } 1941 1942 bool equal(const StringImpl* a, const LChar* b, unsigned length) 1943 { 1944 return equalInternal(a, b, length); 1945 } 1946 1947 bool equal(const StringImpl* a, const UChar* b, unsigned length) 1948 { 1949 return equalInternal(a, b, length); 1950 } 1951 1952 bool equal(const StringImpl* a, const LChar* b) 1953 { 1954 if (!a) 1955 return !b; 1956 if (!b) 1957 return !a; 1958 1959 unsigned length = a->length(); 1960 1961 if (a->is8Bit()) { 1962 const LChar* aPtr = a->characters8(); 1963 for (unsigned i = 0; i != length; ++i) { 1964 LChar bc = b[i]; 1965 LChar ac = aPtr[i]; 1966 if (!bc) 1967 return false; 1968 if (ac != bc) 1969 return false; 1970 } 1971 1972 return !b[length]; 1973 } 1974 1975 const UChar* aPtr = a->characters16(); 1976 for (unsigned i = 0; i != length; ++i) { 1977 LChar bc = b[i]; 1978 if (!bc) 1979 return false; 1980 if (aPtr[i] != bc) 1981 return false; 1982 } 1983 1984 return !b[length]; 1985 } 1986 1987 bool equalNonNull(const StringImpl* a, const StringImpl* b) 1988 { 1989 ASSERT(a && b); 1990 if (a == b) 1991 return true; 1992 1993 return stringImplContentEqual(a, b); 1994 } 1995 1996 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) 1997 { 1998 if (a == b) 1999 return true; 2000 if (!a || !b) 2001 return false; 2002 2003 return CaseFoldingHash::equal(a, b); 2004 } 2005 2006 bool equalIgnoringCase(const StringImpl* a, const LChar* b) 2007 { 2008 if (!a) 2009 return !b; 2010 if (!b) 2011 return !a; 2012 2013 unsigned length = a->length(); 2014 2015 // Do a faster loop for the case where all the characters are ASCII. 2016 UChar ored = 0; 2017 bool equal = true; 2018 if (a->is8Bit()) { 2019 const LChar* as = a->characters8(); 2020 for (unsigned i = 0; i != length; ++i) { 2021 LChar bc = b[i]; 2022 if (!bc) 2023 return false; 2024 UChar ac = as[i]; 2025 ored |= ac; 2026 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 2027 } 2028 2029 // Do a slower implementation for cases that include non-ASCII characters. 2030 if (ored & ~0x7F) { 2031 equal = true; 2032 for (unsigned i = 0; i != length; ++i) 2033 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 2034 } 2035 2036 return equal && !b[length]; 2037 } 2038 2039 const UChar* as = a->characters16(); 2040 for (unsigned i = 0; i != length; ++i) { 2041 LChar bc = b[i]; 2042 if (!bc) 2043 return false; 2044 UChar ac = as[i]; 2045 ored |= ac; 2046 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 2047 } 2048 2049 // Do a slower implementation for cases that include non-ASCII characters. 2050 if (ored & ~0x7F) { 2051 equal = true; 2052 for (unsigned i = 0; i != length; ++i) { 2053 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 2054 } 2055 } 2056 2057 return equal && !b[length]; 2058 } 2059 2060 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) 2061 { 2062 ASSERT(a && b); 2063 if (a == b) 2064 return true; 2065 2066 unsigned length = a->length(); 2067 if (length != b->length()) 2068 return false; 2069 2070 if (a->is8Bit()) { 2071 if (b->is8Bit()) 2072 return equalIgnoringCase(a->characters8(), b->characters8(), length); 2073 2074 return equalIgnoringCase(b->characters16(), a->characters8(), length); 2075 } 2076 2077 if (b->is8Bit()) 2078 return equalIgnoringCase(a->characters16(), b->characters8(), length); 2079 2080 return equalIgnoringCase(a->characters16(), b->characters16(), length); 2081 } 2082 2083 bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 2084 { 2085 if (!a && b && !b->length()) 2086 return true; 2087 if (!b && a && !a->length()) 2088 return true; 2089 return equal(a, b); 2090 } 2091 2092 size_t StringImpl::sizeInBytes() const 2093 { 2094 size_t size = length(); 2095 if (!is8Bit()) 2096 size *= 2; 2097 return size + sizeof(*this); 2098 } 2099 2100 } // namespace WTF 2101