1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef StringImpl_h 24 #define StringImpl_h 25 26 #include <limits.h> 27 #include "wtf/ASCIICType.h" 28 #include "wtf/Forward.h" 29 #include "wtf/HashMap.h" 30 #include "wtf/StringHasher.h" 31 #include "wtf/Vector.h" 32 #include "wtf/WTFExport.h" 33 #include "wtf/unicode/Unicode.h" 34 35 #if USE(CF) 36 typedef const struct __CFString * CFStringRef; 37 #endif 38 39 #ifdef __OBJC__ 40 @class NSString; 41 #endif 42 43 namespace WTF { 44 45 struct AlreadyHashed; 46 struct CStringTranslator; 47 template<typename CharacterType> struct HashAndCharactersTranslator; 48 struct HashAndUTF8CharactersTranslator; 49 struct LCharBufferTranslator; 50 struct CharBufferFromLiteralDataTranslator; 51 struct SubstringTranslator; 52 struct UCharBufferTranslator; 53 template<typename> class RetainPtr; 54 55 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; 56 57 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace }; 58 59 typedef bool (*CharacterMatchFunctionPtr)(UChar); 60 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); 61 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; 62 63 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage 64 #undef STRING_STATS 65 66 #ifdef STRING_STATS 67 struct StringStats { 68 inline void add8BitString(unsigned length) 69 { 70 ++m_totalNumberStrings; 71 ++m_number8BitStrings; 72 m_total8BitData += length; 73 } 74 75 inline void add16BitString(unsigned length) 76 { 77 ++m_totalNumberStrings; 78 ++m_number16BitStrings; 79 m_total16BitData += length; 80 } 81 82 void removeString(StringImpl*); 83 void printStats(); 84 85 static const unsigned s_printStringStatsFrequency = 5000; 86 static unsigned s_stringRemovesTillPrintStats; 87 88 unsigned m_totalNumberStrings; 89 unsigned m_number8BitStrings; 90 unsigned m_number16BitStrings; 91 unsigned long long m_total8BitData; 92 unsigned long long m_total16BitData; 93 }; 94 95 void addStringForStats(StringImpl*); 96 void removeStringForStats(StringImpl*); 97 98 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length); addStringForStats(this) 99 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length); addStringForStats(this) 100 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string); removeStringForStats(this) 101 #else 102 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) 103 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) 104 #define STRING_STATS_REMOVE_STRING(string) ((void)0) 105 #endif 106 107 // You can find documentation about this class in this doc: 108 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing 109 class WTF_EXPORT StringImpl { 110 WTF_MAKE_NONCOPYABLE(StringImpl); 111 friend struct WTF::CStringTranslator; 112 template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator; 113 friend struct WTF::HashAndUTF8CharactersTranslator; 114 friend struct WTF::CharBufferFromLiteralDataTranslator; 115 friend struct WTF::LCharBufferTranslator; 116 friend struct WTF::SubstringTranslator; 117 friend struct WTF::UCharBufferTranslator; 118 119 private: 120 // StringImpls are allocated out of the WTF buffer partition. 121 void* operator new(size_t); 122 void* operator new(size_t, void* ptr) { return ptr; }; 123 void operator delete(void*); 124 125 // Used to construct static strings, which have an special refCount that can never hit zero. 126 // This means that the static string will never be destroyed, which is important because 127 // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 128 enum ConstructEmptyStringTag { ConstructEmptyString }; 129 explicit StringImpl(ConstructEmptyStringTag) 130 : m_refCount(1) 131 , m_length(0) 132 , m_hash(0) 133 , m_isAtomic(false) 134 , m_is8Bit(true) 135 , m_isStatic(true) 136 { 137 // Ensure that the hash is computed so that AtomicStringHash can call existingHash() 138 // with impunity. The empty string is special because it is never entered into 139 // AtomicString's HashKey, but still needs to compare correctly. 140 STRING_STATS_ADD_8BIT_STRING(m_length); 141 hash(); 142 } 143 144 // FIXME: there has to be a less hacky way to do this. 145 enum Force8Bit { Force8BitConstructor }; 146 StringImpl(unsigned length, Force8Bit) 147 : m_refCount(1) 148 , m_length(length) 149 , m_hash(0) 150 , m_isAtomic(false) 151 , m_is8Bit(true) 152 , m_isStatic(false) 153 { 154 ASSERT(m_length); 155 STRING_STATS_ADD_8BIT_STRING(m_length); 156 } 157 158 StringImpl(unsigned length) 159 : m_refCount(1) 160 , m_length(length) 161 , m_hash(0) 162 , m_isAtomic(false) 163 , m_is8Bit(false) 164 , m_isStatic(false) 165 { 166 ASSERT(m_length); 167 STRING_STATS_ADD_16BIT_STRING(m_length); 168 } 169 170 enum StaticStringTag { StaticString }; 171 StringImpl(unsigned length, unsigned hash, StaticStringTag) 172 : m_refCount(1) 173 , m_length(length) 174 , m_hash(hash) 175 , m_isAtomic(false) 176 , m_is8Bit(true) 177 , m_isStatic(true) 178 { 179 } 180 181 public: 182 ~StringImpl(); 183 184 static StringImpl* createStatic(const char* string, unsigned length, unsigned hash); 185 static void freezeStaticStrings(); 186 static const StaticStringsTable& allStaticStrings(); 187 static unsigned highestStaticStringLength() { return m_highestStaticStringLength; } 188 189 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); 190 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); 191 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned length); 192 template<size_t inlineCapacity> 193 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) 194 { 195 return create8BitIfPossible(vector.data(), vector.size()); 196 } 197 198 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } 199 static PassRefPtr<StringImpl> create(const LChar*); 200 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } 201 202 static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data); 203 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); 204 205 // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr. 206 // Just like the input pointer of realloc(), the originalString can't be used after this function. 207 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length); 208 209 // If this StringImpl has only one reference, we can truncate the string by updating 210 // its m_length property without actually re-allocating its buffer. 211 void truncateAssumingIsolated(unsigned length) 212 { 213 ASSERT(hasOneRef()); 214 ASSERT(length <= m_length); 215 m_length = length; 216 } 217 218 unsigned length() const { return m_length; } 219 bool is8Bit() const { return m_is8Bit; } 220 221 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return reinterpret_cast<const LChar*>(this + 1); } 222 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return reinterpret_cast<const UChar*>(this + 1); } 223 224 template <typename CharType> 225 ALWAYS_INLINE const CharType * getCharacters() const; 226 227 size_t sizeInBytes() const; 228 229 bool isAtomic() const { return m_isAtomic; } 230 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } 231 232 bool isStatic() const { return m_isStatic; } 233 234 private: 235 // The high bits of 'hash' are always empty, but we prefer to store our flags 236 // in the low bits because it makes them slightly more efficient to access. 237 // So, we shift left and right when setting and getting our hash code. 238 void setHash(unsigned hash) const 239 { 240 ASSERT(!hasHash()); 241 // Multiple clients assume that StringHasher is the canonical string hash function. 242 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_length))); 243 m_hash = hash; 244 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. 245 } 246 247 unsigned rawHash() const 248 { 249 return m_hash; 250 } 251 252 void destroyIfNotStatic(); 253 254 public: 255 bool hasHash() const 256 { 257 return rawHash() != 0; 258 } 259 260 unsigned existingHash() const 261 { 262 ASSERT(hasHash()); 263 return rawHash(); 264 } 265 266 unsigned hash() const 267 { 268 if (hasHash()) 269 return existingHash(); 270 return hashSlowCase(); 271 } 272 273 ALWAYS_INLINE bool hasOneRef() const 274 { 275 return m_refCount == 1; 276 } 277 278 ALWAYS_INLINE void ref() 279 { 280 ++m_refCount; 281 } 282 283 ALWAYS_INLINE void deref() 284 { 285 if (hasOneRef()) { 286 destroyIfNotStatic(); 287 return; 288 } 289 290 --m_refCount; 291 } 292 293 static StringImpl* empty(); 294 295 // FIXME: Does this really belong in StringImpl? 296 template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters) 297 { 298 memcpy(destination, source, numCharacters * sizeof(T)); 299 } 300 301 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters) 302 { 303 for (unsigned i = 0; i < numCharacters; ++i) 304 destination[i] = source[i]; 305 } 306 307 // Some string features, like refcounting and the atomicity flag, are not 308 // thread-safe. We achieve thread safety by isolation, giving each thread 309 // its own copy of the string. 310 PassRefPtr<StringImpl> isolatedCopy() const; 311 312 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); 313 314 UChar operator[](unsigned i) const 315 { 316 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); 317 if (is8Bit()) 318 return characters8()[i]; 319 return characters16()[i]; 320 } 321 UChar32 characterStartingAt(unsigned); 322 323 bool containsOnlyWhitespace(); 324 325 int toIntStrict(bool* ok = 0, int base = 10); 326 unsigned toUIntStrict(bool* ok = 0, int base = 10); 327 int64_t toInt64Strict(bool* ok = 0, int base = 10); 328 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); 329 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); 330 331 int toInt(bool* ok = 0); // ignores trailing garbage 332 unsigned toUInt(bool* ok = 0); // ignores trailing garbage 333 int64_t toInt64(bool* ok = 0); // ignores trailing garbage 334 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage 335 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage 336 337 // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. 338 // Like the non-strict functions above, these return the value when there is trailing garbage. 339 // It would be better if these were more consistent with the above functions instead. 340 double toDouble(bool* ok = 0); 341 float toFloat(bool* ok = 0); 342 343 PassRefPtr<StringImpl> lower(); 344 PassRefPtr<StringImpl> upper(); 345 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier); 346 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier); 347 348 PassRefPtr<StringImpl> fill(UChar); 349 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII? 350 PassRefPtr<StringImpl> foldCase(); 351 352 PassRefPtr<StringImpl> stripWhiteSpace(); 353 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); 354 PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior stripBehavior = StripExtraWhiteSpace); 355 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBehavior stripBehavior = StripExtraWhiteSpace); 356 357 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); 358 template <typename CharType> 359 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); 360 361 size_t find(LChar character, unsigned start = 0); 362 size_t find(char character, unsigned start = 0); 363 size_t find(UChar character, unsigned start = 0); 364 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); 365 size_t find(const LChar*, unsigned index = 0); 366 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); } 367 size_t find(StringImpl*); 368 size_t find(StringImpl*, unsigned index); 369 size_t findIgnoringCase(const LChar*, unsigned index = 0); 370 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } 371 size_t findIgnoringCase(StringImpl*, unsigned index = 0); 372 373 size_t findNextLineStart(unsigned index = UINT_MAX); 374 375 size_t reverseFind(UChar, unsigned index = UINT_MAX); 376 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); 377 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 378 379 size_t count(LChar) const; 380 381 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } 382 bool startsWith(UChar) const; 383 bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const; 384 template<unsigned matchLength> 385 bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); } 386 387 bool endsWith(StringImpl*, bool caseSensitive = true); 388 bool endsWith(UChar) const; 389 bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const; 390 template<unsigned matchLength> 391 bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); } 392 393 PassRefPtr<StringImpl> replace(UChar, UChar); 394 PassRefPtr<StringImpl> replace(UChar, StringImpl*); 395 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } 396 PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); 397 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); 398 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); 399 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); 400 PassRefPtr<StringImpl> upconvertedString(); 401 402 #if USE(CF) 403 RetainPtr<CFStringRef> createCFString(); 404 #endif 405 #ifdef __OBJC__ 406 operator NSString*(); 407 #endif 408 409 #ifdef STRING_STATS 410 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } 411 #endif 412 413 private: 414 template<typename CharType> static size_t allocationSize(unsigned length) 415 { 416 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType))); 417 return sizeof(StringImpl) + length * sizeof(CharType); 418 } 419 420 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate); 421 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, StripBehavior); 422 NEVER_INLINE unsigned hashSlowCase() const; 423 424 #ifdef STRING_STATS 425 static StringStats m_stringStats; 426 #endif 427 428 static unsigned m_highestStaticStringLength; 429 430 #ifndef NDEBUG 431 void assertHashIsCorrect() 432 { 433 ASSERT(hasHash()); 434 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length())); 435 } 436 #endif 437 438 private: 439 unsigned m_refCount; 440 unsigned m_length; 441 mutable unsigned m_hash : 24; 442 unsigned m_isAtomic : 1; 443 unsigned m_is8Bit : 1; 444 unsigned m_isStatic : 1; 445 }; 446 447 template <> 448 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); } 449 450 template <> 451 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return characters16(); } 452 453 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); 454 WTF_EXPORT bool equal(const StringImpl*, const LChar*); 455 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); } 456 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); 457 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); 458 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } 459 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } 460 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } 461 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); 462 463 template<typename CharType> 464 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length) { return !memcmp(a, b, length * sizeof(CharType)); } 465 466 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) 467 { 468 for (unsigned i = 0; i < length; ++i) { 469 if (a[i] != b[i]) 470 return false; 471 } 472 return true; 473 } 474 475 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } 476 477 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*); 478 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*); 479 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); } 480 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned); 481 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned); 482 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } 483 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 484 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 485 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 486 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 487 { 488 ASSERT(length >= 0); 489 return !Unicode::umemcasecmp(a, b, length); 490 } 491 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); 492 493 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); 494 495 template<typename CharacterType> 496 inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) 497 { 498 while (index < length) { 499 if (characters[index] == matchCharacter) 500 return index; 501 ++index; 502 } 503 return kNotFound; 504 } 505 506 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) 507 { 508 return find(characters, length, static_cast<UChar>(matchCharacter), index); 509 } 510 511 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) 512 { 513 if (matchCharacter & ~0xFF) 514 return kNotFound; 515 return find(characters, length, static_cast<LChar>(matchCharacter), index); 516 } 517 518 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 519 { 520 while (index < length) { 521 if (matchFunction(characters[index])) 522 return index; 523 ++index; 524 } 525 return kNotFound; 526 } 527 528 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 529 { 530 while (index < length) { 531 if (matchFunction(characters[index])) 532 return index; 533 ++index; 534 } 535 return kNotFound; 536 } 537 538 template<typename CharacterType> 539 inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0) 540 { 541 while (index < length) { 542 CharacterType c = characters[index++]; 543 if ((c != '\n') && (c != '\r')) 544 continue; 545 546 // There can only be a start of a new line if there are more characters 547 // beyond the current character. 548 if (index < length) { 549 // The 3 common types of line terminators are 1. \r\n (Windows), 550 // 2. \r (old MacOS) and 3. \n (Unix'es). 551 552 if (c == '\n') 553 return index; // Case 3: just \n. 554 555 CharacterType c2 = characters[index]; 556 if (c2 != '\n') 557 return index; // Case 2: just \r. 558 559 // Case 1: \r\n. 560 // But, there's only a start of a new line if there are more 561 // characters beyond the \r\n. 562 if (++index < length) 563 return index; 564 } 565 } 566 return kNotFound; 567 } 568 569 template<typename CharacterType> 570 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX) 571 { 572 if (!length) 573 return kNotFound; 574 if (index >= length) 575 index = length - 1; 576 CharacterType c = characters[index]; 577 while ((c != '\n') && (c != '\r')) { 578 if (!index--) 579 return kNotFound; 580 c = characters[index]; 581 } 582 return index; 583 } 584 585 template<typename CharacterType> 586 inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX) 587 { 588 if (!length) 589 return kNotFound; 590 if (index >= length) 591 index = length - 1; 592 while (characters[index] != matchCharacter) { 593 if (!index--) 594 return kNotFound; 595 } 596 return index; 597 } 598 599 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX) 600 { 601 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index); 602 } 603 604 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) 605 { 606 if (matchCharacter & ~0xFF) 607 return kNotFound; 608 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index); 609 } 610 611 inline size_t StringImpl::find(LChar character, unsigned start) 612 { 613 if (is8Bit()) 614 return WTF::find(characters8(), m_length, character, start); 615 return WTF::find(characters16(), m_length, character, start); 616 } 617 618 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) 619 { 620 return find(static_cast<LChar>(character), start); 621 } 622 623 inline size_t StringImpl::find(UChar character, unsigned start) 624 { 625 if (is8Bit()) 626 return WTF::find(characters8(), m_length, character, start); 627 return WTF::find(characters16(), m_length, character, start); 628 } 629 630 inline unsigned lengthOfNullTerminatedString(const UChar* string) 631 { 632 size_t length = 0; 633 while (string[length] != UChar(0)) 634 ++length; 635 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); 636 return static_cast<unsigned>(length); 637 } 638 639 template<size_t inlineCapacity> 640 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) 641 { 642 if (!b) 643 return !a.size(); 644 if (a.size() != b->length()) 645 return false; 646 if (b->is8Bit()) 647 return equal(a.data(), b->characters8(), b->length()); 648 return equal(a.data(), b->characters16(), b->length()); 649 } 650 651 template<typename CharacterType1, typename CharacterType2> 652 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) 653 { 654 const unsigned lmin = l1 < l2 ? l1 : l2; 655 unsigned pos = 0; 656 while (pos < lmin && *c1 == *c2) { 657 ++c1; 658 ++c2; 659 ++pos; 660 } 661 662 if (pos < lmin) 663 return (c1[0] > c2[0]) ? 1 : -1; 664 665 if (l1 == l2) 666 return 0; 667 668 return (l1 > l2) ? 1 : -1; 669 } 670 671 static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) 672 { 673 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8()); 674 } 675 676 static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) 677 { 678 return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16()); 679 } 680 681 static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) 682 { 683 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16()); 684 } 685 686 static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) 687 { 688 if (!string1) 689 return (string2 && string2->length()) ? -1 : 0; 690 691 if (!string2) 692 return string1->length() ? 1 : 0; 693 694 bool string1Is8Bit = string1->is8Bit(); 695 bool string2Is8Bit = string2->is8Bit(); 696 if (string1Is8Bit) { 697 if (string2Is8Bit) 698 return codePointCompare8(string1, string2); 699 return codePointCompare8To16(string1, string2); 700 } 701 if (string2Is8Bit) 702 return -codePointCompare8To16(string2, string1); 703 return codePointCompare16(string1, string2); 704 } 705 706 static inline bool isSpaceOrNewline(UChar c) 707 { 708 // Use isASCIISpace() for basic Latin-1. 709 // This will include newlines, which aren't included in Unicode DirWS. 710 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; 711 } 712 713 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const 714 { 715 if (is8Bit()) 716 return create(characters8(), m_length); 717 return create(characters16(), m_length); 718 } 719 720 struct StringHash; 721 722 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 723 template<typename T> struct DefaultHash; 724 template<> struct DefaultHash<StringImpl*> { 725 typedef StringHash Hash; 726 }; 727 template<> struct DefaultHash<RefPtr<StringImpl> > { 728 typedef StringHash Hash; 729 }; 730 731 } 732 733 using WTF::StringImpl; 734 using WTF::equal; 735 using WTF::equalNonNull; 736 using WTF::TextCaseSensitivity; 737 using WTF::TextCaseSensitive; 738 using WTF::TextCaseInsensitive; 739 740 #endif 741