1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef StringImpl_h 24 #define StringImpl_h 25 26 #include <limits.h> 27 #include "wtf/ASCIICType.h" 28 #include "wtf/Forward.h" 29 #include "wtf/HashMap.h" 30 #include "wtf/StringHasher.h" 31 #include "wtf/Vector.h" 32 #include "wtf/WTFExport.h" 33 #include "wtf/unicode/Unicode.h" 34 35 #if USE(CF) 36 typedef const struct __CFString * CFStringRef; 37 #endif 38 39 #ifdef __OBJC__ 40 @class NSString; 41 #endif 42 43 namespace WTF { 44 45 struct AlreadyHashed; 46 struct CStringTranslator; 47 template<typename CharacterType> struct HashAndCharactersTranslator; 48 struct HashAndUTF8CharactersTranslator; 49 struct LCharBufferTranslator; 50 struct CharBufferFromLiteralDataTranslator; 51 struct SubstringTranslator; 52 struct UCharBufferTranslator; 53 template<typename> class RetainPtr; 54 55 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; 56 57 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace }; 58 59 typedef bool (*CharacterMatchFunctionPtr)(UChar); 60 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar); 61 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable; 62 63 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage 64 #undef STRING_STATS 65 66 #ifdef STRING_STATS 67 struct StringStats { 68 inline void add8BitString(unsigned length) 69 { 70 ++m_totalNumberStrings; 71 ++m_number8BitStrings; 72 m_total8BitData += length; 73 } 74 75 inline void add16BitString(unsigned length) 76 { 77 ++m_totalNumberStrings; 78 ++m_number16BitStrings; 79 m_total16BitData += length; 80 } 81 82 void removeString(StringImpl*); 83 void printStats(); 84 85 static const unsigned s_printStringStatsFrequency = 5000; 86 static unsigned s_stringRemovesTillPrintStats; 87 88 unsigned m_totalNumberStrings; 89 unsigned m_number8BitStrings; 90 unsigned m_number16BitStrings; 91 unsigned long long m_total8BitData; 92 unsigned long long m_total16BitData; 93 }; 94 95 void addStringForStats(StringImpl*); 96 void removeStringForStats(StringImpl*); 97 98 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length); addStringForStats(this) 99 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length); addStringForStats(this) 100 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string); removeStringForStats(this) 101 #else 102 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0) 103 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0) 104 #define STRING_STATS_REMOVE_STRING(string) ((void)0) 105 #endif 106 107 // You can find documentation about this class in this doc: 108 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing 109 class WTF_EXPORT StringImpl { 110 WTF_MAKE_NONCOPYABLE(StringImpl); 111 friend struct WTF::CStringTranslator; 112 template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator; 113 friend struct WTF::HashAndUTF8CharactersTranslator; 114 friend struct WTF::CharBufferFromLiteralDataTranslator; 115 friend struct WTF::LCharBufferTranslator; 116 friend struct WTF::SubstringTranslator; 117 friend struct WTF::UCharBufferTranslator; 118 119 private: 120 // StringImpls are allocated out of the WTF buffer partition. 121 void* operator new(size_t); 122 void* operator new(size_t, void* ptr) { return ptr; }; 123 void operator delete(void*); 124 125 // Used to construct static strings, which have an special refCount that can never hit zero. 126 // This means that the static string will never be destroyed, which is important because 127 // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 128 enum ConstructEmptyStringTag { ConstructEmptyString }; 129 explicit StringImpl(ConstructEmptyStringTag) 130 : m_refCount(1) 131 , m_length(0) 132 , m_hash(0) 133 , m_isAtomic(false) 134 , m_is8Bit(true) 135 , m_isStatic(true) 136 { 137 // Ensure that the hash is computed so that AtomicStringHash can call existingHash() 138 // with impunity. The empty string is special because it is never entered into 139 // AtomicString's HashKey, but still needs to compare correctly. 140 STRING_STATS_ADD_8BIT_STRING(m_length); 141 hash(); 142 } 143 144 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit }; 145 explicit StringImpl(ConstructEmptyString16BitTag) 146 : m_refCount(1) 147 , m_length(0) 148 , m_hash(0) 149 , m_isAtomic(false) 150 , m_is8Bit(false) 151 , m_isStatic(true) 152 { 153 STRING_STATS_ADD_16BIT_STRING(m_length); 154 hash(); 155 } 156 157 // FIXME: there has to be a less hacky way to do this. 158 enum Force8Bit { Force8BitConstructor }; 159 StringImpl(unsigned length, Force8Bit) 160 : m_refCount(1) 161 , m_length(length) 162 , m_hash(0) 163 , m_isAtomic(false) 164 , m_is8Bit(true) 165 , m_isStatic(false) 166 { 167 ASSERT(m_length); 168 STRING_STATS_ADD_8BIT_STRING(m_length); 169 } 170 171 StringImpl(unsigned length) 172 : m_refCount(1) 173 , m_length(length) 174 , m_hash(0) 175 , m_isAtomic(false) 176 , m_is8Bit(false) 177 , m_isStatic(false) 178 { 179 ASSERT(m_length); 180 STRING_STATS_ADD_16BIT_STRING(m_length); 181 } 182 183 enum StaticStringTag { StaticString }; 184 StringImpl(unsigned length, unsigned hash, StaticStringTag) 185 : m_refCount(1) 186 , m_length(length) 187 , m_hash(hash) 188 , m_isAtomic(false) 189 , m_is8Bit(true) 190 , m_isStatic(true) 191 { 192 } 193 194 public: 195 ~StringImpl(); 196 197 static StringImpl* createStatic(const char* string, unsigned length, unsigned hash); 198 static void freezeStaticStrings(); 199 static const StaticStringsTable& allStaticStrings(); 200 static unsigned highestStaticStringLength() { return m_highestStaticStringLength; } 201 202 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); 203 static PassRefPtr<StringImpl> create(const LChar*, unsigned length); 204 static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned length); 205 template<size_t inlineCapacity> 206 static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector) 207 { 208 return create8BitIfPossible(vector.data(), vector.size()); 209 } 210 211 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); } 212 static PassRefPtr<StringImpl> create(const LChar*); 213 ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); } 214 215 static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data); 216 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); 217 218 // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr. 219 // Just like the input pointer of realloc(), the originalString can't be used after this function. 220 static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length); 221 222 // If this StringImpl has only one reference, we can truncate the string by updating 223 // its m_length property without actually re-allocating its buffer. 224 void truncateAssumingIsolated(unsigned length) 225 { 226 ASSERT(hasOneRef()); 227 ASSERT(length <= m_length); 228 m_length = length; 229 } 230 231 unsigned length() const { return m_length; } 232 bool is8Bit() const { return m_is8Bit; } 233 234 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return reinterpret_cast<const LChar*>(this + 1); } 235 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return reinterpret_cast<const UChar*>(this + 1); } 236 237 template <typename CharType> 238 ALWAYS_INLINE const CharType * getCharacters() const; 239 240 size_t sizeInBytes() const; 241 242 bool isAtomic() const { return m_isAtomic; } 243 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; } 244 245 bool isStatic() const { return m_isStatic; } 246 247 private: 248 // The high bits of 'hash' are always empty, but we prefer to store our flags 249 // in the low bits because it makes them slightly more efficient to access. 250 // So, we shift left and right when setting and getting our hash code. 251 void setHash(unsigned hash) const 252 { 253 ASSERT(!hasHash()); 254 // Multiple clients assume that StringHasher is the canonical string hash function. 255 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_length))); 256 m_hash = hash; 257 ASSERT(hash); // Verify that 0 is a valid sentinel hash value. 258 } 259 260 unsigned rawHash() const 261 { 262 return m_hash; 263 } 264 265 void destroyIfNotStatic(); 266 267 public: 268 bool hasHash() const 269 { 270 return rawHash() != 0; 271 } 272 273 unsigned existingHash() const 274 { 275 ASSERT(hasHash()); 276 return rawHash(); 277 } 278 279 unsigned hash() const 280 { 281 if (hasHash()) 282 return existingHash(); 283 return hashSlowCase(); 284 } 285 286 ALWAYS_INLINE bool hasOneRef() const 287 { 288 return m_refCount == 1; 289 } 290 291 ALWAYS_INLINE void ref() 292 { 293 ++m_refCount; 294 } 295 296 ALWAYS_INLINE void deref() 297 { 298 if (hasOneRef()) { 299 destroyIfNotStatic(); 300 return; 301 } 302 303 --m_refCount; 304 } 305 306 static StringImpl* empty(); 307 static StringImpl* empty16Bit(); 308 309 // FIXME: Does this really belong in StringImpl? 310 template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters) 311 { 312 memcpy(destination, source, numCharacters * sizeof(T)); 313 } 314 315 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters) 316 { 317 for (unsigned i = 0; i < numCharacters; ++i) 318 destination[i] = source[i]; 319 } 320 321 // Some string features, like refcounting and the atomicity flag, are not 322 // thread-safe. We achieve thread safety by isolation, giving each thread 323 // its own copy of the string. 324 PassRefPtr<StringImpl> isolatedCopy() const; 325 326 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); 327 328 UChar operator[](unsigned i) const 329 { 330 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length); 331 if (is8Bit()) 332 return characters8()[i]; 333 return characters16()[i]; 334 } 335 UChar32 characterStartingAt(unsigned); 336 337 bool containsOnlyWhitespace(); 338 339 int toIntStrict(bool* ok = 0, int base = 10); 340 unsigned toUIntStrict(bool* ok = 0, int base = 10); 341 int64_t toInt64Strict(bool* ok = 0, int base = 10); 342 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); 343 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); 344 345 int toInt(bool* ok = 0); // ignores trailing garbage 346 unsigned toUInt(bool* ok = 0); // ignores trailing garbage 347 int64_t toInt64(bool* ok = 0); // ignores trailing garbage 348 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage 349 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage 350 351 // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage. 352 // Like the non-strict functions above, these return the value when there is trailing garbage. 353 // It would be better if these were more consistent with the above functions instead. 354 double toDouble(bool* ok = 0); 355 float toFloat(bool* ok = 0); 356 357 PassRefPtr<StringImpl> lower(); 358 PassRefPtr<StringImpl> upper(); 359 PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier); 360 PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier); 361 362 PassRefPtr<StringImpl> fill(UChar); 363 // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII? 364 PassRefPtr<StringImpl> foldCase(); 365 366 PassRefPtr<StringImpl> stripWhiteSpace(); 367 PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr); 368 PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior stripBehavior = StripExtraWhiteSpace); 369 PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBehavior stripBehavior = StripExtraWhiteSpace); 370 371 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); 372 template <typename CharType> 373 ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr); 374 375 size_t find(LChar character, unsigned start = 0); 376 size_t find(char character, unsigned start = 0); 377 size_t find(UChar character, unsigned start = 0); 378 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); 379 size_t find(const LChar*, unsigned index = 0); 380 ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); } 381 size_t find(StringImpl*); 382 size_t find(StringImpl*, unsigned index); 383 size_t findIgnoringCase(const LChar*, unsigned index = 0); 384 ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); } 385 size_t findIgnoringCase(StringImpl*, unsigned index = 0); 386 387 size_t findNextLineStart(unsigned index = UINT_MAX); 388 389 size_t reverseFind(UChar, unsigned index = UINT_MAX); 390 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); 391 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 392 393 size_t count(LChar) const; 394 395 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } 396 bool startsWith(UChar) const; 397 bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const; 398 template<unsigned matchLength> 399 bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); } 400 401 bool endsWith(StringImpl*, bool caseSensitive = true); 402 bool endsWith(UChar) const; 403 bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const; 404 template<unsigned matchLength> 405 bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); } 406 407 PassRefPtr<StringImpl> replace(UChar, UChar); 408 PassRefPtr<StringImpl> replace(UChar, StringImpl*); 409 ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); } 410 PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength); 411 PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength); 412 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); 413 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); 414 PassRefPtr<StringImpl> upconvertedString(); 415 416 #if USE(CF) 417 RetainPtr<CFStringRef> createCFString(); 418 #endif 419 #ifdef __OBJC__ 420 operator NSString*(); 421 #endif 422 423 #ifdef STRING_STATS 424 ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; } 425 #endif 426 427 private: 428 template<typename CharType> static size_t allocationSize(unsigned length) 429 { 430 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(CharType))); 431 return sizeof(StringImpl) + length * sizeof(CharType); 432 } 433 434 template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate); 435 template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, StripBehavior); 436 NEVER_INLINE unsigned hashSlowCase() const; 437 438 #ifdef STRING_STATS 439 static StringStats m_stringStats; 440 #endif 441 442 static unsigned m_highestStaticStringLength; 443 444 #if ENABLE(ASSERT) 445 void assertHashIsCorrect() 446 { 447 ASSERT(hasHash()); 448 ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length())); 449 } 450 #endif 451 452 private: 453 unsigned m_refCount; 454 unsigned m_length; 455 mutable unsigned m_hash : 24; 456 unsigned m_isAtomic : 1; 457 unsigned m_is8Bit : 1; 458 unsigned m_isStatic : 1; 459 }; 460 461 template <> 462 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); } 463 464 template <> 465 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return characters16(); } 466 467 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*); 468 WTF_EXPORT bool equal(const StringImpl*, const LChar*); 469 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); } 470 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned); 471 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned); 472 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); } 473 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); } 474 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); } 475 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b); 476 477 template<typename CharType> 478 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length) { return !memcmp(a, b, length * sizeof(CharType)); } 479 480 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) 481 { 482 for (unsigned i = 0; i < length; ++i) { 483 if (a[i] != b[i]) 484 return false; 485 } 486 return true; 487 } 488 489 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); } 490 491 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*); 492 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*); 493 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); } 494 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned); 495 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned); 496 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); } 497 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 498 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 499 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); } 500 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) 501 { 502 ASSERT(length >= 0); 503 return !Unicode::umemcasecmp(a, b, length); 504 } 505 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*); 506 507 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*); 508 509 template<typename CharacterType> 510 inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0) 511 { 512 while (index < length) { 513 if (characters[index] == matchCharacter) 514 return index; 515 ++index; 516 } 517 return kNotFound; 518 } 519 520 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0) 521 { 522 return find(characters, length, static_cast<UChar>(matchCharacter), index); 523 } 524 525 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) 526 { 527 if (matchCharacter & ~0xFF) 528 return kNotFound; 529 return find(characters, length, static_cast<LChar>(matchCharacter), index); 530 } 531 532 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 533 { 534 while (index < length) { 535 if (matchFunction(characters[index])) 536 return index; 537 ++index; 538 } 539 return kNotFound; 540 } 541 542 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 543 { 544 while (index < length) { 545 if (matchFunction(characters[index])) 546 return index; 547 ++index; 548 } 549 return kNotFound; 550 } 551 552 template<typename CharacterType> 553 inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0) 554 { 555 while (index < length) { 556 CharacterType c = characters[index++]; 557 if ((c != '\n') && (c != '\r')) 558 continue; 559 560 // There can only be a start of a new line if there are more characters 561 // beyond the current character. 562 if (index < length) { 563 // The 3 common types of line terminators are 1. \r\n (Windows), 564 // 2. \r (old MacOS) and 3. \n (Unix'es). 565 566 if (c == '\n') 567 return index; // Case 3: just \n. 568 569 CharacterType c2 = characters[index]; 570 if (c2 != '\n') 571 return index; // Case 2: just \r. 572 573 // Case 1: \r\n. 574 // But, there's only a start of a new line if there are more 575 // characters beyond the \r\n. 576 if (++index < length) 577 return index; 578 } 579 } 580 return kNotFound; 581 } 582 583 template<typename CharacterType> 584 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX) 585 { 586 if (!length) 587 return kNotFound; 588 if (index >= length) 589 index = length - 1; 590 CharacterType c = characters[index]; 591 while ((c != '\n') && (c != '\r')) { 592 if (!index--) 593 return kNotFound; 594 c = characters[index]; 595 } 596 return index; 597 } 598 599 template<typename CharacterType> 600 inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX) 601 { 602 if (!length) 603 return kNotFound; 604 if (index >= length) 605 index = length - 1; 606 while (characters[index] != matchCharacter) { 607 if (!index--) 608 return kNotFound; 609 } 610 return index; 611 } 612 613 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX) 614 { 615 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index); 616 } 617 618 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) 619 { 620 if (matchCharacter & ~0xFF) 621 return kNotFound; 622 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index); 623 } 624 625 inline size_t StringImpl::find(LChar character, unsigned start) 626 { 627 if (is8Bit()) 628 return WTF::find(characters8(), m_length, character, start); 629 return WTF::find(characters16(), m_length, character, start); 630 } 631 632 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start) 633 { 634 return find(static_cast<LChar>(character), start); 635 } 636 637 inline size_t StringImpl::find(UChar character, unsigned start) 638 { 639 if (is8Bit()) 640 return WTF::find(characters8(), m_length, character, start); 641 return WTF::find(characters16(), m_length, character, start); 642 } 643 644 inline unsigned lengthOfNullTerminatedString(const UChar* string) 645 { 646 size_t length = 0; 647 while (string[length] != UChar(0)) 648 ++length; 649 RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max()); 650 return static_cast<unsigned>(length); 651 } 652 653 template<size_t inlineCapacity> 654 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) 655 { 656 if (!b) 657 return !a.size(); 658 if (a.size() != b->length()) 659 return false; 660 if (b->is8Bit()) 661 return equal(a.data(), b->characters8(), b->length()); 662 return equal(a.data(), b->characters16(), b->length()); 663 } 664 665 template<typename CharacterType1, typename CharacterType2> 666 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2) 667 { 668 const unsigned lmin = l1 < l2 ? l1 : l2; 669 unsigned pos = 0; 670 while (pos < lmin && *c1 == *c2) { 671 ++c1; 672 ++c2; 673 ++pos; 674 } 675 676 if (pos < lmin) 677 return (c1[0] > c2[0]) ? 1 : -1; 678 679 if (l1 == l2) 680 return 0; 681 682 return (l1 > l2) ? 1 : -1; 683 } 684 685 static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2) 686 { 687 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8()); 688 } 689 690 static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2) 691 { 692 return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16()); 693 } 694 695 static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2) 696 { 697 return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16()); 698 } 699 700 static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2) 701 { 702 if (!string1) 703 return (string2 && string2->length()) ? -1 : 0; 704 705 if (!string2) 706 return string1->length() ? 1 : 0; 707 708 bool string1Is8Bit = string1->is8Bit(); 709 bool string2Is8Bit = string2->is8Bit(); 710 if (string1Is8Bit) { 711 if (string2Is8Bit) 712 return codePointCompare8(string1, string2); 713 return codePointCompare8To16(string1, string2); 714 } 715 if (string2Is8Bit) 716 return -codePointCompare8To16(string2, string1); 717 return codePointCompare16(string1, string2); 718 } 719 720 static inline bool isSpaceOrNewline(UChar c) 721 { 722 // Use isASCIISpace() for basic Latin-1. 723 // This will include newlines, which aren't included in Unicode DirWS. 724 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; 725 } 726 727 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const 728 { 729 if (is8Bit()) 730 return create(characters8(), m_length); 731 return create(characters16(), m_length); 732 } 733 734 struct StringHash; 735 736 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 737 template<typename T> struct DefaultHash; 738 template<> struct DefaultHash<StringImpl*> { 739 typedef StringHash Hash; 740 }; 741 template<> struct DefaultHash<RefPtr<StringImpl> > { 742 typedef StringHash Hash; 743 }; 744 745 } 746 747 using WTF::StringImpl; 748 using WTF::equal; 749 using WTF::equalNonNull; 750 using WTF::TextCaseSensitivity; 751 using WTF::TextCaseSensitive; 752 using WTF::TextCaseInsensitive; 753 754 #endif 755