Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved.
      4  * Copyright (C) 2009 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef StringImpl_h
     24 #define StringImpl_h
     25 
     26 #include <limits.h>
     27 #include "wtf/ASCIICType.h"
     28 #include "wtf/Forward.h"
     29 #include "wtf/HashMap.h"
     30 #include "wtf/StringHasher.h"
     31 #include "wtf/Vector.h"
     32 #include "wtf/WTFExport.h"
     33 #include "wtf/unicode/Unicode.h"
     34 
     35 #if USE(CF)
     36 typedef const struct __CFString * CFStringRef;
     37 #endif
     38 
     39 #ifdef __OBJC__
     40 @class NSString;
     41 #endif
     42 
     43 namespace WTF {
     44 
     45 struct AlreadyHashed;
     46 struct CStringTranslator;
     47 template<typename CharacterType> struct HashAndCharactersTranslator;
     48 struct HashAndUTF8CharactersTranslator;
     49 struct LCharBufferTranslator;
     50 struct CharBufferFromLiteralDataTranslator;
     51 struct SubstringTranslator;
     52 struct UCharBufferTranslator;
     53 template<typename> class RetainPtr;
     54 
        // Two-valued selector for case-sensitive vs. case-insensitive text handling.
        // NOTE(review): no user of this enum is visible in this header.
     55 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
     56 
        // Argument type of StringImpl::simplifyWhiteSpace(); StripExtraWhiteSpace is
        // the default value used by its declarations.
     57 enum StripBehavior { StripExtraWhiteSpace, DoNotStripWhiteSpace };
     58 
        // Predicate over a single UChar; accepted by StringImpl::removeCharacters()
        // and by the find() overloads that search for the first matching character.
     59 typedef bool (*CharacterMatchFunctionPtr)(UChar);
        // Predicate over a single UChar; accepted by StringImpl::stripWhiteSpace()
        // and StringImpl::simplifyWhiteSpace().
     60 typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
        // Map type returned by StringImpl::allStaticStrings().
        // NOTE(review): the unsigned key is presumably the string's hash - confirm.
     61 typedef HashMap<unsigned, StringImpl*, AlreadyHashed> StaticStringsTable;
     62 
     63 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage
     64 #undef STRING_STATS
     65 
     66 #ifdef STRING_STATS
     67 struct StringStats {
     68     inline void add8BitString(unsigned length)
     69     {
     70         ++m_totalNumberStrings;
     71         ++m_number8BitStrings;
     72         m_total8BitData += length;
     73     }
     74 
     75     inline void add16BitString(unsigned length)
     76     {
     77         ++m_totalNumberStrings;
     78         ++m_number16BitStrings;
     79         m_total16BitData += length;
     80     }
     81 
     82     void removeString(StringImpl*);
     83     void printStats();
     84 
     85     static const unsigned s_printStringStatsFrequency = 5000;
     86     static unsigned s_stringRemovesTillPrintStats;
     87 
     88     unsigned m_totalNumberStrings;
     89     unsigned m_number8BitStrings;
     90     unsigned m_number16BitStrings;
     91     unsigned long long m_total8BitData;
     92     unsigned long long m_total16BitData;
     93 };
     94 
     95 void addStringForStats(StringImpl*);
     96 void removeStringForStats(StringImpl*);
     97 
     98 #define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length); addStringForStats(this)
     99 #define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length); addStringForStats(this)
    100 #define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string); removeStringForStats(this)
    101 #else
    102 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
    103 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
    104 #define STRING_STATS_REMOVE_STRING(string) ((void)0)
    105 #endif
    106 
    107 // You can find documentation about this class in this doc:
    108 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
    109 class WTF_EXPORT StringImpl {
    110     WTF_MAKE_NONCOPYABLE(StringImpl);
    111     friend struct WTF::CStringTranslator;
    112     template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator;
    113     friend struct WTF::HashAndUTF8CharactersTranslator;
    114     friend struct WTF::CharBufferFromLiteralDataTranslator;
    115     friend struct WTF::LCharBufferTranslator;
    116     friend struct WTF::SubstringTranslator;
    117     friend struct WTF::UCharBufferTranslator;
    118 
    119 private:
    120     // StringImpls are allocated out of the WTF buffer partition.
    121     void* operator new(size_t);
    122     void* operator new(size_t, void* ptr) { return ptr; };
    123     void operator delete(void*);
    124 
    125     // Used to construct static strings, which have an special refCount that can never hit zero.
    126     // This means that the static string will never be destroyed, which is important because
    127     // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
    128     enum ConstructEmptyStringTag { ConstructEmptyString };
    129     explicit StringImpl(ConstructEmptyStringTag)
    130         : m_refCount(1)
    131         , m_length(0)
    132         , m_hash(0)
    133         , m_isAtomic(false)
    134         , m_is8Bit(true)
    135         , m_isStatic(true)
    136     {
    137         // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
    138         // with impunity. The empty string is special because it is never entered into
    139         // AtomicString's HashKey, but still needs to compare correctly.
    140         STRING_STATS_ADD_8BIT_STRING(m_length);
    141         hash();
    142     }
    143 
    144     // FIXME: there has to be a less hacky way to do this.
    145     enum Force8Bit { Force8BitConstructor };
    146     StringImpl(unsigned length, Force8Bit)
    147         : m_refCount(1)
    148         , m_length(length)
    149         , m_hash(0)
    150         , m_isAtomic(false)
    151         , m_is8Bit(true)
    152         , m_isStatic(false)
    153     {
    154         ASSERT(m_length);
    155         STRING_STATS_ADD_8BIT_STRING(m_length);
    156     }
    157 
    158     StringImpl(unsigned length)
    159         : m_refCount(1)
    160         , m_length(length)
    161         , m_hash(0)
    162         , m_isAtomic(false)
    163         , m_is8Bit(false)
    164         , m_isStatic(false)
    165     {
    166         ASSERT(m_length);
    167         STRING_STATS_ADD_16BIT_STRING(m_length);
    168     }
    169 
    170     enum StaticStringTag { StaticString };
    171     StringImpl(unsigned length, unsigned hash, StaticStringTag)
    172         : m_refCount(1)
    173         , m_length(length)
    174         , m_hash(hash)
    175         , m_isAtomic(false)
    176         , m_is8Bit(true)
    177         , m_isStatic(true)
    178     {
    179     }
    180 
    181 public:
    182     ~StringImpl();
    183 
    184     static StringImpl* createStatic(const char* string, unsigned length, unsigned hash);
    185     static void freezeStaticStrings();
    186     static const StaticStringsTable& allStaticStrings();
    187     static unsigned highestStaticStringLength() { return m_highestStaticStringLength; }
    188 
    189     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
    190     static PassRefPtr<StringImpl> create(const LChar*, unsigned length);
    191     static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
    192     template<size_t inlineCapacity>
    193     static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
    194     {
    195         return create8BitIfPossible(vector.data(), vector.size());
    196     }
    197 
    198     ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
    199     static PassRefPtr<StringImpl> create(const LChar*);
    200     ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }
    201 
    202     static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
    203     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
    204 
    205     // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr.
    206     // Just like the input pointer of realloc(), the originalString can't be used after this function.
    207     static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data);
    208     static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data);
    209 
    210     // If this StringImpl has only one reference, we can truncate the string by updating
    211     // its m_length property without actually re-allocating its buffer.
    212     void truncateAssumingIsolated(unsigned length)
    213     {
    214         ASSERT(hasOneRef());
    215         ASSERT(length <= m_length);
    216         m_length = length;
    217     }
    218 
    219     unsigned length() const { return m_length; }
    220     bool is8Bit() const { return m_is8Bit; }
    221 
    222     ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return reinterpret_cast<const LChar*>(this + 1); }
    223     ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return reinterpret_cast<const UChar*>(this + 1); }
    224 
    225     template <typename CharType>
    226     ALWAYS_INLINE const CharType * getCharacters() const;
    227 
    228     size_t sizeInBytes() const;
    229 
    230     bool isAtomic() const { return m_isAtomic; }
    231     void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }
    232 
    233     bool isStatic() const { return m_isStatic; }
    234 
    235 private:
    236     // The high bits of 'hash' are always empty, but we prefer to store our flags
    237     // in the low bits because it makes them slightly more efficient to access.
    238     // So, we shift left and right when setting and getting our hash code.
    239     void setHash(unsigned hash) const
    240     {
    241         ASSERT(!hasHash());
    242         // Multiple clients assume that StringHasher is the canonical string hash function.
    243         ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_length)));
    244         m_hash = hash;
    245         ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
    246     }
    247 
    248     unsigned rawHash() const
    249     {
    250         return m_hash;
    251     }
    252 
    253     void destroyIfNotStatic();
    254 
    255 public:
    256     bool hasHash() const
    257     {
    258         return rawHash() != 0;
    259     }
    260 
    261     unsigned existingHash() const
    262     {
    263         ASSERT(hasHash());
    264         return rawHash();
    265     }
    266 
    267     unsigned hash() const
    268     {
    269         if (hasHash())
    270             return existingHash();
    271         return hashSlowCase();
    272     }
    273 
    274     inline bool hasOneRef() const
    275     {
    276         return m_refCount == 1;
    277     }
    278 
    279     inline void ref()
    280     {
    281         ++m_refCount;
    282     }
    283 
    284     inline void deref()
    285     {
    286         if (hasOneRef()) {
    287             destroyIfNotStatic();
    288             return;
    289         }
    290 
    291         --m_refCount;
    292     }
    293 
    294     static StringImpl* empty();
    295 
    296     // FIXME: Does this really belong in StringImpl?
    297     template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
    298     {
    299         if (numCharacters == 1) {
    300             *destination = *source;
    301             return;
    302         }
    303 
    304         // FIXME: Is this implementation really faster than memcpy?
    305         if (numCharacters <= s_copyCharsInlineCutOff) {
    306             unsigned i = 0;
    307 #if (CPU(X86) || CPU(X86_64))
    308             const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T);
    309 
    310             if (numCharacters > charsPerInt) {
    311                 unsigned stopCount = numCharacters & ~(charsPerInt - 1);
    312 
    313                 const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*>(source);
    314                 uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destination);
    315                 for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j)
    316                     destCharacters[j] = srcCharacters[j];
    317             }
    318 #endif
    319             for (; i < numCharacters; ++i)
    320                 destination[i] = source[i];
    321         } else
    322             memcpy(destination, source, numCharacters * sizeof(T));
    323     }
    324 
    325     ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters)
    326     {
    327         for (unsigned i = 0; i < numCharacters; ++i)
    328             destination[i] = source[i];
    329     }
    330 
    331     // Some string features, like refcounting and the atomicity flag, are not
    332     // thread-safe. We achieve thread safety by isolation, giving each thread
    333     // its own copy of the string.
    334     PassRefPtr<StringImpl> isolatedCopy() const;
    335 
    336     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
    337 
    338     UChar operator[](unsigned i) const
    339     {
    340         ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
    341         if (is8Bit())
    342             return characters8()[i];
    343         return characters16()[i];
    344     }
    345     UChar32 characterStartingAt(unsigned);
    346 
    347     bool containsOnlyWhitespace();
    348 
    349     int toIntStrict(bool* ok = 0, int base = 10);
    350     unsigned toUIntStrict(bool* ok = 0, int base = 10);
    351     int64_t toInt64Strict(bool* ok = 0, int base = 10);
    352     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    353     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
    354 
    355     int toInt(bool* ok = 0); // ignores trailing garbage
    356     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    357     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    358     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    359     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
    360 
    361     // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
    362     // Like the non-strict functions above, these return the value when there is trailing garbage.
    363     // It would be better if these were more consistent with the above functions instead.
    364     double toDouble(bool* ok = 0);
    365     float toFloat(bool* ok = 0);
    366 
    367     PassRefPtr<StringImpl> lower();
    368     PassRefPtr<StringImpl> upper();
    369     PassRefPtr<StringImpl> lower(const AtomicString& localeIdentifier);
    370     PassRefPtr<StringImpl> upper(const AtomicString& localeIdentifier);
    371 
    372     PassRefPtr<StringImpl> fill(UChar);
    373     // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?
    374     PassRefPtr<StringImpl> foldCase();
    375 
    376     PassRefPtr<StringImpl> stripWhiteSpace();
    377     PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
    378     PassRefPtr<StringImpl> simplifyWhiteSpace(StripBehavior stripBehavior = StripExtraWhiteSpace);
    379     PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr, StripBehavior stripBehavior = StripExtraWhiteSpace);
    380 
    381     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
    382     template <typename CharType>
    383     ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
    384 
    385     size_t find(LChar character, unsigned start = 0);
    386     size_t find(char character, unsigned start = 0);
    387     size_t find(UChar character, unsigned start = 0);
    388     size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
    389     size_t find(const LChar*, unsigned index = 0);
    390     ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); }
    391     size_t find(StringImpl*);
    392     size_t find(StringImpl*, unsigned index);
    393     size_t findIgnoringCase(const LChar*, unsigned index = 0);
    394     ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
    395     size_t findIgnoringCase(StringImpl*, unsigned index = 0);
    396 
    397     size_t findNextLineStart(unsigned index = UINT_MAX);
    398 
    399     size_t reverseFind(UChar, unsigned index = UINT_MAX);
    400     size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
    401     size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
    402 
    403     size_t count(LChar) const;
    404 
    405     bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
    406     bool startsWith(UChar) const;
    407     bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    408     template<unsigned matchLength>
    409     bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); }
    410 
    411     bool endsWith(StringImpl*, bool caseSensitive = true);
    412     bool endsWith(UChar) const;
    413     bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    414     template<unsigned matchLength>
    415     bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); }
    416 
    417     PassRefPtr<StringImpl> replace(UChar, UChar);
    418     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
    419     ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
    420     PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
    421     PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
    422     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
    423     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
    424     PassRefPtr<StringImpl> upconvertedString();
    425 
    426 #if USE(CF)
    427     RetainPtr<CFStringRef> createCFString();
    428 #endif
    429 #ifdef __OBJC__
    430     operator NSString*();
    431 #endif
    432 
    433 #ifdef STRING_STATS
    434     ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
    435 #endif
    436 
    437 private:
    438     // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
    439     static const unsigned s_copyCharsInlineCutOff = 20;
    440 
    441     template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate);
    442     template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate, StripBehavior);
    443     NEVER_INLINE unsigned hashSlowCase() const;
    444 
    445 #ifdef STRING_STATS
    446     static StringStats m_stringStats;
    447 #endif
    448 
    449     static unsigned m_highestStaticStringLength;
    450 
    451 #ifndef NDEBUG
    452     void assertHashIsCorrect()
    453     {
    454         ASSERT(hasHash());
    455         ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
    456     }
    457 #endif
    458 
    459 private:
    460     unsigned m_refCount;
    461     unsigned m_length;
    462     mutable unsigned m_hash : 24;
    463     unsigned m_isAtomic : 1;
    464     unsigned m_is8Bit : 1;
    465     unsigned m_isStatic : 1;
    466 };
    467 
    468 template <>
    469 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); }
    470 
    471 template <>
    472 ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return characters16(); }
    473 
    474 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*);
    475 WTF_EXPORT bool equal(const StringImpl*, const LChar*);
    476 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
    477 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned);
    478 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned);
    479 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
    480 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
    481 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
    482 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);
    483 
    484 template<typename CharType>
    485 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length) { return !memcmp(a, b, length * sizeof(CharType)); }
    486 
    487 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length)
    488 {
    489     for (unsigned i = 0; i < length; ++i) {
    490         if (a[i] != b[i])
    491             return false;
    492     }
    493     return true;
    494 }
    495 
    496 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); }
    497 
    498 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*);
    499 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*);
    500 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); }
    501 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned);
    502 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned);
    503 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); }
    504 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
    505 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
    506 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
    507 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
    508 {
    509     ASSERT(length >= 0);
    510     return !Unicode::umemcasecmp(a, b, length);
    511 }
    512 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*);
    513 
    514 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*);
    515 
    516 template<typename CharacterType>
    517 inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0)
    518 {
    519     while (index < length) {
    520         if (characters[index] == matchCharacter)
    521             return index;
    522         ++index;
    523     }
    524     return kNotFound;
    525 }
    526 
    527 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0)
    528 {
    529     return find(characters, length, static_cast<UChar>(matchCharacter), index);
    530 }
    531 
    532 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
    533 {
    534     if (matchCharacter & ~0xFF)
    535         return kNotFound;
    536     return find(characters, length, static_cast<LChar>(matchCharacter), index);
    537 }
    538 
    539 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
    540 {
    541     while (index < length) {
    542         if (matchFunction(characters[index]))
    543             return index;
    544         ++index;
    545     }
    546     return kNotFound;
    547 }
    548 
    549 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
    550 {
    551     while (index < length) {
    552         if (matchFunction(characters[index]))
    553             return index;
    554         ++index;
    555     }
    556     return kNotFound;
    557 }
    558 
    559 template<typename CharacterType>
    560 inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0)
    561 {
    562     while (index < length) {
    563         CharacterType c = characters[index++];
    564         if ((c != '\n') && (c != '\r'))
    565             continue;
    566 
    567         // There can only be a start of a new line if there are more characters
    568         // beyond the current character.
    569         if (index < length) {
    570             // The 3 common types of line terminators are 1. \r\n (Windows),
    571             // 2. \r (old MacOS) and 3. \n (Unix'es).
    572 
    573             if (c == '\n')
    574                 return index; // Case 3: just \n.
    575 
    576             CharacterType c2 = characters[index];
    577             if (c2 != '\n')
    578                 return index; // Case 2: just \r.
    579 
    580             // Case 1: \r\n.
    581             // But, there's only a start of a new line if there are more
    582             // characters beyond the \r\n.
    583             if (++index < length)
    584                 return index;
    585         }
    586     }
    587     return kNotFound;
    588 }
    589 
    590 template<typename CharacterType>
    591 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX)
    592 {
    593     if (!length)
    594         return kNotFound;
    595     if (index >= length)
    596         index = length - 1;
    597     CharacterType c = characters[index];
    598     while ((c != '\n') && (c != '\r')) {
    599         if (!index--)
    600             return kNotFound;
    601         c = characters[index];
    602     }
    603     return index;
    604 }
    605 
    606 template<typename CharacterType>
    607 inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX)
    608 {
    609     if (!length)
    610         return kNotFound;
    611     if (index >= length)
    612         index = length - 1;
    613     while (characters[index] != matchCharacter) {
    614         if (!index--)
    615             return kNotFound;
    616     }
    617     return index;
    618 }
    619 
    620 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX)
    621 {
    622     return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
    623 }
    624 
    625 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
    626 {
    627     if (matchCharacter & ~0xFF)
    628         return kNotFound;
    629     return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
    630 }
    631 
    632 inline size_t StringImpl::find(LChar character, unsigned start)
    633 {
    634     if (is8Bit())
    635         return WTF::find(characters8(), m_length, character, start);
    636     return WTF::find(characters16(), m_length, character, start);
    637 }
    638 
    639 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
    640 {
    641     return find(static_cast<LChar>(character), start);
    642 }
    643 
    644 inline size_t StringImpl::find(UChar character, unsigned start)
    645 {
    646     if (is8Bit())
    647         return WTF::find(characters8(), m_length, character, start);
    648     return WTF::find(characters16(), m_length, character, start);
    649 }
    650 
    651 inline unsigned lengthOfNullTerminatedString(const UChar* string)
    652 {
    653     size_t length = 0;
    654     while (string[length] != UChar(0))
    655         ++length;
    656     RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
    657     return static_cast<unsigned>(length);
    658 }
    659 
    660 template<size_t inlineCapacity>
    661 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
    662 {
    663     if (!b)
    664         return !a.size();
    665     if (a.size() != b->length())
    666         return false;
    667     if (b->is8Bit())
    668         return equal(a.data(), b->characters8(), b->length());
    669     return equal(a.data(), b->characters16(), b->length());
    670 }
    671 
    672 template<typename CharacterType1, typename CharacterType2>
    673 static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
    674 {
    675     const unsigned lmin = l1 < l2 ? l1 : l2;
    676     unsigned pos = 0;
    677     while (pos < lmin && *c1 == *c2) {
    678         ++c1;
    679         ++c2;
    680         ++pos;
    681     }
    682 
    683     if (pos < lmin)
    684         return (c1[0] > c2[0]) ? 1 : -1;
    685 
    686     if (l1 == l2)
    687         return 0;
    688 
    689     return (l1 > l2) ? 1 : -1;
    690 }
    691 
    692 static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
    693 {
    694     return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8());
    695 }
    696 
    697 static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2)
    698 {
    699     return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16());
    700 }
    701 
    702 static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2)
    703 {
    704     return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16());
    705 }
    706 
    707 static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
    708 {
    709     if (!string1)
    710         return (string2 && string2->length()) ? -1 : 0;
    711 
    712     if (!string2)
    713         return string1->length() ? 1 : 0;
    714 
    715     bool string1Is8Bit = string1->is8Bit();
    716     bool string2Is8Bit = string2->is8Bit();
    717     if (string1Is8Bit) {
    718         if (string2Is8Bit)
    719             return codePointCompare8(string1, string2);
    720         return codePointCompare8To16(string1, string2);
    721     }
    722     if (string2Is8Bit)
    723         return -codePointCompare8To16(string2, string1);
    724     return codePointCompare16(string1, string2);
    725 }
    726 
    727 static inline bool isSpaceOrNewline(UChar c)
    728 {
    729     // Use isASCIISpace() for basic Latin-1.
    730     // This will include newlines, which aren't included in Unicode DirWS.
    731     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
    732 }
    733 
    734 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const
    735 {
    736     if (is8Bit())
    737         return create(characters8(), m_length);
    738     return create(characters16(), m_length);
    739 }
    740 
    741 struct StringHash;
    742 
    743 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
    744 template<typename T> struct DefaultHash;
    745 template<> struct DefaultHash<StringImpl*> {
    746     typedef StringHash Hash;
    747 };
    748 template<> struct DefaultHash<RefPtr<StringImpl> > {
    749     typedef StringHash Hash;
    750 };
    751 
    752 }
    753 
    754 using WTF::StringImpl;
    755 using WTF::equal;
    756 using WTF::equalNonNull;
    757 using WTF::TextCaseSensitivity;
    758 using WTF::TextCaseSensitive;
    759 using WTF::TextCaseInsensitive;
    760 
    761 #endif
    762