Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights reserved.
      4  * Copyright (C) 2009 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef StringImpl_h
     24 #define StringImpl_h
     25 
     26 #include <limits.h>
     27 #include "wtf/ASCIICType.h"
     28 #include "wtf/Forward.h"
     29 #include "wtf/StdLibExtras.h"
     30 #include "wtf/StringHasher.h"
     31 #include "wtf/Vector.h"
     32 #include "wtf/WTFExport.h"
     33 #include "wtf/unicode/Unicode.h"
     34 
     35 #if USE(CF)
     36 typedef const struct __CFString * CFStringRef;
     37 #endif
     38 
     39 #ifdef __OBJC__
     40 @class NSString;
     41 #endif
     42 
     43 namespace WTF {
     44 
     // Forward declarations of the translator types that StringImpl names as friends.
     struct CStringTranslator;
     template<typename CharacterType> struct HashAndCharactersTranslator;
     struct HashAndUTF8CharactersTranslator;
     struct LCharBufferTranslator;
     struct CharBufferFromLiteralDataTranslator;
     struct SubstringTranslator;
     struct UCharBufferTranslator;
     // Return wrapper of StringImpl::createCFString() in USE(CF) builds.
     template<typename> class RetainPtr;

     // Selects between case-sensitive and case-insensitive text comparison.
     enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };

     // Predicates over a single UChar; StringImpl's find(), removeCharacters(),
     // stripWhiteSpace() and simplifyWhiteSpace() overloads accept them.
     typedef bool (*CharacterMatchFunctionPtr)(UChar);
     typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
     58 
     59 // Define STRING_STATS to turn on run time statistics of string sizes and memory usage
     60 #undef STRING_STATS
     61 
     62 #ifdef STRING_STATS
     63 struct StringStats {
     64     inline void add8BitString(unsigned length)
     65     {
     66         ++m_totalNumberStrings;
     67         ++m_number8BitStrings;
     68         m_total8BitData += length;
     69     }
     70 
     71     inline void add16BitString(unsigned length)
     72     {
     73         ++m_totalNumberStrings;
     74         ++m_number16BitStrings;
     75         m_total16BitData += length;
     76     }
     77 
     78     void removeString(StringImpl*);
     79     void printStats();
     80 
     81     static const unsigned s_printStringStatsFrequency = 5000;
     82     static unsigned s_stringRemovesTillPrintStats;
     83 
     84     unsigned m_totalNumberStrings;
     85     unsigned m_number8BitStrings;
     86     unsigned m_number16BitStrings;
     87     unsigned long long m_total8BitData;
     88     unsigned long long m_total16BitData;
     89 };
     90 
     91 void addStringForStats(StringImpl*);
     92 void removeStringForStats(StringImpl*);
     93 
     // Statistics hooks used from StringImpl member functions (they refer to |this|).
     // Each macro expands to ONE parenthesized comma expression, like the ((void)0)
     // variants used when STRING_STATS is off, so a use such as
     //     if (cond) STRING_STATS_ADD_8BIT_STRING(n); else ...
     // stays a single statement instead of splitting into two.
     // STRING_STATS_REMOVE_STRING reports the string it was given to both sinks;
     // note that it evaluates its argument twice.
     #define STRING_STATS_ADD_8BIT_STRING(length) (StringImpl::stringStats().add8BitString(length), addStringForStats(this))
     #define STRING_STATS_ADD_16BIT_STRING(length) (StringImpl::stringStats().add16BitString(length), addStringForStats(this))
     #define STRING_STATS_REMOVE_STRING(string) (StringImpl::stringStats().removeString(string), removeStringForStats(string))
     97 #else
     98 #define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
     99 #define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
    100 #define STRING_STATS_REMOVE_STRING(string) ((void)0)
    101 #endif
    102 
    103 // You can find documentation about this class in this doc:
    104 // https://docs.google.com/document/d/1kOCUlJdh2WJMJGDf-WoEQhmnjKLaOYRbiHz5TiGJl14/edit?usp=sharing
    105 class WTF_EXPORT StringImpl {
    106     WTF_MAKE_NONCOPYABLE(StringImpl);
    107     WTF_MAKE_FAST_ALLOCATED;
    108     friend struct WTF::CStringTranslator;
    109     template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator;
    110     friend struct WTF::HashAndUTF8CharactersTranslator;
    111     friend struct WTF::CharBufferFromLiteralDataTranslator;
    112     friend struct WTF::LCharBufferTranslator;
    113     friend struct WTF::SubstringTranslator;
    114     friend struct WTF::UCharBufferTranslator;
    115 
    116 private:
    117     // Used to construct static strings, which have an special refCount that can never hit zero.
    118     // This means that the static string will never be destroyed, which is important because
    119     // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
    120     enum ConstructEmptyStringTag { ConstructEmptyString };
    121     explicit StringImpl(ConstructEmptyStringTag)
    122         : m_refCount(s_refCountFlagIsStaticString)
    123         , m_length(0)
    124         , m_hash(0)
    125         , m_isAtomic(false)
    126         , m_is8Bit(true)
    127     {
    128         // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
    129         // with impunity. The empty string is special because it is never entered into
    130         // AtomicString's HashKey, but still needs to compare correctly.
    131         STRING_STATS_ADD_8BIT_STRING(m_length);
    132         hash();
    133     }
    134 
    135     // FIXME: there has to be a less hacky way to do this.
    136     enum Force8Bit { Force8BitConstructor };
    137     StringImpl(unsigned length, Force8Bit)
    138         : m_refCount(s_refCountIncrement)
    139         , m_length(length)
    140         , m_hash(0)
    141         , m_isAtomic(false)
    142         , m_is8Bit(true)
    143     {
    144         ASSERT(m_length);
    145         STRING_STATS_ADD_8BIT_STRING(m_length);
    146     }
    147 
    148     StringImpl(unsigned length)
    149         : m_refCount(s_refCountIncrement)
    150         , m_length(length)
    151         , m_hash(0)
    152         , m_isAtomic(false)
    153         , m_is8Bit(false)
    154     {
    155         ASSERT(m_length);
    156         STRING_STATS_ADD_16BIT_STRING(m_length);
    157     }
    158 
    159     enum StaticStringTag { StaticString };
    160     StringImpl(unsigned length, unsigned hash, StaticStringTag)
    161         : m_refCount(s_refCountFlagIsStaticString)
    162         , m_length(length)
    163         , m_hash(hash)
    164         , m_isAtomic(false)
    165         , m_is8Bit(true)
    166     {
    167     }
    168 
    169 public:
    170     ~StringImpl();
    171 
    172     static StringImpl* createStatic(const char* string, unsigned length, unsigned hash);
    173 
    174     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
    175     static PassRefPtr<StringImpl> create(const LChar*, unsigned length);
    176     static PassRefPtr<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
    177     template<size_t inlineCapacity>
    178     static PassRefPtr<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
    179     {
    180         return create8BitIfPossible(vector.data(), vector.size());
    181     }
    182 
    183     ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
    184     static PassRefPtr<StringImpl> create(const LChar*);
    185     ALWAYS_INLINE static PassRefPtr<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }
    186 
    187     static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
    188     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
    189 
    190     // Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr.
    191     // Just like the input pointer of realloc(), the originalString can't be used after this function.
    192     static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data);
    193     static PassRefPtr<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data);
    194 
    195     // If this StringImpl has only one reference, we can truncate the string by updating
    196     // its m_length property without actually re-allocating its buffer.
    197     void truncateAssumingIsolated(unsigned length)
    198     {
    199         ASSERT(hasOneRef());
    200         ASSERT(length <= m_length);
    201         m_length = length;
    202     }
    203 
    204     unsigned length() const { return m_length; }
    205     bool is8Bit() const { return m_is8Bit; }
    206 
    207     ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return reinterpret_cast<const LChar*>(this + 1); }
    208     ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return reinterpret_cast<const UChar*>(this + 1); }
    209 
    210     template <typename CharType>
    211     ALWAYS_INLINE const CharType * getCharacters() const;
    212 
    213     size_t sizeInBytes() const;
    214 
    215     bool isAtomic() const { return m_isAtomic; }
    216     void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }
    217 
    218     bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
    219 
    220 private:
    221     // The high bits of 'hash' are always empty, but we prefer to store our flags
    222     // in the low bits because it makes them slightly more efficient to access.
    223     // So, we shift left and right when setting and getting our hash code.
    224     void setHash(unsigned hash) const
    225     {
    226         ASSERT(!hasHash());
    227         // Multiple clients assume that StringHasher is the canonical string hash function.
    228         ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(characters8(), m_length) : StringHasher::computeHashAndMaskTop8Bits(characters16(), m_length)));
    229         m_hash = hash;
    230         ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
    231     }
    232 
    233     unsigned rawHash() const
    234     {
    235         return m_hash;
    236     }
    237 
    238 public:
    239     bool hasHash() const
    240     {
    241         return rawHash() != 0;
    242     }
    243 
    244     unsigned existingHash() const
    245     {
    246         ASSERT(hasHash());
    247         return rawHash();
    248     }
    249 
    250     unsigned hash() const
    251     {
    252         if (hasHash())
    253             return existingHash();
    254         return hashSlowCase();
    255     }
    256 
    257     inline bool hasOneRef() const
    258     {
    259         return m_refCount == s_refCountIncrement;
    260     }
    261 
    262     inline void ref()
    263     {
    264         m_refCount += s_refCountIncrement;
    265     }
    266 
    267     inline void deref()
    268     {
    269         if (m_refCount == s_refCountIncrement) {
    270             delete this;
    271             return;
    272         }
    273 
    274         m_refCount -= s_refCountIncrement;
    275     }
    276 
    277     static StringImpl* empty();
    278 
    279     // FIXME: Does this really belong in StringImpl?
    280     template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
    281     {
    282         if (numCharacters == 1) {
    283             *destination = *source;
    284             return;
    285         }
    286 
    287         // FIXME: Is this implementation really faster than memcpy?
    288         if (numCharacters <= s_copyCharsInlineCutOff) {
    289             unsigned i = 0;
    290 #if (CPU(X86) || CPU(X86_64))
    291             const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T);
    292 
    293             if (numCharacters > charsPerInt) {
    294                 unsigned stopCount = numCharacters & ~(charsPerInt - 1);
    295 
    296                 const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*>(source);
    297                 uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destination);
    298                 for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j)
    299                     destCharacters[j] = srcCharacters[j];
    300             }
    301 #endif
    302             for (; i < numCharacters; ++i)
    303                 destination[i] = source[i];
    304         } else
    305             memcpy(destination, source, numCharacters * sizeof(T));
    306     }
    307 
    308     ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters)
    309     {
    310         for (unsigned i = 0; i < numCharacters; ++i)
    311             destination[i] = source[i];
    312     }
    313 
    314     // Some string features, like refcounting and the atomicity flag, are not
    315     // thread-safe. We achieve thread safety by isolation, giving each thread
    316     // its own copy of the string.
    317     PassRefPtr<StringImpl> isolatedCopy() const;
    318 
    319     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
    320 
    321     UChar operator[](unsigned i) const
    322     {
    323         ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
    324         if (is8Bit())
    325             return characters8()[i];
    326         return characters16()[i];
    327     }
    328     UChar32 characterStartingAt(unsigned);
    329 
    330     bool containsOnlyWhitespace();
    331 
    332     int toIntStrict(bool* ok = 0, int base = 10);
    333     unsigned toUIntStrict(bool* ok = 0, int base = 10);
    334     int64_t toInt64Strict(bool* ok = 0, int base = 10);
    335     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    336     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
    337 
    338     int toInt(bool* ok = 0); // ignores trailing garbage
    339     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    340     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    341     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    342     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
    343 
    344     // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
    345     // Like the non-strict functions above, these return the value when there is trailing garbage.
    346     // It would be better if these were more consistent with the above functions instead.
    347     double toDouble(bool* ok = 0);
    348     float toFloat(bool* ok = 0);
    349 
    350     PassRefPtr<StringImpl> lower();
    351     PassRefPtr<StringImpl> upper();
    352 
    353     PassRefPtr<StringImpl> fill(UChar);
    354     // FIXME: Do we need fill(char) or can we just do the right thing if UChar is ASCII?
    355     PassRefPtr<StringImpl> foldCase();
    356 
    357     PassRefPtr<StringImpl> stripWhiteSpace();
    358     PassRefPtr<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
    359     PassRefPtr<StringImpl> simplifyWhiteSpace();
    360     PassRefPtr<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr);
    361 
    362     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
    363     template <typename CharType>
    364     ALWAYS_INLINE PassRefPtr<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
    365 
    366     size_t find(LChar character, unsigned start = 0);
    367     size_t find(char character, unsigned start = 0);
    368     size_t find(UChar character, unsigned start = 0);
    369     size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
    370     size_t find(const LChar*, unsigned index = 0);
    371     ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); }
    372     size_t find(StringImpl*);
    373     size_t find(StringImpl*, unsigned index);
    374     size_t findIgnoringCase(const LChar*, unsigned index = 0);
    375     ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
    376     size_t findIgnoringCase(StringImpl*, unsigned index = 0);
    377 
    378     size_t findNextLineStart(unsigned index = UINT_MAX);
    379 
    380     size_t reverseFind(UChar, unsigned index = UINT_MAX);
    381     size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
    382     size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
    383 
    384     size_t count(LChar) const;
    385 
    386     bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
    387     bool startsWith(UChar) const;
    388     bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    389     template<unsigned matchLength>
    390     bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); }
    391 
    392     bool endsWith(StringImpl*, bool caseSensitive = true);
    393     bool endsWith(UChar) const;
    394     bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const;
    395     template<unsigned matchLength>
    396     bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); }
    397 
    398     PassRefPtr<StringImpl> replace(UChar, UChar);
    399     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
    400     ALWAYS_INLINE PassRefPtr<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
    401     PassRefPtr<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
    402     PassRefPtr<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
    403     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
    404     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
    405 
    406     WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0);
    407 
    408 #if USE(CF)
    409     RetainPtr<CFStringRef> createCFString();
    410 #endif
    411 #ifdef __OBJC__
    412     operator NSString*();
    413 #endif
    414 
    415 #ifdef STRING_STATS
    416     ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
    417 #endif
    418 
    419 private:
    420     // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
    421     static const unsigned s_copyCharsInlineCutOff = 20;
    422 
    423     template <class UCharPredicate> PassRefPtr<StringImpl> stripMatchedCharacters(UCharPredicate);
    424     template <typename CharType, class UCharPredicate> PassRefPtr<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate);
    425     NEVER_INLINE unsigned hashSlowCase() const;
    426 
    427     // The bottom bit in the ref count indicates a static (immortal) string.
    428     static const unsigned s_refCountFlagIsStaticString = 0x1;
    429     static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.
    430 
    431 #ifdef STRING_STATS
    432     static StringStats m_stringStats;
    433 #endif
    434 
    435 #ifndef NDEBUG
    436     void assertHashIsCorrect()
    437     {
    438         ASSERT(hasHash());
    439         ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
    440     }
    441 #endif
    442 
    443 private:
    444     unsigned m_refCount;
    445     unsigned m_length;
    446     mutable unsigned m_hash : 24;
    447     mutable unsigned m_isAtomic : 1;
    448     mutable unsigned m_is8Bit : 1;
    449 };
    450 
    // getCharacters<LChar>() yields the 8-bit character data by forwarding to characters8().
    template <>
    ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const { return characters8(); }

    // getCharacters<UChar>() yields the 16-bit character data by forwarding to characters16().
    template <>
    ALWAYS_INLINE const UChar* StringImpl::getCharacters<UChar>() const { return characters16(); }
    456 
    457 WTF_EXPORT bool equal(const StringImpl*, const StringImpl*);
    458 WTF_EXPORT bool equal(const StringImpl*, const LChar*);
    459 inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
    460 WTF_EXPORT bool equal(const StringImpl*, const LChar*, unsigned);
    461 WTF_EXPORT bool equal(const StringImpl*, const UChar*, unsigned);
    462 inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
    463 inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
    464 inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
    465 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);
    466 
    467 template<typename CharType>
    468 ALWAYS_INLINE bool equal(const CharType* a, const CharType* b, unsigned length) { return !memcmp(a, b, length * sizeof(CharType)); }
    469 
    470 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length)
    471 {
    472     for (unsigned i = 0; i < length; ++i) {
    473         if (a[i] != b[i])
    474             return false;
    475     }
    476     return true;
    477 }
    478 
    479 ALWAYS_INLINE bool equal(const UChar* a, const LChar* b, unsigned length) { return equal(b, a, length); }
    480 
    481 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const StringImpl*);
    482 WTF_EXPORT bool equalIgnoringCase(const StringImpl*, const LChar*);
    483 inline bool equalIgnoringCase(const LChar* a, const StringImpl* b) { return equalIgnoringCase(b, a); }
    484 WTF_EXPORT bool equalIgnoringCase(const LChar*, const LChar*, unsigned);
    485 WTF_EXPORT bool equalIgnoringCase(const UChar*, const LChar*, unsigned);
    486 inline bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) { return equalIgnoringCase(a, reinterpret_cast<const LChar*>(b), length); }
    487 inline bool equalIgnoringCase(const LChar* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
    488 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
    489 inline bool equalIgnoringCase(const char* a, const LChar* b, unsigned length) { return equalIgnoringCase(b, reinterpret_cast<const LChar*>(a), length); }
    490 inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
    491 {
    492     ASSERT(length >= 0);
    493     return !Unicode::umemcasecmp(a, b, length);
    494 }
    495 WTF_EXPORT bool equalIgnoringCaseNonNull(const StringImpl*, const StringImpl*);
    496 
    497 WTF_EXPORT bool equalIgnoringNullity(StringImpl*, StringImpl*);
    498 
    499 template<typename CharacterType>
    500 inline size_t find(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = 0)
    501 {
    502     while (index < length) {
    503         if (characters[index] == matchCharacter)
    504             return index;
    505         ++index;
    506     }
    507     return notFound;
    508 }
    509 
    510 ALWAYS_INLINE size_t find(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = 0)
    511 {
    512     return find(characters, length, static_cast<UChar>(matchCharacter), index);
    513 }
    514 
    515 inline size_t find(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
    516 {
    517     if (matchCharacter & ~0xFF)
    518         return notFound;
    519     return find(characters, length, static_cast<LChar>(matchCharacter), index);
    520 }
    521 
    522 inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
    523 {
    524     while (index < length) {
    525         if (matchFunction(characters[index]))
    526             return index;
    527         ++index;
    528     }
    529     return notFound;
    530 }
    531 
    532 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
    533 {
    534     while (index < length) {
    535         if (matchFunction(characters[index]))
    536             return index;
    537         ++index;
    538     }
    539     return notFound;
    540 }
    541 
    542 template<typename CharacterType>
    543 inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0)
    544 {
    545     while (index < length) {
    546         CharacterType c = characters[index++];
    547         if ((c != '\n') && (c != '\r'))
    548             continue;
    549 
    550         // There can only be a start of a new line if there are more characters
    551         // beyond the current character.
    552         if (index < length) {
    553             // The 3 common types of line terminators are 1. \r\n (Windows),
    554             // 2. \r (old MacOS) and 3. \n (Unix'es).
    555 
    556             if (c == '\n')
    557                 return index; // Case 3: just \n.
    558 
    559             CharacterType c2 = characters[index];
    560             if (c2 != '\n')
    561                 return index; // Case 2: just \r.
    562 
    563             // Case 1: \r\n.
    564             // But, there's only a start of a new line if there are more
    565             // characters beyond the \r\n.
    566             if (++index < length)
    567                 return index;
    568         }
    569     }
    570     return notFound;
    571 }
    572 
    573 template<typename CharacterType>
    574 inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX)
    575 {
    576     if (!length)
    577         return notFound;
    578     if (index >= length)
    579         index = length - 1;
    580     CharacterType c = characters[index];
    581     while ((c != '\n') && (c != '\r')) {
    582         if (!index--)
    583             return notFound;
    584         c = characters[index];
    585     }
    586     return index;
    587 }
    588 
    589 template<typename CharacterType>
    590 inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX)
    591 {
    592     if (!length)
    593         return notFound;
    594     if (index >= length)
    595         index = length - 1;
    596     while (characters[index] != matchCharacter) {
    597         if (!index--)
    598             return notFound;
    599     }
    600     return index;
    601 }
    602 
    603 ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX)
    604 {
    605     return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
    606 }
    607 
    608 inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
    609 {
    610     if (matchCharacter & ~0xFF)
    611         return notFound;
    612     return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
    613 }
    614 
    615 inline size_t StringImpl::find(LChar character, unsigned start)
    616 {
    617     if (is8Bit())
    618         return WTF::find(characters8(), m_length, character, start);
    619     return WTF::find(characters16(), m_length, character, start);
    620 }
    621 
    622 ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
    623 {
    624     return find(static_cast<LChar>(character), start);
    625 }
    626 
    627 inline size_t StringImpl::find(UChar character, unsigned start)
    628 {
    629     if (is8Bit())
    630         return WTF::find(characters8(), m_length, character, start);
    631     return WTF::find(characters16(), m_length, character, start);
    632 }
    633 
    634 inline unsigned lengthOfNullTerminatedString(const UChar* string)
    635 {
    636     size_t length = 0;
    637     while (string[length] != UChar(0))
    638         ++length;
    639     RELEASE_ASSERT(length <= std::numeric_limits<unsigned>::max());
    640     return static_cast<unsigned>(length);
    641 }
    642 
    643 template<size_t inlineCapacity>
    644 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
    645 {
    646     if (!b)
    647         return !a.size();
    648     if (a.size() != b->length())
    649         return false;
    650     if (b->is8Bit())
    651         return equal(a.data(), b->characters8(), b->length());
    652     return equal(a.data(), b->characters16(), b->length());
    653 }
    654 
    // Three-way comparison of two character buffers, element by element.
    // Returns -1, 0 or 1: the first differing element decides; if one buffer is a
    // prefix of the other, the shorter one orders first.
    template<typename CharacterType1, typename CharacterType2>
    static inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
    {
        const unsigned commonLength = l1 < l2 ? l1 : l2;
        for (unsigned i = 0; i < commonLength; ++i) {
            if (c1[i] != c2[i])
                return c1[i] > c2[i] ? 1 : -1;
        }

        // The common prefix is identical, so the lengths decide.
        if (l1 == l2)
            return 0;
        return l1 > l2 ? 1 : -1;
    }
    674 
    675 static inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
    676 {
    677     return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8());
    678 }
    679 
    680 static inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2)
    681 {
    682     return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16());
    683 }
    684 
    685 static inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2)
    686 {
    687     return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16());
    688 }
    689 
    690 static inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
    691 {
    692     if (!string1)
    693         return (string2 && string2->length()) ? -1 : 0;
    694 
    695     if (!string2)
    696         return string1->length() ? 1 : 0;
    697 
    698     bool string1Is8Bit = string1->is8Bit();
    699     bool string2Is8Bit = string2->is8Bit();
    700     if (string1Is8Bit) {
    701         if (string2Is8Bit)
    702             return codePointCompare8(string1, string2);
    703         return codePointCompare8To16(string1, string2);
    704     }
    705     if (string2Is8Bit)
    706         return -codePointCompare8To16(string2, string1);
    707     return codePointCompare16(string1, string2);
    708 }
    709 
    710 static inline bool isSpaceOrNewline(UChar c)
    711 {
    712     // Use isASCIISpace() for basic Latin-1.
    713     // This will include newlines, which aren't included in Unicode DirWS.
    714     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
    715 }
    716 
    717 inline PassRefPtr<StringImpl> StringImpl::isolatedCopy() const
    718 {
    719     if (is8Bit())
    720         return create(characters8(), m_length);
    721     return create(characters16(), m_length);
    722 }
    723 
    // Only declared here; the specializations below just name it as their Hash type.
    struct StringHash;

    // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
    template<typename T> struct DefaultHash;
    // Raw-pointer keys.
    template<> struct DefaultHash<StringImpl*> {
        typedef StringHash Hash;
    };
    // RefPtr keys.
    template<> struct DefaultHash<RefPtr<StringImpl> > {
        typedef StringHash Hash;
    };
    734 
    735 }
    736 
    737 using WTF::StringImpl;
    738 using WTF::equal;
    739 using WTF::equalNonNull;
    740 using WTF::TextCaseSensitivity;
    741 using WTF::TextCaseSensitive;
    742 using WTF::TextCaseInsensitive;
    743 
    744 #endif
    745