Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
      4  * Copyright (C) 2009 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef StringImpl_h
     24 #define StringImpl_h
     25 
     26 #include <limits.h>
     27 #include <wtf/ASCIICType.h>
     28 #include <wtf/CrossThreadRefCounted.h>
     29 #include <wtf/Forward.h>
     30 #include <wtf/OwnFastMallocPtr.h>
     31 #include <wtf/StdLibExtras.h>
     32 #include <wtf/StringHasher.h>
     33 #include <wtf/Vector.h>
     34 #include <wtf/text/StringImplBase.h>
     35 #include <wtf/unicode/Unicode.h>
     36 
     37 #if USE(CF)
     38 typedef const struct __CFString * CFStringRef;
     39 #endif
     40 
     41 #ifdef __OBJC__
     42 @class NSString;
     43 #endif
     44 
     45 // FIXME: This is a temporary layering violation while we move string code to WTF.
     46 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
     47 namespace JSC {
     48 struct IdentifierCStringTranslator;
     49 struct IdentifierUCharBufferTranslator;
     50 }
     51 
     52 namespace WTF {
     53 
     54 struct CStringTranslator;
     55 struct HashAndCharactersTranslator;
     56 struct HashAndUTF8CharactersTranslator;
     57 struct UCharBufferTranslator;
     58 
     59 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
     60 
     61 typedef OwnFastMallocPtr<const UChar> SharableUChar;
     62 typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
     63 typedef bool (*CharacterMatchFunctionPtr)(UChar);
     64 
     65 class StringImpl : public StringImplBase {
     66     friend struct JSC::IdentifierCStringTranslator;
     67     friend struct JSC::IdentifierUCharBufferTranslator;
     68     friend struct WTF::CStringTranslator;
     69     friend struct WTF::HashAndCharactersTranslator;
     70     friend struct WTF::HashAndUTF8CharactersTranslator;
     71     friend struct WTF::UCharBufferTranslator;
     72     friend class AtomicStringImpl;
     73 private:
     74     // Used to construct static strings, which have an special refCount that can never hit zero.
     75     // This means that the static string will never be destroyed, which is important because
     76     // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
     77     StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
     78         : StringImplBase(length, ConstructStaticString)
     79         , m_data(characters)
     80         , m_buffer(0)
     81         , m_hash(0)
     82     {
     83         // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
     84         // with impunity. The empty string is special because it is never entered into
     85         // AtomicString's HashKey, but still needs to compare correctly.
     86         hash();
     87     }
     88 
     89     // Create a normal string with internal storage (BufferInternal)
     90     StringImpl(unsigned length)
     91         : StringImplBase(length, BufferInternal)
     92         , m_data(reinterpret_cast<const UChar*>(this + 1))
     93         , m_buffer(0)
     94         , m_hash(0)
     95     {
     96         ASSERT(m_data);
     97         ASSERT(m_length);
     98     }
     99 
    100     // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
    101     StringImpl(const UChar* characters, unsigned length)
    102         : StringImplBase(length, BufferOwned)
    103         , m_data(characters)
    104         , m_buffer(0)
    105         , m_hash(0)
    106     {
    107         ASSERT(m_data);
    108         ASSERT(m_length);
    109     }
    110 
    111     // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
    112     StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
    113         : StringImplBase(length, BufferSubstring)
    114         , m_data(characters)
    115         , m_substringBuffer(base.leakRef())
    116         , m_hash(0)
    117     {
    118         ASSERT(m_data);
    119         ASSERT(m_length);
    120         ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
    121     }
    122 
    123     // Used to construct new strings sharing an existing SharedUChar (BufferShared)
    124     StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
    125         : StringImplBase(length, BufferShared)
    126         , m_data(characters)
    127         , m_sharedBuffer(sharedBuffer.leakRef())
    128         , m_hash(0)
    129     {
    130         ASSERT(m_data);
    131         ASSERT(m_length);
    132     }
    133 
    134     // For use only by AtomicString's XXXTranslator helpers.
    135     void setHash(unsigned hash)
    136     {
    137         ASSERT(!isStatic());
    138         ASSERT(!m_hash);
    139         ASSERT(hash == StringHasher::computeHash(m_data, m_length));
    140         m_hash = hash;
    141     }
    142 
    143 public:
    144     ~StringImpl();
    145 
    146     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
    147     static PassRefPtr<StringImpl> create(const char*, unsigned length);
    148     static PassRefPtr<StringImpl> create(const char*);
    149     static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
    150     static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
    151     {
    152         ASSERT(rep);
    153         ASSERT(length <= rep->length());
    154 
    155         if (!length)
    156             return empty();
    157 
    158         StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
    159         return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
    160     }
    161 
    162     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
    163     static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
    164     {
    165         if (!length) {
    166             output = 0;
    167             return empty();
    168         }
    169 
    170         if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
    171             output = 0;
    172             return 0;
    173         }
    174         StringImpl* resultImpl;
    175         if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
    176             output = 0;
    177             return 0;
    178         }
    179         output = reinterpret_cast<UChar*>(resultImpl + 1);
    180         return adoptRef(new(resultImpl) StringImpl(length));
    181     }
    182 
    183     static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
    184     static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
    185     static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
    186 
    187     template<size_t inlineCapacity>
    188     static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
    189     {
    190         if (size_t size = vector.size()) {
    191             ASSERT(vector.data());
    192             if (size > std::numeric_limits<unsigned>::max())
    193                 CRASH();
    194             return adoptRef(new StringImpl(vector.releaseBuffer(), size));
    195         }
    196         return empty();
    197     }
    198     static PassRefPtr<StringImpl> adopt(StringBuffer&);
    199 
    200     SharedUChar* sharedBuffer();
    201     const UChar* characters() const { return m_data; }
    202 
    203     size_t cost()
    204     {
    205         // For substrings, return the cost of the base string.
    206         if (bufferOwnership() == BufferSubstring)
    207             return m_substringBuffer->cost();
    208 
    209         if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
    210             m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
    211             return m_length;
    212         }
    213         return 0;
    214     }
    215 
    216     bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
    217     void setIsIdentifier(bool isIdentifier)
    218     {
    219         ASSERT(!isStatic());
    220         if (isIdentifier)
    221             m_refCountAndFlags |= s_refCountFlagIsIdentifier;
    222         else
    223             m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
    224     }
    225 
    226     bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
    227 
    228     bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
    229     void setIsAtomic(bool isIdentifier)
    230     {
    231         ASSERT(!isStatic());
    232         if (isIdentifier)
    233             m_refCountAndFlags |= s_refCountFlagIsAtomic;
    234         else
    235             m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
    236     }
    237 
    238     unsigned hash() const { if (!m_hash) m_hash = StringHasher::computeHash(m_data, m_length); return m_hash; }
    239     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
    240 
    241     ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
    242     ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
    243 
    244     static StringImpl* empty();
    245 
    246     static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
    247     {
    248         if (numCharacters <= s_copyCharsInlineCutOff) {
    249             for (unsigned i = 0; i < numCharacters; ++i)
    250                 destination[i] = source[i];
    251         } else
    252             memcpy(destination, source, numCharacters * sizeof(UChar));
    253     }
    254 
    255     // Returns a StringImpl suitable for use on another thread.
    256     PassRefPtr<StringImpl> crossThreadString();
    257     // Makes a deep copy. Helpful only if you need to use a String on another thread
    258     // (use crossThreadString if the method call doesn't need to be threadsafe).
    259     // Since StringImpl objects are immutable, there's no other reason to make a copy.
    260     PassRefPtr<StringImpl> threadsafeCopy() const;
    261 
    262     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
    263 
    264     UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
    265     UChar32 characterStartingAt(unsigned);
    266 
    267     bool containsOnlyWhitespace();
    268 
    269     int toIntStrict(bool* ok = 0, int base = 10);
    270     unsigned toUIntStrict(bool* ok = 0, int base = 10);
    271     int64_t toInt64Strict(bool* ok = 0, int base = 10);
    272     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    273     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
    274 
    275     int toInt(bool* ok = 0); // ignores trailing garbage
    276     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    277     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    278     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    279     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
    280 
    281     double toDouble(bool* ok = 0, bool* didReadNumber = 0);
    282     float toFloat(bool* ok = 0, bool* didReadNumber = 0);
    283 
    284     PassRefPtr<StringImpl> lower();
    285     PassRefPtr<StringImpl> upper();
    286 
    287     enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter };
    288 
    289     PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter);
    290     PassRefPtr<StringImpl> foldCase();
    291 
    292     PassRefPtr<StringImpl> stripWhiteSpace();
    293     PassRefPtr<StringImpl> simplifyWhiteSpace();
    294 
    295     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
    296 
    297     size_t find(UChar, unsigned index = 0);
    298     size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
    299     size_t find(const char*, unsigned index = 0);
    300     size_t find(StringImpl*, unsigned index = 0);
    301     size_t findIgnoringCase(const char*, unsigned index = 0);
    302     size_t findIgnoringCase(StringImpl*, unsigned index = 0);
    303 
    304     size_t reverseFind(UChar, unsigned index = UINT_MAX);
    305     size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
    306     size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
    307 
    308     bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
    309     bool endsWith(StringImpl*, bool caseSensitive = true);
    310 
    311     PassRefPtr<StringImpl> replace(UChar, UChar);
    312     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
    313     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
    314     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
    315 
    316     WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0);
    317 
    318 #if USE(CF)
    319     CFStringRef createCFString();
    320 #endif
    321 #ifdef __OBJC__
    322     operator NSString*();
    323 #endif
    324 
    325 private:
    326     // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
    327     static const unsigned s_copyCharsInlineCutOff = 20;
    328 
    329     static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
    330 
    331     BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
    332     bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
    333     const UChar* m_data;
    334     union {
    335         void* m_buffer;
    336         StringImpl* m_substringBuffer;
    337         SharedUChar* m_sharedBuffer;
    338     };
    339     mutable unsigned m_hash;
    340 };
    341 
    342 bool equal(const StringImpl*, const StringImpl*);
    343 bool equal(const StringImpl*, const char*);
    344 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
    345 
    346 bool equalIgnoringCase(StringImpl*, StringImpl*);
    347 bool equalIgnoringCase(StringImpl*, const char*);
    348 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
    349 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
    350 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
    351 
    352 bool equalIgnoringNullity(StringImpl*, StringImpl*);
    353 
    354 template<size_t inlineCapacity>
    355 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
    356 {
    357     if (!b)
    358         return !a.size();
    359     if (a.size() != b->length())
    360         return false;
    361     return !memcmp(a.data(), b->characters(), b->length());
    362 }
    363 
    364 int codePointCompare(const StringImpl*, const StringImpl*);
    365 
    366 static inline bool isSpaceOrNewline(UChar c)
    367 {
    368     // Use isASCIISpace() for basic Latin-1.
    369     // This will include newlines, which aren't included in Unicode DirWS.
    370     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
    371 }
    372 
    373 // This is a hot function because it's used when parsing HTML.
    374 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
    375 {
    376     ASSERT(characters);
    377     ASSERT(length);
    378 
    379     // Optimize for the case where there are no Null characters by quickly
    380     // searching for nulls, and then using StringImpl::create, which will
    381     // memcpy the whole buffer.  This is faster than assigning character by
    382     // character during the loop.
    383 
    384     // Fast case.
    385     int foundNull = 0;
    386     for (unsigned i = 0; !foundNull && i < length; i++) {
    387         int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
    388         foundNull |= !c;
    389     }
    390     if (!foundNull)
    391         return StringImpl::create(characters, length);
    392 
    393     return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
    394 }
    395 
    396 struct StringHash;
    397 
    398 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
    399 template<typename T> struct DefaultHash;
    400 template<> struct DefaultHash<StringImpl*> {
    401     typedef StringHash Hash;
    402 };
    403 template<> struct DefaultHash<RefPtr<StringImpl> > {
    404     typedef StringHash Hash;
    405 };
    406 
    407 }
    408 
    409 using WTF::StringImpl;
    410 using WTF::equal;
    411 using WTF::TextCaseSensitivity;
    412 using WTF::TextCaseSensitive;
    413 using WTF::TextCaseInsensitive;
    414 
    415 #endif
    416