Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  * Copyright (C) 2009 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef StringImpl_h
     24 #define StringImpl_h
     25 
     26 #include <limits.h>
     27 #include <wtf/ASCIICType.h>
     28 #include <wtf/CrossThreadRefCounted.h>
     29 #include <wtf/OwnFastMallocPtr.h>
     30 #include <wtf/PtrAndFlags.h>
     31 #include <wtf/RefCounted.h>
     32 #include <wtf/StringHashFunctions.h>
     33 #include <wtf/Vector.h>
     34 #include <wtf/unicode/Unicode.h>
     35 
     36 #if PLATFORM(CF)
     37 typedef const struct __CFString * CFStringRef;
     38 #endif
     39 
     40 #ifdef __OBJC__
     41 @class NSString;
     42 #endif
     43 
     44 namespace JSC {
     45 class UString;
     46 }
     47 
     48 namespace WebCore {
     49 
     50 class StringBuffer;
     51 
     52 struct CStringTranslator;
     53 struct HashAndCharactersTranslator;
     54 struct StringHash;
     55 struct UCharBufferTranslator;
     56 
     57 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
     58 
     59 typedef bool (*CharacterMatchFunctionPtr)(UChar);
     60 
     61 class StringImpl : public RefCounted<StringImpl> {
     62     friend struct CStringTranslator;
     63     friend struct HashAndCharactersTranslator;
     64     friend struct UCharBufferTranslator;
     65 private:
     66     friend class ThreadGlobalData;
     67     StringImpl();
     68 
     69     // This constructor adopts the UChar* without copying the buffer.
     70     StringImpl(const UChar*, unsigned length);
     71 
     72     // This constructor assumes that 'this' was allocated with a UChar buffer of size 'length' at the end.
     73     StringImpl(unsigned length);
     74 
     75     // For use only by AtomicString's XXXTranslator helpers.
     76     void setHash(unsigned hash) { ASSERT(!m_hash); m_hash = hash; }
     77 
     78     typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;
     79 
     80 public:
     81     ~StringImpl();
     82 
     83     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
     84     static PassRefPtr<StringImpl> create(const char*, unsigned length);
     85     static PassRefPtr<StringImpl> create(const char*);
     86     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
     87 
     88     static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
     89 
     90     static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
     91     static PassRefPtr<StringImpl> adopt(StringBuffer&);
     92     static PassRefPtr<StringImpl> adopt(Vector<UChar>&);
     93 #if USE(JSC)
     94     static PassRefPtr<StringImpl> create(const JSC::UString&);
     95     JSC::UString ustring();
     96 #endif
     97 
     98     SharedUChar* sharedBuffer();
     99     const UChar* characters() { return m_data; }
    100     unsigned length() { return m_length; }
    101 
    102     bool hasTerminatingNullCharacter() const { return m_sharedBufferAndFlags.isFlagSet(HasTerminatingNullCharacter); }
    103 
    104     bool inTable() const { return m_sharedBufferAndFlags.isFlagSet(InTable); }
    105     void setInTable() { return m_sharedBufferAndFlags.setFlag(InTable); }
    106 
    107     unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
    108     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
    109     inline static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
    110     inline static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
    111 
    112     // Returns a StringImpl suitable for use on another thread.
    113     PassRefPtr<StringImpl> crossThreadString();
    114     // Makes a deep copy. Helpful only if you need to use a String on another thread
    115     // (use crossThreadString if the method call doesn't need to be threadsafe).
    116     // Since StringImpl objects are immutable, there's no other reason to make a copy.
    117     PassRefPtr<StringImpl> threadsafeCopy() const;
    118 
    119     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
    120 
    121     UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
    122     UChar32 characterStartingAt(unsigned);
    123 
    124     bool containsOnlyWhitespace();
    125 
    126     int toIntStrict(bool* ok = 0, int base = 10);
    127     unsigned toUIntStrict(bool* ok = 0, int base = 10);
    128     int64_t toInt64Strict(bool* ok = 0, int base = 10);
    129     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
    130     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
    131 
    132     int toInt(bool* ok = 0); // ignores trailing garbage
    133     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
    134     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
    135     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
    136     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
    137 
    138     double toDouble(bool* ok = 0);
    139     float toFloat(bool* ok = 0);
    140 
    141     PassRefPtr<StringImpl> lower();
    142     PassRefPtr<StringImpl> upper();
    143     PassRefPtr<StringImpl> secure(UChar aChar);
    144     PassRefPtr<StringImpl> capitalize(UChar previousCharacter);
    145     PassRefPtr<StringImpl> foldCase();
    146 
    147     PassRefPtr<StringImpl> stripWhiteSpace();
    148     PassRefPtr<StringImpl> simplifyWhiteSpace();
    149 
    150     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
    151 
    152     int find(const char*, int index = 0, bool caseSensitive = true);
    153     int find(UChar, int index = 0);
    154     int find(CharacterMatchFunctionPtr, int index = 0);
    155     int find(StringImpl*, int index, bool caseSensitive = true);
    156 
    157     int reverseFind(UChar, int index);
    158     int reverseFind(StringImpl*, int index, bool caseSensitive = true);
    159 
    160     bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
    161     bool endsWith(StringImpl*, bool caseSensitive = true);
    162 
    163     PassRefPtr<StringImpl> replace(UChar, UChar);
    164     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
    165     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
    166     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
    167 
    168     static StringImpl* empty();
    169 
    170     Vector<char> ascii();
    171 
    172     WTF::Unicode::Direction defaultWritingDirection();
    173 
    174 #if PLATFORM(CF)
    175     CFStringRef createCFString();
    176 #endif
    177 #ifdef __OBJC__
    178     operator NSString*();
    179 #endif
    180 
    181     void operator delete(void*);
    182 
    183 private:
    184     // Allocation from a custom buffer is only allowed internally to avoid
    185     // mismatched allocators. Callers should use create().
    186     void* operator new(size_t size);
    187     void* operator new(size_t size, void* address);
    188 
    189     static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
    190 
    191     // The StringImpl struct and its data may be allocated within a single heap block.
    192     // In this case, the m_data pointer is an "internal buffer", and does not need to be deallocated.
    193     bool bufferIsInternal() { return m_data == reinterpret_cast<const UChar*>(this + 1); }
    194 
    195     enum StringImplFlags {
    196         HasTerminatingNullCharacter,
    197         InTable,
    198     };
    199 
    200     const UChar* m_data;
    201     unsigned m_length;
    202     mutable unsigned m_hash;
    203     PtrAndFlags<SharedUChar, StringImplFlags> m_sharedBufferAndFlags;
    204     // There is a fictitious variable-length UChar array at the end, which is used
    205     // as the internal buffer by the createUninitialized and create methods.
    206 };
    207 
    208 bool equal(StringImpl*, StringImpl*);
    209 bool equal(StringImpl*, const char*);
    210 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
    211 
    212 bool equalIgnoringCase(StringImpl*, StringImpl*);
    213 bool equalIgnoringCase(StringImpl*, const char*);
    214 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
    215 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
    216 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
    217 
    218 bool equalIgnoringNullity(StringImpl*, StringImpl*);
    219 
    220 static inline bool isSpaceOrNewline(UChar c)
    221 {
    222     // Use isASCIISpace() for basic Latin-1.
    223     // This will include newlines, which aren't included in Unicode DirWS.
    224     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
    225 }
    226 
    227 // This is a hot function because it's used when parsing HTML.
    228 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
    229 {
    230     ASSERT(characters);
    231     ASSERT(length);
    232 
    233     // Optimize for the case where there are no Null characters by quickly
    234     // searching for nulls, and then using StringImpl::create, which will
    235     // memcpy the whole buffer.  This is faster than assigning character by
    236     // character during the loop.
    237 
    238     // Fast case.
    239     int foundNull = 0;
    240     for (unsigned i = 0; !foundNull && i < length; i++) {
    241         int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
    242         foundNull |= !c;
    243     }
    244     if (!foundNull)
    245         return StringImpl::create(characters, length);
    246 
    247     return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
    248 }
    249 
    250 }
    251 
    252 namespace WTF {
    253 
    254     // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
    255     template<typename T> struct DefaultHash;
    256     template<> struct DefaultHash<WebCore::StringImpl*> {
    257         typedef WebCore::StringHash Hash;
    258     };
    259     template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
    260         typedef WebCore::StringHash Hash;
    261     };
    262 
    263 }
    264 
    265 #endif
    266