1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef StringImpl_h 24 #define StringImpl_h 25 26 #include <limits.h> 27 #include <wtf/ASCIICType.h> 28 #include <wtf/CrossThreadRefCounted.h> 29 #include <wtf/OwnFastMallocPtr.h> 30 #include <wtf/PtrAndFlags.h> 31 #include <wtf/RefCounted.h> 32 #include <wtf/StringHashFunctions.h> 33 #include <wtf/Vector.h> 34 #include <wtf/unicode/Unicode.h> 35 36 #if PLATFORM(CF) 37 typedef const struct __CFString * CFStringRef; 38 #endif 39 40 #ifdef __OBJC__ 41 @class NSString; 42 #endif 43 44 namespace JSC { 45 class UString; 46 } 47 48 namespace WebCore { 49 50 class StringBuffer; 51 52 struct CStringTranslator; 53 struct HashAndCharactersTranslator; 54 struct StringHash; 55 struct UCharBufferTranslator; 56 57 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; 58 59 typedef bool (*CharacterMatchFunctionPtr)(UChar); 60 61 class StringImpl : public RefCounted<StringImpl> { 62 friend struct CStringTranslator; 63 friend struct HashAndCharactersTranslator; 64 friend struct UCharBufferTranslator; 65 private: 66 friend class ThreadGlobalData; 67 StringImpl(); 68 69 // This constructor adopts the UChar* without copying the buffer. 70 StringImpl(const UChar*, unsigned length); 71 72 // This constructor assumes that 'this' was allocated with a UChar buffer of size 'length' at the end. 73 StringImpl(unsigned length); 74 75 // For use only by AtomicString's XXXTranslator helpers. 76 void setHash(unsigned hash) { ASSERT(!m_hash); m_hash = hash; } 77 78 typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar; 79 80 public: 81 ~StringImpl(); 82 83 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); 84 static PassRefPtr<StringImpl> create(const char*, unsigned length); 85 static PassRefPtr<StringImpl> create(const char*); 86 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); 87 88 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&); 89 90 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length); 91 static PassRefPtr<StringImpl> adopt(StringBuffer&); 92 static PassRefPtr<StringImpl> adopt(Vector<UChar>&); 93 #if USE(JSC) 94 static PassRefPtr<StringImpl> create(const JSC::UString&); 95 JSC::UString ustring(); 96 #endif 97 98 SharedUChar* sharedBuffer(); 99 const UChar* characters() { return m_data; } 100 unsigned length() { return m_length; } 101 102 bool hasTerminatingNullCharacter() const { return m_sharedBufferAndFlags.isFlagSet(HasTerminatingNullCharacter); } 103 104 bool inTable() const { return m_sharedBufferAndFlags.isFlagSet(InTable); } 105 void setInTable() { return m_sharedBufferAndFlags.setFlag(InTable); } 106 107 unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; } 108 unsigned existingHash() const { ASSERT(m_hash); return m_hash; } 109 inline static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); } 110 inline static unsigned computeHash(const char* data) { return WTF::stringHash(data); } 111 112 // Returns a StringImpl suitable for use on another thread. 113 PassRefPtr<StringImpl> crossThreadString(); 114 // Makes a deep copy. Helpful only if you need to use a String on another thread 115 // (use crossThreadString if the method call doesn't need to be threadsafe). 116 // Since StringImpl objects are immutable, there's no other reason to make a copy. 117 PassRefPtr<StringImpl> threadsafeCopy() const; 118 119 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); 120 121 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } 122 UChar32 characterStartingAt(unsigned); 123 124 bool containsOnlyWhitespace(); 125 126 int toIntStrict(bool* ok = 0, int base = 10); 127 unsigned toUIntStrict(bool* ok = 0, int base = 10); 128 int64_t toInt64Strict(bool* ok = 0, int base = 10); 129 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); 130 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); 131 132 int toInt(bool* ok = 0); // ignores trailing garbage 133 unsigned toUInt(bool* ok = 0); // ignores trailing garbage 134 int64_t toInt64(bool* ok = 0); // ignores trailing garbage 135 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage 136 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage 137 138 double toDouble(bool* ok = 0); 139 float toFloat(bool* ok = 0); 140 141 PassRefPtr<StringImpl> lower(); 142 PassRefPtr<StringImpl> upper(); 143 PassRefPtr<StringImpl> secure(UChar aChar); 144 PassRefPtr<StringImpl> capitalize(UChar previousCharacter); 145 PassRefPtr<StringImpl> foldCase(); 146 147 PassRefPtr<StringImpl> stripWhiteSpace(); 148 PassRefPtr<StringImpl> simplifyWhiteSpace(); 149 150 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); 151 152 int find(const char*, int index = 0, bool caseSensitive = true); 153 int find(UChar, int index = 0); 154 int find(CharacterMatchFunctionPtr, int index = 0); 155 int find(StringImpl*, int index, bool caseSensitive = true); 156 157 int reverseFind(UChar, int index); 158 int reverseFind(StringImpl*, int index, bool caseSensitive = true); 159 160 bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; } 161 bool endsWith(StringImpl*, bool caseSensitive = true); 162 163 PassRefPtr<StringImpl> replace(UChar, UChar); 164 PassRefPtr<StringImpl> replace(UChar, StringImpl*); 165 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); 166 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); 167 168 static StringImpl* empty(); 169 170 Vector<char> ascii(); 171 172 WTF::Unicode::Direction defaultWritingDirection(); 173 174 #if PLATFORM(CF) 175 CFStringRef createCFString(); 176 #endif 177 #ifdef __OBJC__ 178 operator NSString*(); 179 #endif 180 181 void operator delete(void*); 182 183 private: 184 // Allocation from a custom buffer is only allowed internally to avoid 185 // mismatched allocators. Callers should use create(). 186 void* operator new(size_t size); 187 void* operator new(size_t size, void* address); 188 189 static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length); 190 191 // The StringImpl struct and its data may be allocated within a single heap block. 192 // In this case, the m_data pointer is an "internal buffer", and does not need to be deallocated. 193 bool bufferIsInternal() { return m_data == reinterpret_cast<const UChar*>(this + 1); } 194 195 enum StringImplFlags { 196 HasTerminatingNullCharacter, 197 InTable, 198 }; 199 200 const UChar* m_data; 201 unsigned m_length; 202 mutable unsigned m_hash; 203 PtrAndFlags<SharedUChar, StringImplFlags> m_sharedBufferAndFlags; 204 // There is a fictitious variable-length UChar array at the end, which is used 205 // as the internal buffer by the createUninitialized and create methods. 206 }; 207 208 bool equal(StringImpl*, StringImpl*); 209 bool equal(StringImpl*, const char*); 210 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } 211 212 bool equalIgnoringCase(StringImpl*, StringImpl*); 213 bool equalIgnoringCase(StringImpl*, const char*); 214 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } 215 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); 216 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 217 218 bool equalIgnoringNullity(StringImpl*, StringImpl*); 219 220 static inline bool isSpaceOrNewline(UChar c) 221 { 222 // Use isASCIISpace() for basic Latin-1. 223 // This will include newlines, which aren't included in Unicode DirWS. 224 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; 225 } 226 227 // This is a hot function because it's used when parsing HTML. 228 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) 229 { 230 ASSERT(characters); 231 ASSERT(length); 232 233 // Optimize for the case where there are no Null characters by quickly 234 // searching for nulls, and then using StringImpl::create, which will 235 // memcpy the whole buffer. This is faster than assigning character by 236 // character during the loop. 237 238 // Fast case. 239 int foundNull = 0; 240 for (unsigned i = 0; !foundNull && i < length; i++) { 241 int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) 242 foundNull |= !c; 243 } 244 if (!foundNull) 245 return StringImpl::create(characters, length); 246 247 return StringImpl::createStrippingNullCharactersSlowCase(characters, length); 248 } 249 250 } 251 252 namespace WTF { 253 254 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 255 template<typename T> struct DefaultHash; 256 template<> struct DefaultHash<WebCore::StringImpl*> { 257 typedef WebCore::StringHash Hash; 258 }; 259 template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > { 260 typedef WebCore::StringHash Hash; 261 }; 262 263 } 264 265 #endif 266