1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef StringImpl_h 24 #define StringImpl_h 25 26 #include <limits.h> 27 #include <wtf/ASCIICType.h> 28 #include <wtf/CrossThreadRefCounted.h> 29 #include <wtf/Forward.h> 30 #include <wtf/OwnFastMallocPtr.h> 31 #include <wtf/StdLibExtras.h> 32 #include <wtf/StringHasher.h> 33 #include <wtf/Vector.h> 34 #include <wtf/text/StringImplBase.h> 35 #include <wtf/unicode/Unicode.h> 36 37 #if USE(CF) 38 typedef const struct __CFString * CFStringRef; 39 #endif 40 41 #ifdef __OBJC__ 42 @class NSString; 43 #endif 44 45 // FIXME: This is a temporary layering violation while we move string code to WTF. 46 // Landing the file moves in one patch, will follow on with patches to change the namespaces. 47 namespace JSC { 48 struct IdentifierCStringTranslator; 49 struct IdentifierUCharBufferTranslator; 50 } 51 52 namespace WTF { 53 54 struct CStringTranslator; 55 struct HashAndCharactersTranslator; 56 struct HashAndUTF8CharactersTranslator; 57 struct UCharBufferTranslator; 58 59 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; 60 61 typedef OwnFastMallocPtr<const UChar> SharableUChar; 62 typedef CrossThreadRefCounted<SharableUChar> SharedUChar; 63 typedef bool (*CharacterMatchFunctionPtr)(UChar); 64 65 class StringImpl : public StringImplBase { 66 friend struct JSC::IdentifierCStringTranslator; 67 friend struct JSC::IdentifierUCharBufferTranslator; 68 friend struct WTF::CStringTranslator; 69 friend struct WTF::HashAndCharactersTranslator; 70 friend struct WTF::HashAndUTF8CharactersTranslator; 71 friend struct WTF::UCharBufferTranslator; 72 friend class AtomicStringImpl; 73 private: 74 // Used to construct static strings, which have an special refCount that can never hit zero. 75 // This means that the static string will never be destroyed, which is important because 76 // static strings will be shared across threads & ref-counted in a non-threadsafe manner. 77 StringImpl(const UChar* characters, unsigned length, StaticStringConstructType) 78 : StringImplBase(length, ConstructStaticString) 79 , m_data(characters) 80 , m_buffer(0) 81 , m_hash(0) 82 { 83 // Ensure that the hash is computed so that AtomicStringHash can call existingHash() 84 // with impunity. The empty string is special because it is never entered into 85 // AtomicString's HashKey, but still needs to compare correctly. 86 hash(); 87 } 88 89 // Create a normal string with internal storage (BufferInternal) 90 StringImpl(unsigned length) 91 : StringImplBase(length, BufferInternal) 92 , m_data(reinterpret_cast<const UChar*>(this + 1)) 93 , m_buffer(0) 94 , m_hash(0) 95 { 96 ASSERT(m_data); 97 ASSERT(m_length); 98 } 99 100 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) 101 StringImpl(const UChar* characters, unsigned length) 102 : StringImplBase(length, BufferOwned) 103 , m_data(characters) 104 , m_buffer(0) 105 , m_hash(0) 106 { 107 ASSERT(m_data); 108 ASSERT(m_length); 109 } 110 111 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring) 112 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) 113 : StringImplBase(length, BufferSubstring) 114 , m_data(characters) 115 , m_substringBuffer(base.leakRef()) 116 , m_hash(0) 117 { 118 ASSERT(m_data); 119 ASSERT(m_length); 120 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); 121 } 122 123 // Used to construct new strings sharing an existing SharedUChar (BufferShared) 124 StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) 125 : StringImplBase(length, BufferShared) 126 , m_data(characters) 127 , m_sharedBuffer(sharedBuffer.leakRef()) 128 , m_hash(0) 129 { 130 ASSERT(m_data); 131 ASSERT(m_length); 132 } 133 134 // For use only by AtomicString's XXXTranslator helpers. 135 void setHash(unsigned hash) 136 { 137 ASSERT(!isStatic()); 138 ASSERT(!m_hash); 139 ASSERT(hash == StringHasher::computeHash(m_data, m_length)); 140 m_hash = hash; 141 } 142 143 public: 144 ~StringImpl(); 145 146 static PassRefPtr<StringImpl> create(const UChar*, unsigned length); 147 static PassRefPtr<StringImpl> create(const char*, unsigned length); 148 static PassRefPtr<StringImpl> create(const char*); 149 static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer); 150 static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) 151 { 152 ASSERT(rep); 153 ASSERT(length <= rep->length()); 154 155 if (!length) 156 return empty(); 157 158 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); 159 return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep)); 160 } 161 162 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); 163 static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) 164 { 165 if (!length) { 166 output = 0; 167 return empty(); 168 } 169 170 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) { 171 output = 0; 172 return 0; 173 } 174 StringImpl* resultImpl; 175 if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) { 176 output = 0; 177 return 0; 178 } 179 output = reinterpret_cast<UChar*>(resultImpl + 1); 180 return adoptRef(new(resultImpl) StringImpl(length)); 181 } 182 183 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); } 184 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&); 185 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length); 186 187 template<size_t inlineCapacity> 188 static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector) 189 { 190 if (size_t size = vector.size()) { 191 ASSERT(vector.data()); 192 if (size > std::numeric_limits<unsigned>::max()) 193 CRASH(); 194 return adoptRef(new StringImpl(vector.releaseBuffer(), size)); 195 } 196 return empty(); 197 } 198 static PassRefPtr<StringImpl> adopt(StringBuffer&); 199 200 SharedUChar* sharedBuffer(); 201 const UChar* characters() const { return m_data; } 202 203 size_t cost() 204 { 205 // For substrings, return the cost of the base string. 206 if (bufferOwnership() == BufferSubstring) 207 return m_substringBuffer->cost(); 208 209 if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) { 210 m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost; 211 return m_length; 212 } 213 return 0; 214 } 215 216 bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; } 217 void setIsIdentifier(bool isIdentifier) 218 { 219 ASSERT(!isStatic()); 220 if (isIdentifier) 221 m_refCountAndFlags |= s_refCountFlagIsIdentifier; 222 else 223 m_refCountAndFlags &= ~s_refCountFlagIsIdentifier; 224 } 225 226 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; } 227 228 bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; } 229 void setIsAtomic(bool isIdentifier) 230 { 231 ASSERT(!isStatic()); 232 if (isIdentifier) 233 m_refCountAndFlags |= s_refCountFlagIsAtomic; 234 else 235 m_refCountAndFlags &= ~s_refCountFlagIsAtomic; 236 } 237 238 unsigned hash() const { if (!m_hash) m_hash = StringHasher::computeHash(m_data, m_length); return m_hash; } 239 unsigned existingHash() const { ASSERT(m_hash); return m_hash; } 240 241 ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } 242 ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; } 243 244 static StringImpl* empty(); 245 246 static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) 247 { 248 if (numCharacters <= s_copyCharsInlineCutOff) { 249 for (unsigned i = 0; i < numCharacters; ++i) 250 destination[i] = source[i]; 251 } else 252 memcpy(destination, source, numCharacters * sizeof(UChar)); 253 } 254 255 // Returns a StringImpl suitable for use on another thread. 256 PassRefPtr<StringImpl> crossThreadString(); 257 // Makes a deep copy. Helpful only if you need to use a String on another thread 258 // (use crossThreadString if the method call doesn't need to be threadsafe). 259 // Since StringImpl objects are immutable, there's no other reason to make a copy. 260 PassRefPtr<StringImpl> threadsafeCopy() const; 261 262 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); 263 264 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } 265 UChar32 characterStartingAt(unsigned); 266 267 bool containsOnlyWhitespace(); 268 269 int toIntStrict(bool* ok = 0, int base = 10); 270 unsigned toUIntStrict(bool* ok = 0, int base = 10); 271 int64_t toInt64Strict(bool* ok = 0, int base = 10); 272 uint64_t toUInt64Strict(bool* ok = 0, int base = 10); 273 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); 274 275 int toInt(bool* ok = 0); // ignores trailing garbage 276 unsigned toUInt(bool* ok = 0); // ignores trailing garbage 277 int64_t toInt64(bool* ok = 0); // ignores trailing garbage 278 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage 279 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage 280 281 double toDouble(bool* ok = 0, bool* didReadNumber = 0); 282 float toFloat(bool* ok = 0, bool* didReadNumber = 0); 283 284 PassRefPtr<StringImpl> lower(); 285 PassRefPtr<StringImpl> upper(); 286 287 enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter }; 288 289 PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter); 290 PassRefPtr<StringImpl> foldCase(); 291 292 PassRefPtr<StringImpl> stripWhiteSpace(); 293 PassRefPtr<StringImpl> simplifyWhiteSpace(); 294 295 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); 296 297 size_t find(UChar, unsigned index = 0); 298 size_t find(CharacterMatchFunctionPtr, unsigned index = 0); 299 size_t find(const char*, unsigned index = 0); 300 size_t find(StringImpl*, unsigned index = 0); 301 size_t findIgnoringCase(const char*, unsigned index = 0); 302 size_t findIgnoringCase(StringImpl*, unsigned index = 0); 303 304 size_t reverseFind(UChar, unsigned index = UINT_MAX); 305 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); 306 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); 307 308 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } 309 bool endsWith(StringImpl*, bool caseSensitive = true); 310 311 PassRefPtr<StringImpl> replace(UChar, UChar); 312 PassRefPtr<StringImpl> replace(UChar, StringImpl*); 313 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); 314 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); 315 316 WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0); 317 318 #if USE(CF) 319 CFStringRef createCFString(); 320 #endif 321 #ifdef __OBJC__ 322 operator NSString*(); 323 #endif 324 325 private: 326 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. 327 static const unsigned s_copyCharsInlineCutOff = 20; 328 329 static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length); 330 331 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); } 332 bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; } 333 const UChar* m_data; 334 union { 335 void* m_buffer; 336 StringImpl* m_substringBuffer; 337 SharedUChar* m_sharedBuffer; 338 }; 339 mutable unsigned m_hash; 340 }; 341 342 bool equal(const StringImpl*, const StringImpl*); 343 bool equal(const StringImpl*, const char*); 344 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } 345 346 bool equalIgnoringCase(StringImpl*, StringImpl*); 347 bool equalIgnoringCase(StringImpl*, const char*); 348 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } 349 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); 350 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } 351 352 bool equalIgnoringNullity(StringImpl*, StringImpl*); 353 354 template<size_t inlineCapacity> 355 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b) 356 { 357 if (!b) 358 return !a.size(); 359 if (a.size() != b->length()) 360 return false; 361 return !memcmp(a.data(), b->characters(), b->length()); 362 } 363 364 int codePointCompare(const StringImpl*, const StringImpl*); 365 366 static inline bool isSpaceOrNewline(UChar c) 367 { 368 // Use isASCIISpace() for basic Latin-1. 369 // This will include newlines, which aren't included in Unicode DirWS. 370 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; 371 } 372 373 // This is a hot function because it's used when parsing HTML. 374 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) 375 { 376 ASSERT(characters); 377 ASSERT(length); 378 379 // Optimize for the case where there are no Null characters by quickly 380 // searching for nulls, and then using StringImpl::create, which will 381 // memcpy the whole buffer. This is faster than assigning character by 382 // character during the loop. 383 384 // Fast case. 385 int foundNull = 0; 386 for (unsigned i = 0; !foundNull && i < length; i++) { 387 int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) 388 foundNull |= !c; 389 } 390 if (!foundNull) 391 return StringImpl::create(characters, length); 392 393 return StringImpl::createStrippingNullCharactersSlowCase(characters, length); 394 } 395 396 struct StringHash; 397 398 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl> 399 template<typename T> struct DefaultHash; 400 template<> struct DefaultHash<StringImpl*> { 401 typedef StringHash Hash; 402 }; 403 template<> struct DefaultHash<RefPtr<StringImpl> > { 404 typedef StringHash Hash; 405 }; 406 407 } 408 409 using WTF::StringImpl; 410 using WTF::equal; 411 using WTF::TextCaseSensitivity; 412 using WTF::TextCaseSensitive; 413 using WTF::TextCaseInsensitive; 414 415 #endif 416