1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com> 4 * Copyright (C) 2012 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #include "config.h" 24 #include "AtomicString.h" 25 26 #include "StringHash.h" 27 #include "wtf/HashSet.h" 28 #include "wtf/WTFThreadData.h" 29 #include "wtf/unicode/UTF8.h" 30 31 namespace WTF { 32 33 using namespace Unicode; 34 35 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); 36 37 class AtomicStringTable { 38 public: 39 static AtomicStringTable* create(WTFThreadData& data) 40 { 41 data.m_atomicStringTable = new AtomicStringTable; 42 data.m_atomicStringTableDestructor = AtomicStringTable::destroy; 43 return data.m_atomicStringTable; 44 } 45 46 HashSet<StringImpl*>& table() 47 { 48 return m_table; 49 } 50 51 private: 52 static void destroy(AtomicStringTable* table) 53 { 54 HashSet<StringImpl*>::iterator end = table->m_table.end(); 55 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) 56 (*iter)->setIsAtomic(false); 57 delete table; 58 } 59 60 HashSet<StringImpl*> m_table; 61 }; 62 63 static inline HashSet<StringImpl*>& stringTable() 64 { 65 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). 66 WTFThreadData& data = wtfThreadData(); 67 AtomicStringTable* table = data.atomicStringTable(); 68 if (UNLIKELY(!table)) 69 table = AtomicStringTable::create(data); 70 return table->table(); 71 } 72 73 template<typename T, typename HashTranslator> 74 static inline PassRefPtr<StringImpl> addToStringTable(const T& value) 75 { 76 HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value); 77 78 // If the string is newly-translated, then we need to adopt it. 79 // The boolean in the pair tells us if that is so. 80 return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator; 81 } 82 83 struct CStringTranslator { 84 static unsigned hash(const LChar* c) 85 { 86 return StringHasher::computeHashAndMaskTop8Bits(c); 87 } 88 89 static inline bool equal(StringImpl* r, const LChar* s) 90 { 91 return WTF::equal(r, s); 92 } 93 94 static void translate(StringImpl*& location, const LChar* const& c, unsigned hash) 95 { 96 location = StringImpl::create(c).leakRef(); 97 location->setHash(hash); 98 location->setIsAtomic(true); 99 } 100 }; 101 102 PassRefPtr<StringImpl> AtomicString::add(const LChar* c) 103 { 104 if (!c) 105 return 0; 106 if (!*c) 107 return StringImpl::empty(); 108 109 return addToStringTable<const LChar*, CStringTranslator>(c); 110 } 111 112 template<typename CharacterType> 113 struct HashTranslatorCharBuffer { 114 const CharacterType* s; 115 unsigned length; 116 }; 117 118 typedef HashTranslatorCharBuffer<UChar> UCharBuffer; 119 struct UCharBufferTranslator { 120 static unsigned hash(const UCharBuffer& buf) 121 { 122 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 123 } 124 125 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 126 { 127 return WTF::equal(str, buf.s, buf.length); 128 } 129 130 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 131 { 132 location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); 133 location->setHash(hash); 134 location->setIsAtomic(true); 135 } 136 }; 137 138 template<typename CharacterType> 139 struct HashAndCharacters { 140 unsigned hash; 141 const CharacterType* characters; 142 unsigned length; 143 }; 144 145 template<typename CharacterType> 146 struct HashAndCharactersTranslator { 147 static unsigned hash(const HashAndCharacters<CharacterType>& buffer) 148 { 149 ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); 150 return buffer.hash; 151 } 152 153 static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) 154 { 155 return WTF::equal(string, buffer.characters, buffer.length); 156 } 157 158 static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) 159 { 160 location = StringImpl::create(buffer.characters, buffer.length).leakRef(); 161 location->setHash(hash); 162 location->setIsAtomic(true); 163 } 164 }; 165 166 struct HashAndUTF8Characters { 167 unsigned hash; 168 const char* characters; 169 unsigned length; 170 unsigned utf16Length; 171 }; 172 173 struct HashAndUTF8CharactersTranslator { 174 static unsigned hash(const HashAndUTF8Characters& buffer) 175 { 176 return buffer.hash; 177 } 178 179 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 180 { 181 if (buffer.utf16Length != string->length()) 182 return false; 183 184 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 185 if (buffer.utf16Length != buffer.length) { 186 if (string->is8Bit()) { 187 const LChar* characters8 = string->characters8(); 188 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length); 189 } 190 const UChar* characters16 = string->characters16(); 191 return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length); 192 } 193 194 if (string->is8Bit()) { 195 const LChar* stringCharacters = string->characters8(); 196 197 for (unsigned i = 0; i < buffer.length; ++i) { 198 ASSERT(isASCII(buffer.characters[i])); 199 if (stringCharacters[i] != buffer.characters[i]) 200 return false; 201 } 202 203 return true; 204 } 205 206 const UChar* stringCharacters = string->characters16(); 207 208 for (unsigned i = 0; i < buffer.length; ++i) { 209 ASSERT(isASCII(buffer.characters[i])); 210 if (stringCharacters[i] != buffer.characters[i]) 211 return false; 212 } 213 214 return true; 215 } 216 217 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 218 { 219 UChar* target; 220 RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target); 221 222 bool isAllASCII; 223 const char* source = buffer.characters; 224 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) 225 ASSERT_NOT_REACHED(); 226 227 if (isAllASCII) 228 newString = StringImpl::create(buffer.characters, buffer.length); 229 230 location = newString.release().leakRef(); 231 location->setHash(hash); 232 location->setIsAtomic(true); 233 } 234 }; 235 236 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 237 { 238 if (!s) 239 return 0; 240 241 if (!length) 242 return StringImpl::empty(); 243 244 UCharBuffer buffer = { s, length }; 245 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 246 } 247 248 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 249 { 250 ASSERT(s); 251 ASSERT(existingHash); 252 253 if (!length) 254 return StringImpl::empty(); 255 256 HashAndCharacters<UChar> buffer = { existingHash, s, length }; 257 return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer); 258 } 259 260 PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 261 { 262 if (!s) 263 return 0; 264 265 unsigned length = 0; 266 while (s[length] != UChar(0)) 267 ++length; 268 269 if (!length) 270 return StringImpl::empty(); 271 272 UCharBuffer buffer = { s, length }; 273 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 274 } 275 276 struct SubstringLocation { 277 StringImpl* baseString; 278 unsigned start; 279 unsigned length; 280 }; 281 282 struct SubstringTranslator { 283 static unsigned hash(const SubstringLocation& buffer) 284 { 285 if (buffer.baseString->is8Bit()) 286 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); 287 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); 288 } 289 290 static bool equal(StringImpl* const& string, const SubstringLocation& buffer) 291 { 292 if (buffer.baseString->is8Bit()) 293 return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); 294 return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); 295 } 296 297 static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) 298 { 299 location = buffer.baseString->substring(buffer.start, buffer.length).leakRef(); 300 location->setHash(hash); 301 location->setIsAtomic(true); 302 } 303 }; 304 305 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length) 306 { 307 if (!baseString) 308 return 0; 309 310 if (!length || start >= baseString->length()) 311 return StringImpl::empty(); 312 313 unsigned maxLength = baseString->length() - start; 314 if (length >= maxLength) { 315 if (!start) 316 return add(baseString); 317 length = maxLength; 318 } 319 320 SubstringLocation buffer = { baseString, start, length }; 321 return addToStringTable<SubstringLocation, SubstringTranslator>(buffer); 322 } 323 324 typedef HashTranslatorCharBuffer<LChar> LCharBuffer; 325 struct LCharBufferTranslator { 326 static unsigned hash(const LCharBuffer& buf) 327 { 328 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 329 } 330 331 static bool equal(StringImpl* const& str, const LCharBuffer& buf) 332 { 333 return WTF::equal(str, buf.s, buf.length); 334 } 335 336 static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) 337 { 338 location = StringImpl::create(buf.s, buf.length).leakRef(); 339 location->setHash(hash); 340 location->setIsAtomic(true); 341 } 342 }; 343 344 typedef HashTranslatorCharBuffer<char> CharBuffer; 345 struct CharBufferFromLiteralDataTranslator { 346 static unsigned hash(const CharBuffer& buf) 347 { 348 return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); 349 } 350 351 static bool equal(StringImpl* const& str, const CharBuffer& buf) 352 { 353 return WTF::equal(str, buf.s, buf.length); 354 } 355 356 static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) 357 { 358 location = StringImpl::create(buf.s, buf.length).leakRef(); 359 location->setHash(hash); 360 location->setIsAtomic(true); 361 } 362 }; 363 364 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length) 365 { 366 if (!s) 367 return 0; 368 369 if (!length) 370 return StringImpl::empty(); 371 372 LCharBuffer buffer = { s, length }; 373 return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); 374 } 375 376 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length) 377 { 378 ASSERT(characters); 379 ASSERT(length); 380 381 CharBuffer buffer = { characters, length }; 382 return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); 383 } 384 385 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r) 386 { 387 if (!r->length()) 388 return StringImpl::empty(); 389 390 StringImpl* result = *stringTable().add(r).iterator; 391 if (result == r) 392 r->setIsAtomic(true); 393 ASSERT(!r->isStatic() || result->isStatic()); 394 return result; 395 } 396 397 template<typename CharacterType> 398 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl) 399 { 400 HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() }; 401 return stringTable().find<HashAndCharactersTranslator<CharacterType> >(buffer); 402 } 403 404 StringImpl* AtomicString::find(const StringImpl* stringImpl) 405 { 406 ASSERT(stringImpl); 407 ASSERT(stringImpl->existingHash()); 408 409 if (!stringImpl->length()) 410 return StringImpl::empty(); 411 412 HashSet<StringImpl*>::iterator iterator; 413 if (stringImpl->is8Bit()) 414 iterator = findString<LChar>(stringImpl); 415 else 416 iterator = findString<UChar>(stringImpl); 417 if (iterator == stringTable().end()) 418 return 0; 419 return *iterator; 420 } 421 422 void AtomicString::remove(StringImpl* r) 423 { 424 HashSet<StringImpl*>::iterator iterator; 425 if (r->is8Bit()) 426 iterator = findString<LChar>(r); 427 else 428 iterator = findString<UChar>(r); 429 RELEASE_ASSERT(iterator != stringTable().end()); 430 stringTable().remove(iterator); 431 } 432 433 AtomicString AtomicString::lower() const 434 { 435 // Note: This is a hot function in the Dromaeo benchmark. 436 StringImpl* impl = this->impl(); 437 if (UNLIKELY(!impl)) 438 return *this; 439 RefPtr<StringImpl> newImpl = impl->lower(); 440 if (LIKELY(newImpl == impl)) 441 return *this; 442 return AtomicString(newImpl); 443 } 444 445 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) 446 { 447 HashAndUTF8Characters buffer; 448 buffer.characters = charactersStart; 449 buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); 450 451 if (!buffer.hash) 452 return nullAtom; 453 454 AtomicString atomicString; 455 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 456 return atomicString; 457 } 458 459 #ifndef NDEBUG 460 void AtomicString::show() const 461 { 462 m_string.show(); 463 } 464 #endif 465 466 } // namespace WTF 467