1 /* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com> 4 * Copyright (C) 2012 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #include "config.h" 24 #include "AtomicString.h" 25 26 #include "StringHash.h" 27 #include "wtf/HashSet.h" 28 #include "wtf/WTFThreadData.h" 29 #include "wtf/dtoa.h" 30 #include "wtf/text/IntegerToStringConversion.h" 31 #include "wtf/unicode/UTF8.h" 32 33 namespace WTF { 34 35 using namespace Unicode; 36 37 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); 38 39 class AtomicStringTable { 40 WTF_MAKE_NONCOPYABLE(AtomicStringTable); 41 public: 42 static AtomicStringTable* create(WTFThreadData& data) 43 { 44 data.m_atomicStringTable = new AtomicStringTable; 45 data.m_atomicStringTableDestructor = AtomicStringTable::destroy; 46 data.m_atomicStringTable->addStaticStrings(); 47 return data.m_atomicStringTable; 48 } 49 50 StringImpl* addStringImpl(StringImpl* string) 51 { 52 if (!string->length()) 53 return StringImpl::empty(); 54 55 StringImpl* result = *m_table.add(string).storedValue; 56 57 if (!result->isAtomic()) 58 result->setIsAtomic(true); 59 60 ASSERT(!string->isStatic() || result->isStatic()); 61 return result; 62 } 63 64 HashSet<StringImpl*>& table() 65 { 66 return m_table; 67 } 68 69 private: 70 AtomicStringTable() { } 71 72 void addStaticStrings() 73 { 74 const StaticStringsTable& staticStrings = StringImpl::allStaticStrings(); 75 76 StaticStringsTable::const_iterator it = staticStrings.begin(); 77 for (; it != staticStrings.end(); ++it) { 78 addStringImpl(it->value); 79 } 80 } 81 82 static void destroy(AtomicStringTable* table) 83 { 84 HashSet<StringImpl*>::iterator end = table->m_table.end(); 85 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) { 86 StringImpl* string = *iter; 87 if (!string->isStatic()) { 88 ASSERT(string->isAtomic()); 89 string->setIsAtomic(false); 90 } 91 } 92 delete table; 93 } 94 95 HashSet<StringImpl*> m_table; 96 }; 97 98 static inline AtomicStringTable& atomicStringTable() 99 { 100 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). 101 WTFThreadData& data = wtfThreadData(); 102 AtomicStringTable* table = data.atomicStringTable(); 103 if (UNLIKELY(!table)) 104 table = AtomicStringTable::create(data); 105 return *table; 106 } 107 108 static inline HashSet<StringImpl*>& atomicStrings() 109 { 110 return atomicStringTable().table(); 111 } 112 113 template<typename T, typename HashTranslator> 114 static inline PassRefPtr<StringImpl> addToStringTable(const T& value) 115 { 116 HashSet<StringImpl*>::AddResult addResult = atomicStrings().add<HashTranslator>(value); 117 118 // If the string is newly-translated, then we need to adopt it. 119 // The boolean in the pair tells us if that is so. 120 return addResult.isNewEntry ? adoptRef(*addResult.storedValue) : *addResult.storedValue; 121 } 122 123 PassRefPtr<StringImpl> AtomicString::add(const LChar* c) 124 { 125 if (!c) 126 return nullptr; 127 if (!*c) 128 return StringImpl::empty(); 129 130 return add(c, strlen(reinterpret_cast<const char*>(c))); 131 } 132 133 template<typename CharacterType> 134 struct HashTranslatorCharBuffer { 135 const CharacterType* s; 136 unsigned length; 137 }; 138 139 typedef HashTranslatorCharBuffer<UChar> UCharBuffer; 140 struct UCharBufferTranslator { 141 static unsigned hash(const UCharBuffer& buf) 142 { 143 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 144 } 145 146 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 147 { 148 return WTF::equal(str, buf.s, buf.length); 149 } 150 151 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 152 { 153 location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef(); 154 location->setHash(hash); 155 location->setIsAtomic(true); 156 } 157 }; 158 159 template<typename CharacterType> 160 struct HashAndCharacters { 161 unsigned hash; 162 const CharacterType* characters; 163 unsigned length; 164 }; 165 166 template<typename CharacterType> 167 struct HashAndCharactersTranslator { 168 static unsigned hash(const HashAndCharacters<CharacterType>& buffer) 169 { 170 ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length)); 171 return buffer.hash; 172 } 173 174 static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer) 175 { 176 return WTF::equal(string, buffer.characters, buffer.length); 177 } 178 179 static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash) 180 { 181 location = StringImpl::create(buffer.characters, buffer.length).leakRef(); 182 location->setHash(hash); 183 location->setIsAtomic(true); 184 } 185 }; 186 187 struct HashAndUTF8Characters { 188 unsigned hash; 189 const char* characters; 190 unsigned length; 191 unsigned utf16Length; 192 }; 193 194 struct HashAndUTF8CharactersTranslator { 195 static unsigned hash(const HashAndUTF8Characters& buffer) 196 { 197 return buffer.hash; 198 } 199 200 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 201 { 202 if (buffer.utf16Length != string->length()) 203 return false; 204 205 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 206 if (buffer.utf16Length != buffer.length) { 207 if (string->is8Bit()) { 208 const LChar* characters8 = string->characters8(); 209 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length); 210 } 211 const UChar* characters16 = string->characters16(); 212 return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length); 213 } 214 215 if (string->is8Bit()) { 216 const LChar* stringCharacters = string->characters8(); 217 218 for (unsigned i = 0; i < buffer.length; ++i) { 219 ASSERT(isASCII(buffer.characters[i])); 220 if (stringCharacters[i] != buffer.characters[i]) 221 return false; 222 } 223 224 return true; 225 } 226 227 const UChar* stringCharacters = string->characters16(); 228 229 for (unsigned i = 0; i < buffer.length; ++i) { 230 ASSERT(isASCII(buffer.characters[i])); 231 if (stringCharacters[i] != buffer.characters[i]) 232 return false; 233 } 234 235 return true; 236 } 237 238 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 239 { 240 UChar* target; 241 RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target); 242 243 bool isAllASCII; 244 const char* source = buffer.characters; 245 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK) 246 ASSERT_NOT_REACHED(); 247 248 if (isAllASCII) 249 newString = StringImpl::create(buffer.characters, buffer.length); 250 251 location = newString.release().leakRef(); 252 location->setHash(hash); 253 location->setIsAtomic(true); 254 } 255 }; 256 257 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 258 { 259 if (!s) 260 return nullptr; 261 262 if (!length) 263 return StringImpl::empty(); 264 265 UCharBuffer buffer = { s, length }; 266 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 267 } 268 269 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 270 { 271 ASSERT(s); 272 ASSERT(existingHash); 273 274 if (!length) 275 return StringImpl::empty(); 276 277 HashAndCharacters<UChar> buffer = { existingHash, s, length }; 278 return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer); 279 } 280 281 PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 282 { 283 if (!s) 284 return nullptr; 285 286 unsigned length = 0; 287 while (s[length] != UChar(0)) 288 ++length; 289 290 if (!length) 291 return StringImpl::empty(); 292 293 UCharBuffer buffer = { s, length }; 294 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 295 } 296 297 struct SubstringLocation { 298 StringImpl* baseString; 299 unsigned start; 300 unsigned length; 301 }; 302 303 struct SubstringTranslator { 304 static unsigned hash(const SubstringLocation& buffer) 305 { 306 if (buffer.baseString->is8Bit()) 307 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length); 308 return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length); 309 } 310 311 static bool equal(StringImpl* const& string, const SubstringLocation& buffer) 312 { 313 if (buffer.baseString->is8Bit()) 314 return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length); 315 return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length); 316 } 317 318 static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash) 319 { 320 location = buffer.baseString->substring(buffer.start, buffer.length).leakRef(); 321 location->setHash(hash); 322 location->setIsAtomic(true); 323 } 324 }; 325 326 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length) 327 { 328 if (!baseString) 329 return nullptr; 330 331 if (!length || start >= baseString->length()) 332 return StringImpl::empty(); 333 334 unsigned maxLength = baseString->length() - start; 335 if (length >= maxLength) { 336 if (!start) 337 return add(baseString); 338 length = maxLength; 339 } 340 341 SubstringLocation buffer = { baseString, start, length }; 342 return addToStringTable<SubstringLocation, SubstringTranslator>(buffer); 343 } 344 345 typedef HashTranslatorCharBuffer<LChar> LCharBuffer; 346 struct LCharBufferTranslator { 347 static unsigned hash(const LCharBuffer& buf) 348 { 349 return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length); 350 } 351 352 static bool equal(StringImpl* const& str, const LCharBuffer& buf) 353 { 354 return WTF::equal(str, buf.s, buf.length); 355 } 356 357 static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash) 358 { 359 location = StringImpl::create(buf.s, buf.length).leakRef(); 360 location->setHash(hash); 361 location->setIsAtomic(true); 362 } 363 }; 364 365 typedef HashTranslatorCharBuffer<char> CharBuffer; 366 struct CharBufferFromLiteralDataTranslator { 367 static unsigned hash(const CharBuffer& buf) 368 { 369 return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length); 370 } 371 372 static bool equal(StringImpl* const& str, const CharBuffer& buf) 373 { 374 return WTF::equal(str, buf.s, buf.length); 375 } 376 377 static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash) 378 { 379 location = StringImpl::create(buf.s, buf.length).leakRef(); 380 location->setHash(hash); 381 location->setIsAtomic(true); 382 } 383 }; 384 385 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length) 386 { 387 if (!s) 388 return nullptr; 389 390 if (!length) 391 return StringImpl::empty(); 392 393 LCharBuffer buffer = { s, length }; 394 return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer); 395 } 396 397 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length) 398 { 399 ASSERT(characters); 400 ASSERT(length); 401 402 CharBuffer buffer = { characters, length }; 403 return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer); 404 } 405 406 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string) 407 { 408 return atomicStringTable().addStringImpl(string); 409 } 410 411 template<typename CharacterType> 412 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl) 413 { 414 HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() }; 415 return atomicStrings().find<HashAndCharactersTranslator<CharacterType> >(buffer); 416 } 417 418 StringImpl* AtomicString::find(const StringImpl* stringImpl) 419 { 420 ASSERT(stringImpl); 421 ASSERT(stringImpl->existingHash()); 422 423 if (!stringImpl->length()) 424 return StringImpl::empty(); 425 426 HashSet<StringImpl*>::iterator iterator; 427 if (stringImpl->is8Bit()) 428 iterator = findString<LChar>(stringImpl); 429 else 430 iterator = findString<UChar>(stringImpl); 431 if (iterator == atomicStrings().end()) 432 return 0; 433 return *iterator; 434 } 435 436 void AtomicString::remove(StringImpl* r) 437 { 438 HashSet<StringImpl*>::iterator iterator; 439 if (r->is8Bit()) 440 iterator = findString<LChar>(r); 441 else 442 iterator = findString<UChar>(r); 443 RELEASE_ASSERT(iterator != atomicStrings().end()); 444 atomicStrings().remove(iterator); 445 } 446 447 AtomicString AtomicString::lower() const 448 { 449 // Note: This is a hot function in the Dromaeo benchmark. 450 StringImpl* impl = this->impl(); 451 if (UNLIKELY(!impl)) 452 return *this; 453 RefPtr<StringImpl> newImpl = impl->lower(); 454 if (LIKELY(newImpl == impl)) 455 return *this; 456 return AtomicString(newImpl.release()); 457 } 458 459 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) 460 { 461 HashAndUTF8Characters buffer; 462 buffer.characters = charactersStart; 463 buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); 464 465 if (!buffer.hash) 466 return nullAtom; 467 468 AtomicString atomicString; 469 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 470 return atomicString; 471 } 472 473 AtomicString AtomicString::number(int number) 474 { 475 return numberToStringSigned<AtomicString>(number); 476 } 477 478 AtomicString AtomicString::number(unsigned number) 479 { 480 return numberToStringUnsigned<AtomicString>(number); 481 } 482 483 AtomicString AtomicString::number(long number) 484 { 485 return numberToStringSigned<AtomicString>(number); 486 } 487 488 AtomicString AtomicString::number(unsigned long number) 489 { 490 return numberToStringUnsigned<AtomicString>(number); 491 } 492 493 AtomicString AtomicString::number(long long number) 494 { 495 return numberToStringSigned<AtomicString>(number); 496 } 497 498 AtomicString AtomicString::number(unsigned long long number) 499 { 500 return numberToStringUnsigned<AtomicString>(number); 501 } 502 503 AtomicString AtomicString::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy) 504 { 505 NumberToStringBuffer buffer; 506 return AtomicString(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros)); 507 } 508 509 #ifndef NDEBUG 510 void AtomicString::show() const 511 { 512 m_string.show(); 513 } 514 #endif 515 516 } // namespace WTF 517